use anyhow::{Context as _, Result};
use chrono::{DateTime, TimeZone, Utc};
use git2::{BlameOptions, DiffOptions, Oid, Repository, Sort};
use rayon::prelude::*;
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
/// Summary of one commit as returned by the history queries in this file.
#[derive(Debug, Clone)]
pub struct CommitStats {
/// Object id of the commit.
pub hash: git2::Oid,
/// Commit time converted to UTC.
pub date: DateTime<Utc>,
/// Commit message; empty when the message is not available as UTF-8.
pub message: String,
/// Author e-mail; empty when it is not available as UTF-8.
pub author_email: String,
/// Per-file statistics. Every constructor visible in this file leaves this empty.
pub files: Vec<FileStats>,
}
/// Line-change counters for a single file.
///
/// NOTE(review): nothing in the visible code constructs this type; the field
/// meanings below are inferred from the names and should be confirmed.
#[derive(Debug, Clone)]
pub struct FileStats {
/// Path of the file the counters belong to.
pub path: PathBuf,
/// Presumably the number of added lines — confirm against the producer.
pub additions: usize,
/// Presumably the number of removed lines — confirm against the producer.
pub deletions: usize,
}
/// Blame attribution for one line of a file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlameLineInfo {
/// Name from the final signature of the blame hunk (`"Unknown"` when absent).
pub author: String,
/// Textual form of the hunk's final commit id.
pub commit_hash: String,
}
/// Blame result for a whole file.
#[derive(Debug, Clone, Default)]
pub struct BlameData {
/// Attribution keyed by line number as reported by git2's
/// `final_start_line` plus the offset inside the hunk (the tests in this
/// file look lines up starting at 1).
pub lines: HashMap<usize, BlameLineInfo>,
}
/// Handle to a non-bare git repository, identified by its working directory.
///
/// Only the path is stored; a `git2::Repository` is opened on demand by each
/// query.
pub struct Git2Repository {
/// Working-directory root of the discovered repository.
repo_path: PathBuf,
}
impl Git2Repository {
/// Locates the repository containing `path` and records its working directory.
///
/// # Errors
/// Fails when no repository can be discovered from `path`, or when the
/// discovered repository is bare (has no working directory).
pub fn open(path: &Path) -> Result<Self> {
    let discovered = Repository::discover(path)
        .with_context(|| format!("Failed to discover git repository at {}", path.display()))?;
    match discovered.workdir() {
        Some(workdir) => Ok(Self {
            repo_path: workdir.to_path_buf(),
        }),
        None => Err(anyhow::anyhow!("Bare repositories are not supported")),
    }
}
/// Working-directory root captured when the repository was opened.
pub fn repo_path(&self) -> &Path {
    self.repo_path.as_path()
}
/// Opens a fresh `git2::Repository` handle at the stored working directory.
///
/// # Errors
/// Returns the git2 error, annotated with the repository path.
pub(super) fn open_repo(&self) -> Result<Repository> {
    let root = &self.repo_path;
    Repository::open(root)
        .with_context(|| format!("Failed to open repository at {}", root.display()))
}
/// Counts the commits reachable from HEAD that touch `file_path`.
///
/// Revwalk entries and commits that fail to load are skipped rather than
/// reported.
pub fn count_file_commits(&self, file_path: &Path) -> Result<usize> {
    let repo = self.open_repo()?;
    let target = self.to_relative_path(file_path);
    let mut walk = repo.revwalk()?;
    walk.push_head()?;
    walk.set_sorting(Sort::TIME)?;
    let mut total = 0usize;
    for step in walk {
        let Ok(oid) = step else { continue };
        let Ok(commit) = repo.find_commit(oid) else {
            continue;
        };
        if self.commit_touches_file(&repo, &commit, &target) {
            total += 1;
        }
    }
    Ok(total)
}
/// Returns how many whole days before `now` the file was first touched.
///
/// The walk goes from the oldest commit towards HEAD and stops at the first
/// commit that touches `file_path`. The result is `0` when no such commit
/// exists, when its timestamp cannot be represented, or when it lies after
/// `now`.
pub fn file_age_days(&self, file_path: &Path, now: DateTime<Utc>) -> Result<u32> {
    let repo = self.open_repo()?;
    let relative_path = self.to_relative_path(file_path);
    let mut revwalk = repo.revwalk()?;
    revwalk.push_head()?;
    revwalk.set_sorting(Sort::TIME | Sort::REVERSE)?;
    let first_commit = revwalk
        .filter_map(|oid| oid.ok())
        .filter_map(|oid| repo.find_commit(oid).ok())
        .find(|commit| self.commit_touches_file(&repo, commit, &relative_path));
    let age_days = first_commit
        .and_then(|commit| Utc.timestamp_opt(commit.time().seconds(), 0).single())
        .map(|date| now.signed_duration_since(date).num_days())
        .unwrap_or(0);
    // Saturate instead of truncating: a plain `as u32` would wrap silently.
    Ok(u32::try_from(age_days.max(0)).unwrap_or(u32::MAX))
}
/// Collects the distinct author e-mails of all commits that touch `file_path`.
///
/// Commits whose author e-mail is not available as UTF-8 contribute nothing;
/// unreadable revwalk entries and commits are skipped.
pub fn file_authors(&self, file_path: &Path) -> Result<HashSet<String>> {
    let repo = self.open_repo()?;
    let target = self.to_relative_path(file_path);
    let mut walk = repo.revwalk()?;
    walk.push_head()?;
    walk.set_sorting(Sort::TIME)?;
    let mut emails = HashSet::new();
    for step in walk {
        let Ok(oid) = step else { continue };
        let Ok(commit) = repo.find_commit(oid) else {
            continue;
        };
        if !self.commit_touches_file(&repo, &commit, &target) {
            continue;
        }
        if let Some(email) = commit.author().email() {
            emails.insert(String::from(email));
        }
    }
    Ok(emails)
}
/// Returns the UTC time of the first commit in the time-sorted walk from HEAD
/// that touches `file_path`.
///
/// Yields `None` when no commit touches the file or when the commit time
/// cannot be represented.
pub fn file_last_modified(&self, file_path: &Path) -> Result<Option<DateTime<Utc>>> {
    let repo = self.open_repo()?;
    let target = self.to_relative_path(file_path);
    let mut walk = repo.revwalk()?;
    walk.push_head()?;
    walk.set_sorting(Sort::TIME)?;
    let newest = walk
        .filter_map(Result::ok)
        .filter_map(|oid| repo.find_commit(oid).ok())
        .find(|candidate| self.commit_touches_file(&repo, candidate, &target));
    let modified_at =
        newest.and_then(|commit| Utc.timestamp_opt(commit.time().seconds(), 0).single());
    Ok(modified_at)
}
/// Runs `git blame` on `file_path` and returns per-line attribution.
///
/// Copy tracking within the same file is enabled. Each line of every hunk is
/// keyed by `final_start_line + offset`.
///
/// # Errors
/// Fails when the repository cannot be opened or the file cannot be blamed.
pub fn blame_file(&self, file_path: &Path) -> Result<BlameData> {
    let repo = self.open_repo()?;
    let relative_path = self.to_relative_path(file_path);
    let mut opts = BlameOptions::new();
    opts.track_copies_same_file(true);
    // Pass the path as-is: a lossy string round-trip would rewrite non-UTF-8
    // path bytes and make git look up a different file.
    let blame = repo
        .blame_file(&relative_path, Some(&mut opts))
        .with_context(|| format!("Failed to blame file {}", file_path.display()))?;
    let mut lines = HashMap::new();
    for hunk in blame.iter() {
        let author = hunk
            .final_signature()
            .name()
            .unwrap_or("Unknown")
            .to_string();
        let commit_hash = hunk.final_commit_id().to_string();
        let start_line = hunk.final_start_line();
        lines.extend((0..hunk.lines_in_hunk()).map(|offset| {
            (
                start_line + offset,
                BlameLineInfo {
                    author: author.clone(),
                    commit_hash: commit_hash.clone(),
                },
            )
        }));
    }
    Ok(BlameData { lines })
}
/// Finds the oldest commit in which the number of occurrences of `pattern`
/// in `file_path` changes relative to its first parent.
///
/// The walk runs oldest-first from HEAD. A matching commit whose timestamp
/// cannot be represented is passed over and the search continues. Returns
/// `None` when nothing matches.
pub fn find_introduction(
    &self,
    file_path: &Path,
    pattern: &str,
) -> Result<Option<(git2::Oid, DateTime<Utc>)>> {
    let repo = self.open_repo()?;
    let target = self.to_relative_path(file_path);
    let mut walk = repo.revwalk()?;
    walk.push_head()?;
    walk.set_sorting(Sort::TIME | Sort::REVERSE)?;
    for step in walk {
        let Ok(oid) = step else { continue };
        let commit = repo.find_commit(oid)?;
        if !self.commit_introduces_pattern(&repo, &commit, &target, pattern)? {
            continue;
        }
        let seconds = commit.time().seconds();
        if let Some(date) = Utc.timestamp_opt(seconds, 0).single() {
            return Ok(Some((oid, date)));
        }
    }
    Ok(None)
}
/// Compiles `pattern` as a regular expression and delegates to
/// [`Self::find_modifications_with_regex`].
///
/// # Errors
/// Fails when `pattern` is not a valid regex or when the history walk fails.
pub fn find_modifications(
    &self,
    file_path: &Path,
    pattern: &str,
    after_commit: git2::Oid,
) -> Result<Vec<CommitStats>> {
    let regex = regex::Regex::new(pattern)
        .with_context(|| format!("Invalid modification pattern: {pattern}"))?;
    self.find_modifications_with_regex(file_path, &regex, after_commit)
}
pub fn find_modifications_with_regex(
&self,
file_path: &Path,
regex: ®ex::Regex,
after_commit: git2::Oid,
) -> Result<Vec<CommitStats>> {
let repo = self.open_repo()?;
let relative_path = self.to_relative_path(file_path);
let mut revwalk = repo.revwalk()?;
revwalk.push_head()?;
if after_commit != Oid::zero() {
revwalk.hide(after_commit)?;
}
revwalk.set_sorting(Sort::TIME)?;
let mut results = Vec::new();
for oid in revwalk.filter_map(|r| r.ok()) {
let commit = repo.find_commit(oid)?;
if self.commit_modifies_pattern(&repo, &commit, &relative_path, regex)? {
if let Some(stats) = self.commit_to_basic_stats(&commit)? {
results.push(stats);
}
}
}
Ok(results)
}
pub fn count_bug_fixes(&self, file_path: &Path) -> Result<usize> {
let repo = self.open_repo()?;
let relative_path = self.to_relative_path(file_path);
let mut revwalk = repo.revwalk()?;
revwalk.push_head()?;
revwalk.set_sorting(Sort::TIME)?;
let count = revwalk
.filter_map(|oid| oid.ok())
.filter_map(|oid| repo.find_commit(oid).ok())
.filter(|commit| self.commit_touches_file(&repo, commit, &relative_path))
.filter(|commit| commit.message().map(is_bug_fix_message).unwrap_or(false))
.count();
Ok(count)
}
/// Converts `path` into a path relative to the repository working directory.
///
/// Paths that already start with the stored working directory are stripped
/// directly. For other absolute paths the parent directory is canonicalized
/// and compared with the canonical working directory, so a path that reaches
/// the repository through a symlink still resolves. Anything else (including
/// already-relative paths) is returned unchanged.
fn to_relative_path(&self, path: &Path) -> PathBuf {
    if let Ok(relative) = path.strip_prefix(&self.repo_path) {
        return relative.to_path_buf();
    }
    // Only the parent is canonicalized: the file itself may no longer exist
    // on disk, or may be a symlink that git tracks under its own name.
    let via_canonical_parent = || -> Option<PathBuf> {
        if !path.is_absolute() {
            return None;
        }
        let file_name = path.file_name()?;
        let parent = path.parent()?.canonicalize().ok()?;
        let root = self.repo_path.canonicalize().ok()?;
        let relative_dir = parent.strip_prefix(&root).ok()?;
        Some(relative_dir.join(file_name))
    };
    via_canonical_parent().unwrap_or_else(|| path.to_path_buf())
}
/// Reports whether `commit` changes `file_path` relative to its first parent.
///
/// Returns `false` when the file is absent from the commit's tree (so a
/// commit that deletes the file does not count) and when any git operation
/// fails. A root commit is diffed against the empty tree.
fn commit_touches_file(
    &self,
    repo: &Repository,
    commit: &git2::Commit,
    file_path: &Path,
) -> bool {
    let Ok(tree) = commit.tree() else {
        return false;
    };
    // Use the path directly; a lossy string conversion would corrupt
    // non-UTF-8 file names.
    if tree.get_path(file_path).is_err() {
        return false;
    }
    let parent_tree = commit.parents().next().and_then(|p| p.tree().ok());
    let mut diff_opts = DiffOptions::new();
    // Treat the pathspec as an exact path so glob characters in file names
    // (`[`, `*`, `?`) cannot match unrelated files.
    diff_opts.pathspec(file_path).disable_pathspec_match(true);
    repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), Some(&mut diff_opts))
        .map(|diff| diff.deltas().next().is_some())
        .unwrap_or(false)
}
/// Builds a [`CommitStats`] from `commit` with an empty `files` list.
///
/// A commit time that cannot be represented falls back to the current time;
/// a missing message or author e-mail becomes an empty string. The visible
/// implementation always returns `Ok(Some(..))`.
fn commit_to_basic_stats(&self, commit: &git2::Commit) -> Result<Option<CommitStats>> {
    let date = match Utc.timestamp_opt(commit.time().seconds(), 0).single() {
        Some(parsed) => parsed,
        None => Utc::now(),
    };
    let author = commit.author();
    let stats = CommitStats {
        hash: commit.id(),
        date,
        message: commit.message().map(str::to_owned).unwrap_or_default(),
        author_email: author.email().map(str::to_owned).unwrap_or_default(),
        files: Vec::new(),
    };
    Ok(Some(stats))
}
/// Reports whether `commit` changes the occurrence count of `pattern` in
/// `file_path`; thin wrapper around `commit_pickaxe_changes_pattern`.
fn commit_introduces_pattern(
&self,
repo: &Repository,
commit: &git2::Commit,
file_path: &Path,
pattern: &str,
) -> Result<bool> {
commit_pickaxe_changes_pattern(repo, commit, file_path, pattern)
}
fn commit_modifies_pattern(
&self,
repo: &Repository,
commit: &git2::Commit,
file_path: &Path,
regex: ®ex::Regex,
) -> Result<bool> {
commit_diff_matches_regex(repo, commit, file_path, regex)
}
}
/// Counts the non-overlapping occurrences of the literal `pattern` in `content`.
///
/// An empty pattern counts as zero occurrences.
pub fn count_pattern_occurrences(content: &str, pattern: &str) -> usize {
    match pattern {
        "" => 0,
        needle => content.match_indices(needle).count(),
    }
}
/// Counts the occurrences of the literal `pattern` in the version of
/// `file_path` stored in `commit`.
///
/// Returns `0` when the path is absent from the commit's tree or does not
/// refer to a blob (for example a directory or submodule at that path).
/// Invalid UTF-8 sequences are replaced rather than discarding the whole
/// file, so one stray byte no longer zeroes the count.
///
/// # Errors
/// Fails when the commit's tree or the blob cannot be loaded.
pub fn pattern_occurrences_in_commit(
    repo: &Repository,
    commit: &git2::Commit,
    file_path: &Path,
    pattern: &str,
) -> Result<usize> {
    let tree = commit.tree()?;
    // Look the path up directly; a lossy string round-trip would alter
    // non-UTF-8 file names.
    let Ok(entry) = tree.get_path(file_path) else {
        return Ok(0);
    };
    if entry.kind() != Some(git2::ObjectType::Blob) {
        return Ok(0);
    }
    let blob = repo.find_blob(entry.id())?;
    let content = String::from_utf8_lossy(blob.content());
    Ok(count_pattern_occurrences(&content, pattern))
}
/// Reports whether `commit` changes how many times `pattern` occurs in
/// `file_path`, compared with its first parent.
///
/// A commit without parents is compared against a count of zero.
pub fn commit_pickaxe_changes_pattern(
    repo: &Repository,
    commit: &git2::Commit,
    file_path: &Path,
    pattern: &str,
) -> Result<bool> {
    let after = pattern_occurrences_in_commit(repo, commit, file_path, pattern)?;
    let before = commit
        .parents()
        .next()
        .map(|parent| pattern_occurrences_in_commit(repo, &parent, file_path, pattern))
        .transpose()?
        .unwrap_or(0);
    Ok(after != before)
}
/// History gathered for one `(file, function name)` target.
#[derive(Debug, Clone, Default)]
pub struct FileFunctionRecord {
/// First scanned commit in which the added and removed counts of the
/// function's introduction pattern differ; `None` when no such commit exists.
pub introduction_oid: Option<git2::Oid>,
/// Date of the commit recorded in `introduction_oid`.
pub introduction_date: Option<DateTime<Utc>>,
/// Later commits whose changed lines matched the function's regex.
pub modifications: Vec<CommitStats>,
}
/// Per-commit intermediate result of the repository-wide history scan.
#[derive(Debug, Clone)]
struct CommitFunctionData {
/// Id of the scanned commit.
oid: Oid,
/// Commit time in UTC.
date: DateTime<Utc>,
/// Commit message (empty when unavailable).
message: String,
/// Author e-mail (empty when unavailable).
author_email: String,
/// Number of added plus removed diff lines per changed path.
file_churn: HashMap<PathBuf, usize>,
/// For each `(file, function name)`: occurrences of the introduction pattern
/// in added lines, occurrences in removed lines, and whether the function's
/// regex matched any added or removed line.
updates: HashMap<(PathBuf, String), (usize, usize, bool)>,
}
/// Public per-commit summary produced by `compute_repo_function_histories`.
#[derive(Debug, Clone)]
pub struct FileCommitScan {
/// Commit time in UTC.
pub date: DateTime<Utc>,
/// Commit message (empty when unavailable).
pub message: String,
/// Author e-mail (empty when unavailable).
pub author_email: String,
/// Number of added plus removed diff lines per changed path.
pub file_churn: HashMap<PathBuf, usize>,
}
/// Result of a single pass over the repository history.
pub struct RepoHistoryScan {
/// Introduction and modification history per `(file, function name)` target.
pub functions: HashMap<(PathBuf, String), FileFunctionRecord>,
/// One entry per scanned commit that changed at least one line, sorted by date.
pub file_scans: Vec<FileCommitScan>,
}
/// Scans every commit reachable from HEAD once and derives, for each requested
/// `(file, function name)` target, its introduction commit and later
/// modifications, plus a per-commit churn summary.
///
/// * `repo_path` — path passed to `Repository::open`.
/// * `file_targets` — function names to track, keyed by file path as it
///   appears in diffs.
/// * `progress_cb` — optional callback invoked with the `Commits` phase, the
///   number of processed commits and the total.
///
/// Commits are processed in parallel. A commit that fails to process is
/// dropped from the result instead of aborting the scan.
///
/// # Errors
/// Fails when the list of commit ids cannot be collected.
pub fn compute_repo_function_histories(
    repo_path: &Path,
    file_targets: &HashMap<PathBuf, Vec<String>>,
    progress_cb: Option<super::batched_function::ProgressCallback<'_>>,
) -> Result<RepoHistoryScan> {
    use super::batched_function::GitPreloadPhase;
    use std::sync::atomic::{AtomicUsize, Ordering};
    if file_targets.is_empty() {
        return Ok(RepoHistoryScan {
            functions: HashMap::new(),
            file_scans: Vec::new(),
        });
    }
    let (intro_patterns, mod_regexes) = build_function_pattern_tables(file_targets);
    let oids = collect_repo_oids(repo_path)?;
    let total = oids.len();
    if let Some(cb) = progress_cb {
        cb(GitPreloadPhase::Commits, 0, total);
    }
    let processed = AtomicUsize::new(0);
    let mut commit_data: Vec<CommitFunctionData> = oids
        .par_iter()
        // Open one repository handle per rayon job instead of one per commit;
        // opening a repository is far more expensive than diffing one commit.
        .map_init(
            || Repository::open(repo_path).ok(),
            |repo, &oid| {
                let data = repo.as_ref().and_then(|repo| {
                    process_commit_for_function_history(
                        repo,
                        oid,
                        file_targets,
                        &intro_patterns,
                        &mod_regexes,
                    )
                    .ok()
                    .flatten()
                });
                // Count every commit, including failed ones, so the final
                // `done == total` progress report is always emitted.
                let done = processed.fetch_add(1, Ordering::Relaxed) + 1;
                if let Some(cb) = progress_cb {
                    if done % 50 == 0 || done == total {
                        cb(GitPreloadPhase::Commits, done, total);
                    }
                }
                data
            },
        )
        .filter_map(|data| data)
        .collect();
    commit_data.sort_by_key(|d| d.date);
    let file_scans = commit_data
        .iter()
        .map(|d| FileCommitScan {
            date: d.date,
            message: d.message.clone(),
            author_email: d.author_email.clone(),
            file_churn: d.file_churn.clone(),
        })
        .collect();
    Ok(RepoHistoryScan {
        functions: reduce_commit_data_to_records(file_targets, &commit_data),
        file_scans,
    })
}
/// Per file: `(function name, literal introduction pattern)` pairs.
type IntroPatternTable = HashMap<PathBuf, Vec<(String, String)>>;
/// Per file: `(function name, compiled modification regex)` pairs.
type ModRegexTable = HashMap<PathBuf, Vec<(String, regex::Regex)>>;

/// Precomputes the search patterns for every requested function.
///
/// The introduction pattern is the literal text `fn <name>`. The modification
/// regex is the name compiled as a regular expression; when the name is not a
/// valid regex it is escaped and matched literally. A name for which neither
/// form compiles is left out of the regex table.
fn build_function_pattern_tables(
    file_targets: &HashMap<PathBuf, Vec<String>>,
) -> (IntroPatternTable, ModRegexTable) {
    let intro = file_targets
        .iter()
        .map(|(file, names)| {
            let v = names
                .iter()
                .map(|n| (n.clone(), format!("fn {n}")))
                .collect();
            (file.clone(), v)
        })
        .collect();
    let mods = file_targets
        .iter()
        .map(|(file, names)| {
            let v = names
                .iter()
                .filter_map(|n| {
                    regex::Regex::new(n)
                        .or_else(|_| regex::Regex::new(&regex::escape(n)))
                        .ok()
                        .map(|r| (n.clone(), r))
                })
                .collect();
            (file.clone(), v)
        })
        .collect();
    (intro, mods)
}
/// Lists the ids of all commits reachable from HEAD, oldest first.
///
/// Revwalk entries that fail to resolve are skipped.
fn collect_repo_oids(repo_path: &Path) -> Result<Vec<Oid>> {
    let repo = Repository::open(repo_path)?;
    let mut walk = repo.revwalk()?;
    walk.push_head()?;
    walk.set_sorting(Sort::TIME | Sort::REVERSE)?;
    let mut oids = Vec::new();
    for step in walk {
        if let Ok(oid) = step {
            oids.push(oid);
        }
    }
    Ok(oids)
}
/// Diffs one commit against its first parent and extracts everything the
/// history scan needs from it.
///
/// Returns `Ok(None)` when the diff contains no added or removed lines.
/// Otherwise the result carries the per-path churn (count of `+`/`-` lines)
/// and, for every tracked function in a touched target file, the occurrence
/// counts of its introduction pattern in added and removed lines together
/// with a flag telling whether its regex matched any changed line. Changed
/// lines that are not valid UTF-8 count towards churn but are not searched.
fn process_commit_for_function_history(
    repo: &Repository,
    oid: Oid,
    file_targets: &HashMap<PathBuf, Vec<String>>,
    intro_patterns: &HashMap<PathBuf, Vec<(String, String)>>,
    mod_regexes: &HashMap<PathBuf, Vec<(String, regex::Regex)>>,
) -> Result<Option<CommitFunctionData>> {
    let commit = repo.find_commit(oid)?;
    let parent_tree = commit.parents().next().and_then(|p| p.tree().ok());
    let tree = commit.tree()?;
    let diff = repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None)?;

    let mut added_lines: HashMap<PathBuf, Vec<String>> = HashMap::new();
    let mut removed_lines: HashMap<PathBuf, Vec<String>> = HashMap::new();
    let mut file_churn: HashMap<PathBuf, usize> = HashMap::new();
    diff.foreach(
        &mut |_, _| true,
        None,
        None,
        Some(&mut |delta, _, line| {
            // Only real additions and removals are of interest.
            let origin = line.origin();
            if origin != '+' && origin != '-' {
                return true;
            }
            let Some(path) = delta.new_file().path().or_else(|| delta.old_file().path()) else {
                return true;
            };
            let path_buf = path.to_path_buf();
            *file_churn.entry(path_buf.clone()).or_default() += 1;
            // Line text is only kept for files with tracked functions.
            if !file_targets.contains_key(&path_buf) {
                return true;
            }
            if let Ok(text) = std::str::from_utf8(line.content()) {
                let bucket = if origin == '+' {
                    &mut added_lines
                } else {
                    &mut removed_lines
                };
                bucket.entry(path_buf).or_default().push(text.to_string());
            }
            true
        }),
    )?;
    if file_churn.is_empty() {
        return Ok(None);
    }

    let touched: HashSet<&PathBuf> = added_lines.keys().chain(removed_lines.keys()).collect();
    let no_lines: &[String] = &[];
    let mut updates: HashMap<(PathBuf, String), (usize, usize, bool)> = HashMap::new();
    for file in touched {
        let added = added_lines.get(file).map_or(no_lines, Vec::as_slice);
        let removed = removed_lines.get(file).map_or(no_lines, Vec::as_slice);
        let occurrences = |lines: &[String], needle: &str| -> usize {
            lines
                .iter()
                .map(|l| count_pattern_occurrences(l, needle))
                .sum()
        };
        for (name, intro_pat) in intro_patterns.get(file).into_iter().flatten() {
            let entry = updates.entry((file.clone(), name.clone())).or_default();
            entry.0 = occurrences(added, intro_pat);
            entry.1 = occurrences(removed, intro_pat);
        }
        for (name, regex) in mod_regexes.get(file).into_iter().flatten() {
            let matched = added.iter().chain(removed.iter()).any(|l| regex.is_match(l));
            updates.entry((file.clone(), name.clone())).or_default().2 = matched;
        }
    }

    let date = match Utc.timestamp_opt(commit.time().seconds(), 0).single() {
        Some(parsed) => parsed,
        None => Utc::now(),
    };
    Ok(Some(CommitFunctionData {
        oid,
        date,
        message: commit.message().unwrap_or("").to_string(),
        author_email: commit.author().email().unwrap_or("").to_string(),
        file_churn,
        updates,
    }))
}
/// Folds the per-commit scan results into one record per tracked function.
///
/// `commit_data` is consumed in the order given. For each target the first
/// commit whose added and removed introduction counts differ becomes the
/// introduction; every later commit whose regex flag is set is appended to
/// the modifications. Updates for keys that are not in `file_targets` are
/// ignored.
fn reduce_commit_data_to_records(
    file_targets: &HashMap<PathBuf, Vec<String>>,
    commit_data: &[CommitFunctionData],
) -> HashMap<(PathBuf, String), FileFunctionRecord> {
    let mut records: HashMap<(PathBuf, String), FileFunctionRecord> = HashMap::new();
    for (file, names) in file_targets {
        for name in names {
            records.insert((file.clone(), name.clone()), FileFunctionRecord::default());
        }
    }
    for data in commit_data {
        for (key, &(added, removed, matched)) in &data.updates {
            let Some(record) = records.get_mut(key) else {
                continue;
            };
            let known_intro = record.introduction_oid;
            match known_intro {
                None => {
                    if added != removed {
                        record.introduction_oid = Some(data.oid);
                        record.introduction_date = Some(data.date);
                    }
                }
                Some(intro) if matched && intro != data.oid => {
                    record.modifications.push(CommitStats {
                        hash: data.oid,
                        date: data.date,
                        message: data.message.clone(),
                        author_email: data.author_email.clone(),
                        files: Vec::new(),
                    });
                }
                Some(_) => {}
            }
        }
    }
    records
}
pub fn commit_diff_matches_regex(
repo: &Repository,
commit: &git2::Commit,
file_path: &Path,
regex: ®ex::Regex,
) -> Result<bool> {
let parent = commit.parents().next();
let parent_tree = parent.and_then(|p| p.tree().ok());
let tree = commit.tree()?;
let mut diff_opts = DiffOptions::new();
diff_opts.pathspec(file_path.to_string_lossy().as_ref());
let diff = repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), Some(&mut diff_opts))?;
let mut found = false;
diff.foreach(
&mut |_, _| true,
None,
None,
Some(&mut |_, _, line| {
if matches!(line.origin(), '+' | '-') {
if let Ok(content) = std::str::from_utf8(line.content()) {
if regex.is_match(content) {
found = true;
}
}
}
true
}),
)?;
Ok(found)
}
/// Classifies a commit message as a bug fix.
///
/// A message qualifies when it is not excluded by [`is_excluded_commit`] and,
/// after lowercasing and splitting on non-alphanumeric characters, contains
/// one of the words `bug`, `fix`, `fixes`, `fixed`, `fixing` or `hotfix`.
pub fn is_bug_fix_message(message: &str) -> bool {
    !is_excluded_commit(message)
        && message
            .to_lowercase()
            .split(|c: char| !c.is_alphanumeric())
            .any(|word| {
                matches!(
                    word,
                    "bug" | "fix" | "fixes" | "fixed" | "fixing" | "hotfix"
                )
            })
}

/// Decides whether a commit message must never be counted as a bug fix.
///
/// Matching is case-insensitive and substring-based. A message is excluded
/// when it contains one of the tags `style:`, `chore:`, `docs:`, `test:`, one
/// of the keywords `formatting`, `linting`, `whitespace`, `typo`, or the tag
/// `refactor:` without any bug-related word (`bug`, `fix`, `fixes`, `fixed`,
/// `fixing`, `issue`, `hotfix`).
fn is_excluded_commit(commit_line: &str) -> bool {
    const EXCLUDED_TAGS: [&str; 4] = ["style:", "chore:", "docs:", "test:"];
    const EXCLUDED_KEYWORDS: [&str; 4] = ["formatting", "linting", "whitespace", "typo"];

    let lowered = commit_line.to_lowercase();
    let always_excluded = EXCLUDED_TAGS
        .iter()
        .chain(EXCLUDED_KEYWORDS.iter())
        .copied()
        .any(|needle| lowered.contains(needle));
    if always_excluded {
        return true;
    }
    if !lowered.contains("refactor:") {
        return false;
    }
    // A refactor commit is only kept when it also mentions a bug-related word.
    let mentions_bug = lowered
        .split(|c: char| !c.is_alphanumeric())
        .any(|word| {
            matches!(
                word,
                "bug" | "fix" | "fixes" | "fixed" | "fixing" | "issue" | "hotfix"
            )
        });
    !mentions_bug
}
/// Collects the distinct blame authors of the lines `start_line..=end_line`.
///
/// Lines without blame information are skipped, as are empty author names
/// and the placeholder `"Not Committed Yet"`.
pub fn extract_authors_for_range(
    blame_data: &BlameData,
    start_line: usize,
    end_line: usize,
) -> HashSet<String> {
    let mut authors = HashSet::new();
    for line in start_line..=end_line {
        let Some(info) = blame_data.lines.get(&line) else {
            continue;
        };
        if info.author.is_empty() || info.author == "Not Committed Yet" {
            continue;
        }
        authors.insert(info.author.clone());
    }
    authors
}
#[cfg(test)]
mod tests {
use super::*;
use std::process::Command;
use tempfile::TempDir;
fn setup_test_repo() -> Result<(TempDir, PathBuf)> {
let temp_dir = TempDir::new()?;
let repo_path = temp_dir.path().to_path_buf();
Command::new("git")
.arg("init")
.current_dir(&repo_path)
.output()?;
Command::new("git")
.args(["config", "user.email", "test@example.com"])
.current_dir(&repo_path)
.output()?;
Command::new("git")
.args(["config", "user.name", "Test User"])
.current_dir(&repo_path)
.output()?;
Ok((temp_dir, repo_path))
}
fn create_and_commit_file(
repo_path: &Path,
file_name: &str,
content: &str,
message: &str,
) -> Result<()> {
let file_path = repo_path.join(file_name);
std::fs::write(&file_path, content)?;
Command::new("git")
.args(["add", file_name])
.current_dir(repo_path)
.output()?;
Command::new("git")
.args(["commit", "-m", message])
.current_dir(repo_path)
.output()?;
Ok(())
}
/// Checks the bug-fix classifier against accepted and rejected messages.
#[test]
fn test_is_bug_fix_message() {
    let accepted = [
        "fix: resolve login bug",
        "Fixed the payment issue",
        "Bug fix for issue #123",
        "hotfix: urgent fix",
    ];
    for message in accepted {
        assert!(is_bug_fix_message(message));
    }
    let rejected = [
        "style: apply formatting fixes",
        "chore: update dependencies",
        "docs: fix typo",
        "refactor: improve prefix handling",
        "Add debugging utilities",
    ];
    for message in rejected {
        assert!(!is_bug_fix_message(message));
    }
}
/// Two authors across three blamed lines must be reported exactly once each.
#[test]
fn test_extract_authors_for_range() {
    let lines: HashMap<usize, BlameLineInfo> =
        [(1, "Alice", "abc"), (2, "Bob", "def"), (3, "Alice", "abc")]
            .into_iter()
            .map(|(line, author, hash)| {
                (
                    line,
                    BlameLineInfo {
                        author: author.into(),
                        commit_hash: hash.into(),
                    },
                )
            })
            .collect();
    let blame_data = BlameData { lines };
    let authors = extract_authors_for_range(&blame_data, 1, 3);
    assert_eq!(authors.len(), 2);
    for expected in ["Alice", "Bob"] {
        assert!(authors.contains(expected));
    }
}
/// Opening a repository records its working directory; both sides are
/// canonicalized before comparison because the temp dir may be a symlink.
#[test]
fn test_git2_repository_open() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    create_and_commit_file(&repo_path, "test.rs", "fn main() {}", "Initial commit")?;
    let repo = Git2Repository::open(&repo_path)?;
    let expected = match repo_path.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => repo_path,
    };
    let actual = match repo.repo_path().canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => repo.repo_path().to_path_buf(),
    };
    assert_eq!(actual, expected);
    Ok(())
}
/// Three commits to the same file must be counted as three.
#[test]
fn test_git2_repository_count_commits() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    let revisions = [
        ("fn main() {}", "Initial commit"),
        ("fn main() { println!(); }", "Second"),
        ("fn main() { dbg!(); }", "Third"),
    ];
    for (content, message) in revisions {
        create_and_commit_file(&repo_path, "test.rs", content, message)?;
    }
    let repo = Git2Repository::open(&repo_path)?;
    assert_eq!(repo.count_file_commits(Path::new("test.rs"))?, 3);
    Ok(())
}
/// The configured test identity must show up as an author of the file.
#[test]
fn test_git2_repository_file_authors() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    create_and_commit_file(&repo_path, "test.rs", "fn main() {}", "Initial commit")?;
    let authors = Git2Repository::open(&repo_path)?.file_authors(Path::new("test.rs"))?;
    assert!(authors.contains("test@example.com"));
    Ok(())
}
/// Blaming a three-line file yields entries for lines 1 through 3.
#[test]
fn test_git2_repository_blame() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    create_and_commit_file(
        &repo_path,
        "test.txt",
        "line1\nline2\nline3\n",
        "Initial commit",
    )?;
    let repo = Git2Repository::open(&repo_path)?;
    let blame = repo.blame_file(Path::new("test.txt"))?;
    for line in 1..=3usize {
        assert!(blame.lines.contains_key(&line));
    }
    Ok(())
}
/// Of four commits, only the `fix:` and `hotfix:` ones count as bug fixes.
#[test]
fn test_git2_repository_count_bug_fixes() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    let revisions = [
        ("fn main() {}", "Initial commit"),
        ("fn main() { v2 }", "fix: resolve bug"),
        ("fn main() { v3 }", "feat: add feature"),
        ("fn main() { v4 }", "hotfix: urgent"),
    ];
    for (content, message) in revisions {
        create_and_commit_file(&repo_path, "test.rs", content, message)?;
    }
    let repo = Git2Repository::open(&repo_path)?;
    assert_eq!(repo.count_bug_fixes(Path::new("test.rs"))?, 2);
    Ok(())
}
/// Resolves HEAD via `git rev-parse HEAD` and parses the printed hash.
fn get_head_oid(repo_path: &Path) -> Result<git2::Oid> {
    let rev_parse = Command::new("git")
        .args(["rev-parse", "HEAD"])
        .current_dir(repo_path)
        .output()?;
    let printed = String::from_utf8_lossy(&rev_parse.stdout);
    let oid = git2::Oid::from_str(printed.trim())?;
    Ok(oid)
}
/// With no lower bound (zero OID), every commit whose diff touches the
/// pattern is reported, including the initial one.
#[test]
fn test_find_modifications_finds_commits_modifying_pattern() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    let revisions = [
        ("let marker = 0;", "Initial"),
        ("let marker = 1;", "First modification"),
        ("let marker = 2;", "Second modification"),
    ];
    for (content, message) in revisions {
        create_and_commit_file(&repo_path, "test.rs", content, message)?;
    }
    let repo = Git2Repository::open(&repo_path)?;
    let modifications =
        repo.find_modifications(Path::new("test.rs"), "marker", git2::Oid::zero())?;
    assert_eq!(modifications.len(), 3, "Expected 3 modifications");
    let messages: Vec<&str> = modifications.iter().map(|m| m.message.as_str()).collect();
    for needle in ["Initial", "First modification", "Second modification"] {
        assert!(
            messages.iter().any(|m| m.contains(needle)),
            "Should include {needle}"
        );
    }
    Ok(())
}
/// A pattern that never appears in any diff yields no modifications.
#[test]
fn test_find_modifications_returns_empty_when_pattern_not_in_diff() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    let revisions = [
        ("fn stable_pattern() {}\n// placeholder", "Initial"),
        (
            "fn stable_pattern() {}\n// changed placeholder",
            "Modify placeholder only",
        ),
    ];
    for (content, message) in revisions {
        create_and_commit_file(&repo_path, "test.rs", content, message)?;
    }
    let repo = Git2Repository::open(&repo_path)?;
    let untouched_modifications =
        repo.find_modifications(Path::new("test.rs"), "xyz_never_exists", git2::Oid::zero())?;
    assert!(
        untouched_modifications.is_empty(),
        "Expected no modifications for pattern that was never in the diff"
    );
    Ok(())
}
/// Hiding HEAD itself leaves nothing to report, while the zero OID reports
/// all four commits.
#[test]
fn test_find_modifications_stops_at_specified_commit() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    for version in 1..=4 {
        create_and_commit_file(
            &repo_path,
            "test.rs",
            &format!("let x = {version};"),
            &format!("v{version}"),
        )?;
    }
    let latest_oid = get_head_oid(&repo_path)?;
    let repo = Git2Repository::open(&repo_path)?;
    let target = Path::new("test.rs");
    let modifications = repo.find_modifications(target, r"let x", latest_oid)?;
    assert!(
        modifications.is_empty(),
        "Expected no modifications when range excludes HEAD"
    );
    let all_mods = repo.find_modifications(target, r"let x", git2::Oid::zero())?;
    assert_eq!(all_mods.len(), 4, "Should find all 4 commits with zero OID");
    Ok(())
}
/// The introduction is the commit that first adds the pattern, not a later
/// commit that merely edits the function body.
#[test]
fn test_find_introduction_finds_first_occurrence() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    create_and_commit_file(&repo_path, "test.rs", "fn other() {}", "Initial")?;
    create_and_commit_file(
        &repo_path,
        "test.rs",
        "fn other() {}\nfn special_marker() {}",
        "Add marker",
    )?;
    // HEAD right after the commit that adds the marker is the expected answer.
    let expected_oid = get_head_oid(&repo_path)?;
    create_and_commit_file(
        &repo_path,
        "test.rs",
        "fn other() {}\nfn special_marker() { updated }",
        "Update",
    )?;
    let repo = Git2Repository::open(&repo_path)?;
    let found = repo.find_introduction(Path::new("test.rs"), "special_marker")?;
    assert!(found.is_some());
    let (oid, _date) = found.unwrap();
    assert_eq!(oid, expected_oid);
    Ok(())
}
/// `find_introduction` must agree with `git log -S ... --reverse` on the
/// first commit that changes the pattern count.
#[test]
fn test_find_introduction_matches_subprocess_pickaxe() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    let revisions = [
        ("fn my_func() {}", "Initial"),
        ("fn my_func() { println!(\"v2\"); }", "fix"),
        ("fn my_func() { println!(\"v3\"); }", "feat"),
    ];
    for (content, message) in revisions {
        create_and_commit_file(&repo_path, "test.rs", content, message)?;
    }
    let pickaxe_args = [
        "log",
        "-S",
        "fn my_func",
        "--format=%H",
        "--reverse",
        "--",
        "test.rs",
    ];
    let intro_output = Command::new("git")
        .args(pickaxe_args)
        .current_dir(&repo_path)
        .output()?;
    let intro_stdout = String::from_utf8_lossy(&intro_output.stdout);
    let cli_intro = intro_stdout.lines().next().unwrap().trim().to_string();
    let repo = Git2Repository::open(&repo_path)?;
    let git2_intro = repo
        .find_introduction(Path::new("test.rs"), "fn my_func")?
        .map(|(oid, _)| oid.to_string());
    assert_eq!(
        Some(cli_intro),
        git2_intro,
        "git2 find_introduction must match git log -S --reverse"
    );
    Ok(())
}
/// Cross-checks the git2-based introduction and modification queries against
/// the git CLI (`log -S` for the introduction, `log -G` for modifications).
#[test]
fn test_find_modifications_matches_subprocess_two_commit_case() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    create_and_commit_file(&repo_path, "test.rs", "fn my_func() {}", "Initial")?;
    create_and_commit_file(
        &repo_path,
        "test.rs",
        "fn my_func() { println!(\"v2\"); }",
        "fix",
    )?;
    create_and_commit_file(
        &repo_path,
        "test.rs",
        "fn my_func() { println!(\"v3\"); }",
        "feat",
    )?;
    // Introduction according to the CLI pickaxe search.
    let intro_output = std::process::Command::new("git")
        .args([
            "log",
            "-S",
            "fn my_func",
            "--format=%H",
            "--reverse",
            "--",
            "test.rs",
        ])
        .current_dir(&repo_path)
        .output()?;
    let intro_hash = String::from_utf8_lossy(&intro_output.stdout)
        .lines()
        .next()
        .unwrap()
        .trim()
        .to_string();
    // Modifications after the introduction according to `git log -G`.
    let mods_output = std::process::Command::new("git")
        .args([
            "log",
            &format!("{intro_hash}..HEAD"),
            "-G",
            "my_func",
            "--format=%H",
            "--",
            "test.rs",
        ])
        .current_dir(&repo_path)
        .output()?;
    let cli_count = String::from_utf8_lossy(&mods_output.stdout)
        .lines()
        .filter(|l| !l.is_empty())
        .count();
    let repo = Git2Repository::open(&repo_path)?;
    let (intro_oid, _) = repo
        .find_introduction(Path::new("test.rs"), "fn my_func")?
        .expect("intro");
    assert_eq!(
        intro_oid.to_string(),
        intro_hash,
        "intro oid must match git log -S"
    );
    let intro_from_cli = git2::Oid::from_str(&intro_hash)?;
    let regex = regex::Regex::new("my_func")?;
    let git2_mods =
        repo.find_modifications_with_regex(Path::new("test.rs"), &regex, intro_oid)?;
    let git2_mods_cli =
        repo.find_modifications_with_regex(Path::new("test.rs"), &regex, intro_from_cli)?;
    assert_eq!(
        git2_mods.len(),
        git2_mods_cli.len(),
        "intro oid source should not matter"
    );
    // Every commit the CLI reports must also match the per-commit check.
    let git_repo = repo.open_repo()?;
    let regex = regex::Regex::new("my_func")?;
    for hash in String::from_utf8_lossy(&mods_output.stdout)
        .lines()
        .filter(|l| !l.is_empty())
    {
        let oid = git2::Oid::from_str(hash.trim())?;
        let commit = git_repo.find_commit(oid)?;
        let matches =
            commit_diff_matches_regex(&git_repo, &commit, Path::new("test.rs"), &regex)?;
        assert!(matches, "commit {hash} should match -G per git CLI");
    }
    assert_eq!(
        cli_count,
        git2_mods.len(),
        "cli={cli_count} git2={} hashes={:?}",
        git2_mods.len(),
        git2_mods
            .iter()
            .map(|c| c.hash.to_string())
            .collect::<Vec<_>>()
    );
    Ok(())
}
/// A commit that only changes lines near `my_func` must not match: the regex
/// check looks at added and removed lines, never at context lines.
#[test]
fn test_pickaxe_ignores_context_only_diff_for_regex() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    let content = "fn my_func() {}\n\nfn other_func() {}\n";
    create_and_commit_file(&repo_path, "test.rs", content, "Initial")?;
    let content_v2 = "fn my_func() {}\n\nfn other_func() {\nprintln!(\"modified\");\n}\n";
    create_and_commit_file(&repo_path, "test.rs", content_v2, "fix other")?;
    let repo = Git2Repository::open(&repo_path)?;
    let git_repo = repo.open_repo()?;
    let head = git_repo.find_commit(get_head_oid(&repo_path)?)?;
    let regex = regex::Regex::new("my_func")?;
    assert!(
        !commit_diff_matches_regex(&git_repo, &head, Path::new("test.rs"), &regex)?,
        "context-only changes must not match -G"
    );
    Ok(())
}
/// Editing only the function body keeps the occurrence count of the
/// signature unchanged, so the pickaxe check must report no change.
#[test]
fn test_pickaxe_counts_occurrence_changes_not_substring_presence() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    let revisions = [
        ("fn my_func() {}\n", "Initial"),
        ("fn my_func() { println!(\"v2\"); }\n", "fix"),
    ];
    for (content, message) in revisions {
        create_and_commit_file(&repo_path, "test.rs", content, message)?;
    }
    let git_repo = Git2Repository::open(&repo_path)?.open_repo()?;
    let head_oid = get_head_oid(&repo_path)?;
    let head = git_repo.find_commit(head_oid)?;
    let count_changed =
        commit_pickaxe_changes_pattern(&git_repo, &head, Path::new("test.rs"), "fn my_func")?;
    assert!(
        !count_changed,
        "body-only edits keep the same pickaxe count"
    );
    Ok(())
}
/// Searching for a pattern that never existed yields `None`.
#[test]
fn test_find_introduction_returns_none_when_not_found() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    create_and_commit_file(&repo_path, "test.rs", "fn main() {}", "Initial")?;
    let missing = Git2Repository::open(&repo_path)?
        .find_introduction(Path::new("test.rs"), "nonexistent_pattern")?;
    assert!(missing.is_none());
    Ok(())
}
}