use std::collections::{HashMap, HashSet};
use std::error::Error;
use std::fs;
use std::ops::ControlFlow;
use std::path::{Path, PathBuf};
use git2::{BlameOptions, Delta, DiffOptions, ObjectType, Repository, Sort, Tree};
/// Handle to a Git repository together with the path of its working directory.
///
/// Instances are created with `GitRepo::open`.
pub struct GitRepo {
/// Underlying `git2` repository handle used for all queries.
repo: Repository,
/// Working directory of the repository, recorded when the handle is opened.
root: PathBuf,
}
/// Change statistics for a single path, aggregated over a range of commits.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileFrequency {
    /// Path of the file as reported by the commit diff.
    pub path: PathBuf,
    /// Number of visited commits whose diff contained this path.
    pub commits: usize,
    /// Smallest commit timestamp (seconds) among those commits.
    pub first_commit: i64,
    /// Largest commit timestamp (seconds) among those commits.
    pub last_commit: i64,
}
/// Attribution of one contiguous blame hunk of a file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlameHunkInfo {
    /// Author name taken from the hunk's final signature, or `"unknown"`.
    pub author: String,
    /// Author email taken from the hunk's final signature, or `"unknown"`.
    pub email: String,
    /// Line on which the hunk starts in the final version of the file.
    pub start_line: usize,
    /// Number of lines covered by the hunk.
    pub lines: usize,
}
/// Per-author blame totals for a single file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlameInfo {
    /// Author name, or `"unknown"` when the signature has no readable name.
    pub author: String,
    /// Author email, or `"unknown"` when the signature has no readable email.
    pub email: String,
    /// Total number of lines attributed to this author.
    pub lines: usize,
    /// Largest signature timestamp (seconds) seen among this author's hunks.
    pub last_commit_time: i64,
}
impl GitRepo {
/// Opens the Git repository that `Repository::discover` finds for `path`.
///
/// The repository's working directory is stored as the root of the handle.
///
/// # Errors
///
/// Returns an error if discovery fails, or if the repository has no working
/// directory (a bare repository).
pub fn open(path: &Path) -> Result<Self, Box<dyn Error>> {
    let repo = Repository::discover(path)?;
    let Some(workdir) = repo.workdir() else {
        return Err("bare repositories are not supported".into());
    };
    let root = workdir.to_path_buf();
    Ok(Self { repo, root })
}
/// Visits the commits reachable from `HEAD` and hands each one to `f`.
///
/// The revwalk is configured with `Sort::TIME`. Commits with more than one
/// parent are skipped and never reach `f`. When `since` is set, the walk
/// stops at the first remaining commit whose `time()` is earlier than that
/// value. The callback can also stop the walk by returning
/// `ControlFlow::Break`.
///
/// # Errors
///
/// Propagates any error from the revwalk, from commit lookup, or from `f`.
fn walk_commits(
    &self,
    since: Option<i64>,
    mut f: impl FnMut(&git2::Commit) -> Result<ControlFlow<()>, Box<dyn Error>>,
) -> Result<(), Box<dyn Error>> {
    let mut walker = self.repo.revwalk()?;
    walker.push_head()?;
    walker.set_sorting(Sort::TIME)?;
    for next in walker {
        let id = next?;
        let commit = self.repo.find_commit(id)?;
        // Merge commits are never reported to the callback.
        if commit.parent_count() > 1 {
            continue;
        }
        // Everything after the first commit older than the cutoff is ignored.
        if let Some(cutoff) = since {
            if commit.time().seconds() < cutoff {
                break;
            }
        }
        if let ControlFlow::Break(()) = f(&commit)? {
            break;
        }
    }
    Ok(())
}
/// Counts, for every path, how many visited commits changed it.
///
/// Commits are visited through `walk_commits`, so merge commits are excluded
/// and `since` acts as a lower bound on the commit timestamp. For each path
/// the earliest and latest commit timestamps seen are recorded as well.
///
/// The result is sorted by descending commit count; paths with the same count
/// are ordered by path so that the output is identical from run to run.
///
/// # Errors
///
/// Propagates any error raised while walking commits or diffing trees.
pub fn file_frequencies(
    &self,
    since: Option<i64>,
) -> Result<Vec<FileFrequency>, Box<dyn Error>> {
    let mut map: HashMap<PathBuf, FileFrequency> = HashMap::new();
    self.walk_commits(since, |commit| {
        let time = commit.time().seconds();
        for path in self.changed_files(commit)? {
            // New entries start at zero so the increment below is shared with
            // the "already present" case.
            let stats = map.entry(path.clone()).or_insert_with(|| FileFrequency {
                path,
                commits: 0,
                first_commit: time,
                last_commit: time,
            });
            stats.commits += 1;
            stats.first_commit = stats.first_commit.min(time);
            stats.last_commit = stats.last_commit.max(time);
        }
        Ok(ControlFlow::Continue(()))
    })?;
    let mut result: Vec<FileFrequency> = map.into_values().collect();
    // HashMap iteration order is arbitrary; the path tie-breaker makes the
    // ordering of equally frequent files deterministic.
    result.sort_by(|a, b| {
        b.commits
            .cmp(&a.commits)
            .then_with(|| a.path.cmp(&b.path))
    });
    Ok(result)
}
/// Finds, for each path in `targets`, the timestamp of the first visited
/// commit whose diff contains that path.
///
/// Commits are visited through `walk_commits` without a time bound. Once
/// every target has been matched the walk is stopped. Targets that never
/// appear in a diff are absent from the returned map.
///
/// Targets are compared verbatim against the paths produced by
/// `changed_files` (presumably repository-relative; verify against callers).
///
/// # Errors
///
/// Propagates any error raised while walking commits or diffing trees.
pub fn last_modified_per_file(
    &self,
    targets: &[PathBuf],
) -> Result<HashMap<PathBuf, i64>, Box<dyn Error>> {
    let mut pending: HashSet<&PathBuf> = targets.iter().collect();
    let mut found: HashMap<PathBuf, i64> = HashMap::new();
    self.walk_commits(None, |commit| {
        // Nothing left to look for: stop the walk.
        if pending.is_empty() {
            return Ok(ControlFlow::Break(()));
        }
        let when = commit.time().seconds();
        let touched = self.changed_files(commit)?;
        for file in touched {
            if !pending.remove(&file) {
                continue;
            }
            found.insert(file, when);
        }
        Ok(ControlFlow::Continue(()))
    })?;
    Ok(found)
}
/// Collects the changed-path sets of all visited commits that touch at least
/// two paths.
///
/// Commits are visited through `walk_commits` with the given `since` bound.
/// Each element of the result holds the paths changed by one commit.
///
/// # Errors
///
/// Propagates any error raised while walking commits or diffing trees.
pub fn co_changing_commits(
    &self,
    since: Option<i64>,
) -> Result<Vec<Vec<PathBuf>>, Box<dyn Error>> {
    let mut groups = Vec::new();
    self.walk_commits(since, |commit| {
        let touched = self.changed_files(commit)?;
        // A commit with fewer than two paths carries no co-change signal.
        if touched.len() > 1 {
            groups.push(touched);
        }
        Ok(ControlFlow::Continue(()))
    })?;
    Ok(groups)
}
/// Aggregates the blame of `rel_path` per author.
///
/// Hunks are grouped by the pair of name and email from each hunk's final
/// signature (`"unknown"` is used when either is unavailable). For each
/// author the line counts are summed and the largest signature timestamp is
/// kept.
///
/// The result is sorted by descending line count; authors with the same
/// count are ordered by name and then email so that the output is identical
/// from run to run.
///
/// # Errors
///
/// Returns an error if the blame cannot be computed for `rel_path`.
pub fn blame_file(&self, rel_path: &Path) -> Result<Vec<BlameInfo>, Box<dyn Error>> {
    let mut opts = BlameOptions::new();
    let blame = self.repo.blame_file(rel_path, Some(&mut opts))?;
    let mut by_author: HashMap<String, BlameInfo> = HashMap::new();
    for hunk in blame.iter() {
        let sig = hunk.final_signature();
        let email = sig.email().unwrap_or("unknown").to_string();
        let author = sig.name().unwrap_or("unknown").to_string();
        let commit_time = sig.when().seconds();
        let lines = hunk.lines_in_hunk();
        let key = format!("{author} <{email}>");
        // New entries start with zero lines so the accumulation below is
        // shared with the "already present" case.
        let info = by_author.entry(key).or_insert_with(|| BlameInfo {
            author,
            email,
            lines: 0,
            last_commit_time: commit_time,
        });
        info.lines += lines;
        info.last_commit_time = info.last_commit_time.max(commit_time);
    }
    let mut result: Vec<BlameInfo> = by_author.into_values().collect();
    // HashMap iteration order is arbitrary; the name/email tie-breakers make
    // the ordering of authors with equal line counts deterministic.
    result.sort_by(|a, b| {
        b.lines
            .cmp(&a.lines)
            .then_with(|| a.author.cmp(&b.author))
            .then_with(|| a.email.cmp(&b.email))
    });
    Ok(result)
}
/// Returns the blame of `rel_path` as a list of hunks, in the order the
/// blame iterator yields them.
///
/// Name and email come from each hunk's final signature; `"unknown"` is
/// substituted when either is unavailable.
///
/// # Errors
///
/// Returns an error if the blame cannot be computed for `rel_path`.
pub fn blame_hunks(&self, rel_path: &Path) -> Result<Vec<BlameHunkInfo>, Box<dyn Error>> {
    let mut opts = BlameOptions::new();
    let blame = self.repo.blame_file(rel_path, Some(&mut opts))?;
    let mut hunks = Vec::new();
    for hunk in blame.iter() {
        let sig = hunk.final_signature();
        let author = sig.name().unwrap_or("unknown").to_string();
        let email = sig.email().unwrap_or("unknown").to_string();
        hunks.push(BlameHunkInfo {
            author,
            email,
            start_line: hunk.final_start_line(),
            lines: hunk.lines_in_hunk(),
        });
    }
    Ok(hunks)
}
/// Collects the distinct author emails of all visited commits.
///
/// Commits are visited through `walk_commits` with the given `since` bound.
/// Commits whose author signature yields no email (`email()` returns `None`)
/// contribute nothing.
///
/// # Errors
///
/// Propagates any error raised while walking commits.
pub fn recent_authors(&self, since: Option<i64>) -> Result<HashSet<String>, Box<dyn Error>> {
    let mut emails = HashSet::new();
    self.walk_commits(since, |commit| {
        let signature = commit.author();
        if let Some(address) = signature.email() {
            emails.insert(address.to_string());
        }
        Ok(ControlFlow::Continue(()))
    })?;
    Ok(emails)
}
/// Resolves `walk_root` and computes its location relative to the repository
/// root.
///
/// Returns `(canonical_walk_root, prefix)`, where `prefix` is the canonical
/// walk root with the canonical repository root stripped from its front.
///
/// NOTE(review): when the walk root is not located under the repository
/// root the prefix is silently left empty instead of reporting an error —
/// confirm that callers rely on this fallback.
///
/// # Errors
///
/// Returns an error if either the repository root or `walk_root` cannot be
/// canonicalized.
pub fn walk_prefix(&self, walk_root: &Path) -> Result<(PathBuf, PathBuf), Box<dyn Error>> {
    let git_root = match self.root.canonicalize() {
        Ok(resolved) => resolved,
        Err(e) => return Err(format!("cannot resolve git root: {e}").into()),
    };
    let canonical_walk = match walk_root.canonicalize() {
        Ok(resolved) => resolved,
        Err(e) => {
            return Err(
                format!("cannot resolve target path {}: {e}", walk_root.display()).into(),
            )
        }
    };
    let prefix = match canonical_walk.strip_prefix(&git_root) {
        Ok(relative) => relative.to_path_buf(),
        Err(_) => PathBuf::new(),
    };
    Ok((canonical_walk, prefix))
}
/// Maps a path found under `walk_root` to a path prefixed with `prefix`.
///
/// `walk_root` is stripped from the front of `file_path`; if `file_path`
/// does not start with `walk_root` it is used unchanged. The remainder is
/// joined onto `prefix`, or returned as-is when `prefix` is empty.
pub fn to_git_path(walk_root: &Path, prefix: &Path, file_path: &Path) -> PathBuf {
    let relative = match file_path.strip_prefix(walk_root) {
        Ok(stripped) => stripped,
        Err(_) => file_path,
    };
    if !prefix.as_os_str().is_empty() {
        return prefix.join(relative);
    }
    relative.to_path_buf()
}
pub fn extract_tree_to_dir(&self, refspec: &str, dest: &Path) -> Result<(), Box<dyn Error>> {
let obj = self
.repo
.revparse_single(refspec)
.map_err(|e| format!("cannot resolve ref '{refspec}': {e}"))?;
let commit = obj
.peel_to_commit()
.map_err(|e| format!("'{refspec}' is not a commit: {e}"))?;
let tree = commit.tree()?;
self.write_tree_recursive(&tree, dest)
}
/// Writes every entry of `tree` below `dest`, descending into subtrees.
///
/// Blob entries are written as files containing the blob's bytes. Tree
/// entries become directories and are processed recursively. Entries of any
/// other kind are ignored.
///
/// # Errors
///
/// Returns an error if an entry name is not valid UTF-8, if an object cannot
/// be looked up, or if a filesystem operation fails.
fn write_tree_recursive(&self, tree: &Tree, dest: &Path) -> Result<(), Box<dyn Error>> {
    for entry in tree.iter() {
        let Some(name) = entry.name() else {
            return Err(format!("non-UTF-8 entry in tree: {:?}", entry.id()).into());
        };
        let target = dest.join(name);
        let kind = entry.kind();
        if kind == Some(ObjectType::Blob) {
            let blob = self.repo.find_blob(entry.id())?;
            fs::write(&target, blob.content())?;
        } else if kind == Some(ObjectType::Tree) {
            let subtree = self.repo.find_tree(entry.id())?;
            fs::create_dir_all(&target)?;
            self.write_tree_recursive(&subtree, &target)?;
        }
        // Any other entry kind is intentionally left out of the output.
    }
    Ok(())
}
/// Lists the files that differ between the commit named by `since_ref` and
/// `HEAD`.
///
/// Only deltas with status `Added`, `Modified`, `Renamed` or `Copied` are
/// reported; each reported path is the delta's new-file path joined onto the
/// repository root.
///
/// # Errors
///
/// Returns an error if `since_ref` cannot be resolved or does not peel to a
/// commit, if `HEAD` cannot be resolved, or if the diff fails.
pub fn files_changed_since(&self, since_ref: &str) -> Result<Vec<PathBuf>, Box<dyn Error>> {
    let base_obj = self
        .repo
        .revparse_single(since_ref)
        .map_err(|e| format!("cannot resolve ref '{since_ref}': {e}"))?;
    let base_commit = base_obj
        .peel_to_commit()
        .map_err(|e| format!("'{since_ref}' is not a commit: {e}"))?;
    let base_tree = base_commit.tree()?;
    let head_tree = self.repo.head()?.peel_to_commit()?.tree()?;
    let mut opts = DiffOptions::new();
    let diff = self
        .repo
        .diff_tree_to_tree(Some(&base_tree), Some(&head_tree), Some(&mut opts))?;
    let paths = diff
        .deltas()
        .filter(|delta| {
            matches!(
                delta.status(),
                Delta::Added | Delta::Modified | Delta::Renamed | Delta::Copied
            )
        })
        .filter_map(|delta| delta.new_file().path().map(|p| self.root.join(p)))
        .collect();
    Ok(paths)
}
fn changed_files(&self, commit: &git2::Commit) -> Result<Vec<PathBuf>, Box<dyn Error>> {
let tree = commit.tree()?;
let parent_tree = if commit.parent_count() > 0 {
Some(commit.parent(0)?.tree()?)
} else {
None
};
let mut opts = DiffOptions::new();
let diff =
self.repo
.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), Some(&mut opts))?;
let mut paths = Vec::new();
for delta in diff.deltas() {
if let Some(path) = delta.new_file().path() {
paths.push(path.to_path_buf());
}
}
Ok(paths)
}
}
#[cfg(test)]
#[path = "mod_test.rs"]
mod tests;