use std::path::Path;
use git2::{Commit, Delta, DiffFormat, DiffOptions, Repository};
use tracing::debug;
use crate::collect::errors::{CollectError, Result};
use crate::core::models::ChangeType;
/// Byte budget (200 KiB) for the patch text accumulated by [`diff_for_commit`].
///
/// Once the budget is exhausted the remaining diff output is discarded and
/// `TRUNCATION_MARKER` is appended on top of the accumulated text.
pub const DIFF_BYTE_CAP: usize = 200 * 1024;
/// Text appended to the output of [`diff_for_commit`] when the patch was cut
/// short because it exceeded [`DIFF_BYTE_CAP`].
const TRUNCATION_MARKER: &str = "\n[... diff truncated: output exceeded maximum byte limit ...]\n";
/// Renders the patch introduced by commit `sha` as text.
///
/// The repository at `repo_path` is opened, `sha` is resolved with
/// `revparse_single` and peeled to a commit, and that commit's tree is diffed
/// against the tree of its first parent. A root commit (no parents) is diffed
/// against the empty tree, so all of its content shows up as additions.
///
/// The rendered patch body is limited to [`DIFF_BYTE_CAP`] bytes. When the
/// limit is hit the text is cut at a UTF-8 character boundary, all further
/// diff output is discarded, and `TRUNCATION_MARKER` is appended. Line content
/// that is not valid UTF-8 is converted lossily (invalid sequences become
/// U+FFFD) instead of being dropped.
///
/// # Errors
///
/// Returns [`CollectError::Git`] when the repository cannot be opened, `sha`
/// does not resolve to a commit, or any tree/diff operation fails.
pub fn diff_for_commit(repo_path: &Path, sha: &str) -> Result<String> {
    let repo = Repository::open(repo_path).map_err(CollectError::Git)?;
    let commit = repo
        .revparse_single(sha)
        .map_err(CollectError::Git)?
        .peel_to_commit()
        .map_err(CollectError::Git)?;
    let tree = commit.tree().map_err(CollectError::Git)?;

    let parent_tree = if commit.parent_count() > 0 {
        let parent = commit.parent(0).map_err(CollectError::Git)?;
        Some(parent.tree().map_err(CollectError::Git)?)
    } else {
        debug!(
            sha,
            "diff_for_commit: root commit, diffing against empty tree"
        );
        None
    };

    let mut opts = DiffOptions::new();
    opts.context_lines(3)
        .include_typechange(true)
        .ignore_whitespace(false);
    let diff = repo
        .diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), Some(&mut opts))
        .map_err(CollectError::Git)?;

    let mut buf = String::new();
    let mut capped = false;
    diff.print(DiffFormat::Patch, |_delta, _hunk, line| {
        if capped {
            // Keep returning `true`: a `false` return aborts the iteration and
            // makes `print` report an error, which would discard the partial
            // output we want to keep.
            return true;
        }

        let origin = line.origin();
        if matches!(origin, '+' | '-' | ' ' | '@' | '\\') {
            // Check the budget before writing the one-byte origin marker so the
            // buffer never exceeds the cap.
            if buf.len() >= DIFF_BYTE_CAP {
                capped = true;
                return true;
            }
            buf.push(origin);
        }

        // Lossy conversion keeps lines from non-UTF-8 files visible instead of
        // leaving a dangling origin marker with no content.
        let content = String::from_utf8_lossy(line.content());
        if !push_within_cap(&mut buf, &content, DIFF_BYTE_CAP) {
            capped = true;
        }
        true
    })
    .map_err(CollectError::Git)?;

    if capped {
        buf.push_str(TRUNCATION_MARKER);
    }
    Ok(buf)
}

/// Appends as much of `text` to `buf` as fits without `buf` exceeding `cap`
/// bytes, cutting only at a UTF-8 character boundary.
///
/// Returns `true` when all of `text` was appended and `false` when it had to
/// be truncated.
fn push_within_cap(buf: &mut String, text: &str, cap: usize) -> bool {
    let remaining = cap.saturating_sub(buf.len());
    if text.len() <= remaining {
        buf.push_str(text);
        return true;
    }

    // `remaining < text.len()` here, and index 0 is always a boundary, so this
    // loop terminates with a valid cut point.
    let mut cut = remaining;
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }
    buf.push_str(&text[..cut]);
    false
}
/// Aggregate and per-file change statistics for a single commit, as produced
/// by [`compute_commit_diff`].
#[derive(Debug, Clone, Default)]
pub struct CommitDiff {
/// Number of changed files reported by the diff's stats.
pub files_changed: u32,
/// Total inserted lines reported by the diff's stats.
pub insertions: u32,
/// Total deleted lines reported by the diff's stats.
pub deletions: u32,
/// One entry per file delta visited while walking the diff.
pub files: Vec<FileDiff>,
}
/// Change statistics for one file delta within a commit's diff.
#[derive(Debug, Clone)]
pub struct FileDiff {
/// Path of the file on the new side of the delta, falling back to the old
/// side; converted lossily to UTF-8 and empty when neither side has a path.
pub path: String,
/// Kind of change, derived from the delta status via `map_change_type`.
pub change_type: ChangeType,
/// Number of diff lines with origin `'+'` counted for this file.
pub insertions: u32,
/// Number of diff lines with origin `'-'` counted for this file.
pub deletions: u32,
}
/// Computes aggregate and per-file line statistics for `commit`.
///
/// The commit's tree is diffed against the tree of its first parent, or
/// against the empty tree when the commit has no parents. Rename and copy
/// detection is applied before statistics are gathered.
///
/// Aggregate counts come from the diff's stats and saturate at `u32::MAX`.
/// Per-file counts are gathered by walking the diff: the file callback pushes
/// one [`FileDiff`] per delta and the line callback counts `'+'` / `'-'`
/// lines against the most recently pushed entry.
///
/// # Errors
///
/// Propagates any `git2` error raised while reading trees, building the diff,
/// running similarity detection, computing stats, or iterating the diff.
pub fn compute_commit_diff(repo: &Repository, commit: &Commit<'_>) -> Result<CommitDiff> {
    // Saturating narrowing, consistent with the saturating per-file counters.
    let to_u32 = |n: usize| n.min(u32::MAX as usize) as u32;

    let tree = commit.tree()?;
    let parent_tree = if commit.parent_count() > 0 {
        Some(commit.parent(0)?.tree()?)
    } else {
        None
    };

    let mut opts = DiffOptions::new();
    opts.include_typechange(true);
    let mut diff = repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), Some(&mut opts))?;

    let mut find_opts = git2::DiffFindOptions::new();
    find_opts.renames(true).copies(true);
    diff.find_similar(Some(&mut find_opts))?;

    let stats = diff.stats()?;

    // Both callbacks need mutable access to the same list while `foreach`
    // holds them simultaneously, hence the `RefCell`.
    let files_cell: std::cell::RefCell<Vec<FileDiff>> =
        std::cell::RefCell::new(Vec::with_capacity(stats.files_changed()));

    diff.foreach(
        &mut |delta, _progress| {
            let path = delta
                .new_file()
                .path()
                .or_else(|| delta.old_file().path())
                .map(|p| p.to_string_lossy().into_owned())
                .unwrap_or_default();
            files_cell.borrow_mut().push(FileDiff {
                path,
                change_type: map_change_type(delta.status()),
                insertions: 0,
                deletions: 0,
            });
            true
        },
        None,
        None,
        Some(&mut |_delta, _hunk, line| {
            // The file callback runs for a delta before any of that delta's
            // lines are reported, so the entry for the current file is always
            // the last one pushed. This avoids rebuilding the path and
            // scanning the list for every line, and attributes lines to the
            // right entry even if two deltas share a path.
            let mut files = files_cell.borrow_mut();
            if let Some(file) = files.last_mut() {
                match line.origin() {
                    '+' => file.insertions = file.insertions.saturating_add(1),
                    '-' => file.deletions = file.deletions.saturating_add(1),
                    _ => {}
                }
            }
            true
        }),
    )?;

    Ok(CommitDiff {
        files_changed: to_u32(stats.files_changed()),
        insertions: to_u32(stats.insertions()),
        deletions: to_u32(stats.deletions()),
        files: files_cell.into_inner(),
    })
}
/// Translates a `git2` delta status into the crate's [`ChangeType`].
///
/// Added, copied and untracked entries count as `Added`; deletions and
/// renames map to their namesakes; every other status is reported as
/// `Modified`.
fn map_change_type(delta: Delta) -> ChangeType {
    if matches!(delta, Delta::Added | Delta::Copied | Delta::Untracked) {
        return ChangeType::Added;
    }
    if matches!(delta, Delta::Deleted) {
        return ChangeType::Deleted;
    }
    if matches!(delta, Delta::Renamed) {
        return ChangeType::Renamed;
    }
    ChangeType::Modified
}
#[cfg(test)]
mod tests {
    use super::*;

    const AUTHOR_NAME: &str = "Test User";
    const AUTHOR_EMAIL: &str = "test@example.com";

    /// Stages `filename` in the repository index, persists the index, and
    /// returns the id of the tree written from it.
    fn stage_and_write_tree(repo: &git2::Repository, filename: &str) -> git2::Oid {
        let mut index = repo.index().expect("index");
        index
            .add_path(std::path::Path::new(filename))
            .expect("add path");
        index.write().expect("write index");
        index.write_tree().expect("write tree")
    }

    /// Creates a fresh repository in a temp dir containing a single root
    /// commit that adds `filename` with `content`. Returns the temp dir (which
    /// must be kept alive by the caller) and the commit SHA.
    fn make_repo_with_initial_commit(filename: &str, content: &str) -> (tempfile::TempDir, String) {
        let dir = tempfile::TempDir::new().expect("tempdir");
        let repo = git2::Repository::init(dir.path()).expect("init repo");

        let mut config = repo.config().expect("config");
        config.set_str("user.name", AUTHOR_NAME).expect("set name");
        config
            .set_str("user.email", AUTHOR_EMAIL)
            .expect("set email");

        std::fs::write(dir.path().join(filename), content).expect("write file");

        let tree_id = stage_and_write_tree(&repo, filename);
        let tree = repo.find_tree(tree_id).expect("find tree");
        let sig = git2::Signature::now(AUTHOR_NAME, AUTHOR_EMAIL).expect("sig");
        let sha = repo
            .commit(Some("HEAD"), &sig, &sig, "Initial commit", &tree, &[])
            .expect("initial commit")
            .to_string();
        (dir, sha)
    }

    /// Overwrites `filename` with `new_content` and commits it on top of the
    /// current HEAD. Returns the new commit's SHA.
    fn add_follow_up_commit(repo_path: &Path, filename: &str, new_content: &str) -> String {
        let repo = git2::Repository::open(repo_path).expect("open repo");
        std::fs::write(repo_path.join(filename), new_content).expect("write file");

        let tree_id = stage_and_write_tree(&repo, filename);
        let tree = repo.find_tree(tree_id).expect("find tree");
        let sig = git2::Signature::now(AUTHOR_NAME, AUTHOR_EMAIL).expect("sig");
        let head = repo.head().expect("head").peel_to_commit().expect("peel");
        let parents = [&head];
        repo.commit(
            Some("HEAD"),
            &sig,
            &sig,
            "Follow-up commit",
            &tree,
            &parents,
        )
        .expect("follow-up commit")
        .to_string()
    }

    /// A regular commit shows both the added and the removed line.
    #[test]
    fn diff_for_commit_normal_commit() {
        let (dir, _initial_sha) = make_repo_with_initial_commit("hello.txt", "hello world\n");
        let sha = add_follow_up_commit(dir.path(), "hello.txt", "hello universe\n");

        let diff = diff_for_commit(dir.path(), &sha).expect("diff_for_commit");

        let has_added = diff.contains("+hello universe");
        let has_removed = diff.contains("-hello world");
        assert!(has_added, "diff should contain added line: {diff}");
        assert!(has_removed, "diff should contain removed line: {diff}");
    }

    /// A root commit is diffed against the empty tree: additions only.
    #[test]
    fn diff_for_commit_initial_commit() {
        let (dir, sha) = make_repo_with_initial_commit("readme.txt", "# Hello\n");

        let diff = diff_for_commit(dir.path(), &sha).expect("diff_for_commit");

        assert!(
            diff.contains("+# Hello"),
            "initial commit diff should show added content: {diff}"
        );

        // Lines starting with "---" are file headers, not removed content.
        let removed: Vec<&str> = diff
            .lines()
            .filter(|l| l.starts_with('-'))
            .filter(|l| !l.starts_with("---"))
            .collect();
        assert!(
            removed.is_empty(),
            "initial commit should have no removed lines: {:?}",
            removed
        );
    }

    /// A patch larger than the byte cap is truncated and carries the marker.
    #[test]
    fn diff_for_commit_truncates_at_cap() {
        // 2000 lines of 120 bytes each comfortably exceeds the 200 KiB cap.
        let line = "x".repeat(120);
        let big_content: String = vec![line.as_str(); 2000].join("\n");

        let (dir, _initial_sha) = make_repo_with_initial_commit("big.txt", "");
        let sha = add_follow_up_commit(dir.path(), "big.txt", &big_content);

        let diff = diff_for_commit(dir.path(), &sha).expect("diff_for_commit");

        let upper_bound = DIFF_BYTE_CAP + TRUNCATION_MARKER.len() + 200;
        assert!(
            diff.len() <= upper_bound,
            "diff length {} should be near the byte cap",
            diff.len()
        );
        assert!(
            diff.contains("diff truncated"),
            "truncated diff must contain the marker: len={}",
            diff.len()
        );
    }

    /// A SHA that does not exist in the repository yields an error.
    #[test]
    fn diff_for_commit_invalid_sha_returns_error() {
        let (dir, _) = make_repo_with_initial_commit("f.txt", "content\n");
        let missing_sha = "0000000000000000000000000000000000000000";

        let result = diff_for_commit(dir.path(), missing_sha);

        assert!(result.is_err(), "invalid SHA must return an error, not Ok");
    }
}