use schemars::JsonSchema;
use serde::Serialize;
use crate::git::reader::{GitError, RepoReader};
// Kind of change recorded for a single path in a diff.
//
// Serialized with snake_case variant names (e.g. `added`, `renamed`).
//
// NOTE: plain `//` comments are used here on purpose. This type derives
// `JsonSchema`, and schemars copies `///` doc comments into the generated
// schema as descriptions, which would change the emitted schema.
#[derive(Debug, Clone, Copy, Serialize, JsonSchema, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ChangeType {
    // The path exists only on the new side of the diff.
    Added,
    // The path exists on both sides with different content.
    Modified,
    // The path exists only on the old side of the diff.
    Deleted,
    // The content moved from `old_path` to `path` (a rewrite that is not a copy).
    Renamed,
    // The content at `path` was copied from `old_path` (a rewrite flagged as a copy).
    Copied,
}
// Where a change was observed.
//
// Serialized with snake_case variant names (`committed`, `staged`, `unstaged`).
//
// NOTE: plain `//` comments are used here on purpose. This type derives
// `JsonSchema`, and schemars copies `///` doc comments into the generated
// schema as descriptions, which would change the emitted schema.
#[derive(Debug, Clone, Copy, Serialize, JsonSchema, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum ChangeScope {
    // Change between two commits; the only scope `diff_commits` produces.
    Committed,
    // Presumably a change recorded in the index — not produced in this file; verify against callers.
    Staged,
    // Presumably a working-tree change not yet in the index — not produced in this file; verify against callers.
    Unstaged,
}
/// One changed path together with size and line statistics.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct FileChange {
    /// Path of the entry on the new side (or the removed path for deletions).
    pub path: String,
    /// Previous path; only set for renames and copies.
    pub old_path: Option<String>,
    /// What happened to the path.
    pub change_type: ChangeType,
    /// Where the change was observed.
    pub change_scope: ChangeScope,
    /// `true` when any blob involved contains a NUL byte.
    pub is_binary: bool,
    /// Number of added lines; `0` for binary content.
    pub lines_added: usize,
    /// Number of removed lines; `0` for binary content.
    pub lines_removed: usize,
    /// Size in bytes of the old object data (`0` for additions).
    pub size_before: usize,
    /// Size in bytes of the new object data (`0` for deletions).
    pub size_after: usize,
    /// Always `None` for committed diffs built in this file; presumably the
    /// index blob id for staged changes — confirm against the staged-diff code.
    pub staged_blob_id: Option<String>,
}
/// Outcome of a diff: the list of changed paths.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct DiffResult {
    /// Changed entries, in the order the diff callback produced them.
    pub files: Vec<FileChange>,
}
impl RepoReader {
pub fn diff_commits(&self, base_ref: &str, head_ref: &str) -> Result<DiffResult, GitError> {
let _span = tracing::info_span!("git.diff_commits").entered();
let base_commit = self.peel_to_commit(base_ref)?;
let head_commit = self.peel_to_commit(head_ref)?;
let base_tree = base_commit.tree().map_err(obj_err)?;
let head_tree = head_commit.tree().map_err(obj_err)?;
let mut files: Vec<FileChange> = Vec::new();
base_tree
.changes()
.map_err(obj_err)?
.for_each_to_obtain_tree(&head_tree, |change| {
use gix::object::tree::diff::Change as C;
let file_change = match change {
C::Addition {
location,
id,
entry_mode,
relation: _,
} => {
if entry_mode.is_commit() {
return Ok(gix::object::tree::diff::Action::Continue(()));
}
let (size_after, is_binary, lines_added) = blob_stats(&id)?;
FileChange {
path: location.to_string(),
old_path: None,
change_type: ChangeType::Added,
change_scope: ChangeScope::Committed,
is_binary,
lines_added,
lines_removed: 0,
size_before: 0,
size_after,
staged_blob_id: None,
}
}
C::Deletion {
location,
id,
entry_mode,
relation: _,
} => {
if entry_mode.is_commit() {
return Ok(gix::object::tree::diff::Action::Continue(()));
}
let (size_before, is_binary, lines_removed) = blob_stats(&id)?;
FileChange {
path: location.to_string(),
old_path: None,
change_type: ChangeType::Deleted,
change_scope: ChangeScope::Committed,
is_binary,
lines_added: 0,
lines_removed,
size_before,
size_after: 0,
staged_blob_id: None,
}
}
C::Modification {
location,
previous_id,
id,
previous_entry_mode,
entry_mode,
} => {
if entry_mode.is_commit() && previous_entry_mode.is_commit() {
return Ok(gix::object::tree::diff::Action::Continue(()));
}
if entry_mode.is_commit() {
let (size_before, is_binary, lines_removed) = blob_stats(&previous_id)?;
FileChange {
path: location.to_string(),
old_path: None,
change_type: ChangeType::Deleted,
change_scope: ChangeScope::Committed,
is_binary,
lines_added: 0,
lines_removed,
size_before,
size_after: 0,
staged_blob_id: None,
}
}
else if previous_entry_mode.is_commit() {
let (size_after, is_binary, lines_added) = blob_stats(&id)?;
FileChange {
path: location.to_string(),
old_path: None,
change_type: ChangeType::Added,
change_scope: ChangeScope::Committed,
is_binary,
lines_added,
lines_removed: 0,
size_before: 0,
size_after,
staged_blob_id: None,
}
}
else {
let old_obj = previous_id
.object()
.map_err(|e| GitError::ReadObject(e.to_string()))?;
let new_obj = id
.object()
.map_err(|e| GitError::ReadObject(e.to_string()))?;
let size_before = old_obj.data.len();
let size_after = new_obj.data.len();
let is_binary = old_obj.data.contains(&0) || new_obj.data.contains(&0);
let (lines_added, lines_removed) = if is_binary {
(0, 0)
} else {
count_line_changes(
Some(old_obj.data.as_ref()),
Some(new_obj.data.as_ref()),
)
};
FileChange {
path: location.to_string(),
old_path: None,
change_type: ChangeType::Modified,
change_scope: ChangeScope::Committed,
is_binary,
lines_added,
lines_removed,
size_before,
size_after,
staged_blob_id: None,
}
}
}
C::Rewrite {
source_location,
source_id,
location,
id,
diff,
copy,
source_entry_mode,
source_relation: _,
entry_mode,
relation: _,
} => {
if entry_mode.is_commit() || source_entry_mode.is_commit() {
return Ok(gix::object::tree::diff::Action::Continue(()));
}
let old_obj = source_id
.object()
.map_err(|e| GitError::ReadObject(e.to_string()))?;
let new_obj = id
.object()
.map_err(|e| GitError::ReadObject(e.to_string()))?;
let size_before = old_obj.data.len();
let size_after = new_obj.data.len();
let is_binary = old_obj.data.contains(&0) || new_obj.data.contains(&0);
let (lines_added, lines_removed) = match diff {
Some(stats) => (stats.insertions as usize, stats.removals as usize),
None => (0, 0),
};
FileChange {
path: location.to_string(),
old_path: Some(source_location.to_string()),
change_type: if copy {
ChangeType::Copied
} else {
ChangeType::Renamed
},
change_scope: ChangeScope::Committed,
is_binary,
lines_added,
lines_removed,
size_before,
size_after,
staged_blob_id: None,
}
}
};
files.push(file_change);
Ok::<gix::object::tree::diff::Action, GitError>(
gix::object::tree::diff::Action::Continue(()),
)
})
.map_err(obj_err)?;
Ok(DiffResult { files })
}
}
/// Wraps any displayable error into `GitError::ReadObject`, keeping only its
/// rendered message.
fn obj_err(e: impl std::fmt::Display) -> GitError {
    let message = format!("{e}");
    GitError::ReadObject(message)
}
fn blob_stats(id: &gix::Id<'_>) -> Result<(usize, bool, usize), GitError> {
let obj = id
.object()
.map_err(|e| GitError::ReadObject(e.to_string()))?;
let is_binary = obj.data.contains(&0);
let lines = if is_binary { 0 } else { count_lines(&obj.data) };
Ok((obj.data.len(), is_binary, lines))
}
pub(crate) fn count_line_changes(
old_data: Option<&[u8]>,
new_data: Option<&[u8]>,
) -> (usize, usize) {
use std::borrow::Cow;
let old_text: Cow<'_, str> = old_data.map_or(Cow::Borrowed(""), |d| String::from_utf8_lossy(d));
let new_text: Cow<'_, str> = new_data.map_or(Cow::Borrowed(""), |d| String::from_utf8_lossy(d));
let input = gix::diff::blob::InternedInput::new(old_text.as_ref(), new_text.as_ref());
let diff = gix::diff::blob::Diff::compute(gix::diff::blob::Algorithm::Myers, &input);
(
diff.count_additions() as usize,
diff.count_removals() as usize,
)
}
/// Counts the lines in `data`.
///
/// Every `\n` terminates one line; a non-empty final segment without a
/// trailing newline counts as one more line. Empty input has zero lines.
pub(crate) fn count_lines(data: &[u8]) -> usize {
    match data.last() {
        None => 0,
        Some(&final_byte) => {
            let terminated = data.iter().filter(|&&byte| byte == b'\n').count();
            if final_byte == b'\n' {
                terminated
            } else {
                terminated + 1
            }
        }
    }
}
/// Line range of one diff hunk on both sides of a comparison.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct HunkBoundary {
    /// 1-based number of the first affected line on the old side.
    pub old_start: usize,
    /// Number of old-side lines covered by the hunk (may be `0`).
    pub old_lines: usize,
    /// 1-based number of the first affected line on the new side.
    pub new_start: usize,
    /// Number of new-side lines covered by the hunk (may be `0`).
    pub new_lines: usize,
}
/// Computes the hunk boundaries between two text buffers.
///
/// Returns an empty list when either buffer contains a NUL byte (treated as
/// binary). Otherwise both buffers are decoded lossily as UTF-8, diffed line
/// by line with the Myers algorithm, post-processed, and each resulting hunk
/// is reported with 1-based start lines.
pub(crate) fn extract_hunks(old_data: &[u8], new_data: &[u8]) -> Vec<HunkBoundary> {
    use gix::diff::blob::{Algorithm, Diff, InternedInput};

    let either_is_binary = old_data.contains(&0) || new_data.contains(&0);
    if either_is_binary {
        return Vec::new();
    }

    let before = String::from_utf8_lossy(old_data);
    let after = String::from_utf8_lossy(new_data);
    let input = InternedInput::new(before.as_ref(), after.as_ref());

    let mut diff = Diff::compute(Algorithm::Myers, &input);
    diff.postprocess_lines(&input);

    let boundaries: Vec<HunkBoundary> = diff
        .hunks()
        .map(|hunk| HunkBoundary {
            // Hunk ranges are 0-based; the reported starts are 1-based.
            old_start: hunk.before.start as usize + 1,
            old_lines: (hunk.before.end - hunk.before.start) as usize,
            new_start: hunk.after.start as usize + 1,
            new_lines: (hunk.after.end - hunk.after.start) as usize,
        })
        .collect();
    boundaries
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;
    use std::process::Command;
    use tempfile::TempDir;

    // ---------------------------------------------------------------------
    // Fixture helpers
    // ---------------------------------------------------------------------

    /// Runs `git <args>` inside `dir` and returns the captured output.
    ///
    /// Panics only when the process cannot be spawned; the exit status is not
    /// inspected.
    fn git(dir: &Path, args: &[&str]) -> std::process::Output {
        Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .unwrap()
    }

    /// Initializes a repository on branch `main` with a test identity configured.
    fn init_repo(dir: &Path) {
        git(dir, &["init", "--initial-branch=main"]);
        git(dir, &["config", "user.email", "test@test.com"]);
        git(dir, &["config", "user.name", "Test"]);
    }

    /// Commits whatever is currently staged with the given message.
    fn commit(dir: &Path, message: &str) {
        git(dir, &["commit", "-m", message]);
    }

    /// Writes `contents` to `name`, stages that path and commits it.
    fn commit_file(dir: &Path, name: &str, contents: impl AsRef<[u8]>, message: &str) {
        std::fs::write(dir.join(name), contents).unwrap();
        git(dir, &["add", name]);
        commit(dir, message);
    }

    /// Creates the standard fixture: one commit adding `existing.txt`, then a
    /// second commit adding `added.txt`.
    fn create_repo_with_two_commits() -> (TempDir, std::path::PathBuf) {
        let dir = TempDir::new().unwrap();
        let path = dir.path().to_path_buf();
        init_repo(&path);
        commit_file(&path, "existing.txt", "hello\n", "initial commit");
        commit_file(&path, "added.txt", "new file\n", "add a file");
        (dir, path)
    }

    /// Creates a separate one-commit repository and returns it together with
    /// the id of its `HEAD` commit, for use as a gitlink target.
    fn create_submodule_commit() -> (TempDir, String) {
        let dir = TempDir::new().unwrap();
        let path = dir.path().to_path_buf();
        init_repo(&path);
        commit_file(&path, "sub.txt", "sub content\n", "sub initial");
        let sha = String::from_utf8(git(&path, &["rev-parse", "HEAD"]).stdout)
            .unwrap()
            .trim()
            .to_string();
        (dir, sha)
    }

    /// Stages a gitlink (mode 160000) entry named `name` pointing at `sha`.
    fn add_gitlink(dir: &Path, sha: &str, name: &str) {
        let cacheinfo = format!("160000,{sha},{name}");
        git(
            dir,
            &["update-index", "--add", "--cacheinfo", cacheinfo.as_str()],
        );
    }

    // ---------------------------------------------------------------------
    // diff_commits: basic change types
    // ---------------------------------------------------------------------

    #[test]
    fn it_detects_added_file() {
        let (_dir, path) = create_repo_with_two_commits();
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        assert_eq!(diff.files.len(), 1);
        assert_eq!(diff.files[0].path, "added.txt");
        assert_eq!(diff.files[0].change_type, ChangeType::Added);
    }

    #[test]
    fn it_reports_size_and_lines_for_added_file() {
        let (_dir, path) = create_repo_with_two_commits();
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        assert_eq!(diff.files[0].size_before, 0);
        assert_eq!(diff.files[0].size_after, 9);
        assert_eq!(diff.files[0].lines_added, 1);
        assert_eq!(diff.files[0].lines_removed, 0);
    }

    #[test]
    fn it_detects_deleted_file() {
        let (_dir, path) = create_repo_with_two_commits();
        std::fs::remove_file(path.join("existing.txt")).unwrap();
        git(&path, &["add", "existing.txt"]);
        commit(&path, "delete a file");
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        assert_eq!(diff.files.len(), 1);
        assert_eq!(diff.files[0].path, "existing.txt");
        assert_eq!(diff.files[0].change_type, ChangeType::Deleted);
        assert!(diff.files[0].size_before > 0);
        assert_eq!(diff.files[0].size_after, 0);
    }

    #[test]
    fn it_detects_modified_file() {
        let (_dir, path) = create_repo_with_two_commits();
        commit_file(&path, "existing.txt", "hello\nworld\n", "modify a file");
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        let modified = diff
            .files
            .iter()
            .find(|f| f.path == "existing.txt")
            .unwrap();
        assert_eq!(modified.change_type, ChangeType::Modified);
        assert!(modified.lines_added > 0);
    }

    #[test]
    fn it_detects_renamed_file() {
        let (_dir, path) = create_repo_with_two_commits();
        git(&path, &["mv", "existing.txt", "renamed.txt"]);
        commit(&path, "rename a file");
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        let renamed = diff.files.iter().find(|f| f.path == "renamed.txt").unwrap();
        assert_eq!(renamed.change_type, ChangeType::Renamed);
        assert_eq!(renamed.old_path.as_deref(), Some("existing.txt"));
    }

    // ---------------------------------------------------------------------
    // diff_commits: line counting
    // ---------------------------------------------------------------------

    #[test]
    fn it_counts_lines_for_file_without_trailing_newline() {
        let (_dir, path) = create_repo_with_two_commits();
        commit_file(
            &path,
            "no_newline.txt",
            "hello",
            "add file without trailing newline",
        );
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        let file = diff
            .files
            .iter()
            .find(|f| f.path == "no_newline.txt")
            .unwrap();
        assert_eq!(
            file.lines_added, 1,
            "non-empty file without trailing newline should count as 1 line"
        );
        assert_eq!(file.size_after, 5);
    }

    #[test]
    fn it_counts_lines_for_deleted_file_without_trailing_newline() {
        let (_dir, path) = create_repo_with_two_commits();
        commit_file(
            &path,
            "ephemeral.txt",
            "one\ntwo\nthree",
            "add ephemeral file",
        );
        std::fs::remove_file(path.join("ephemeral.txt")).unwrap();
        git(&path, &["add", "ephemeral.txt"]);
        commit(&path, "delete ephemeral file");
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        let file = diff
            .files
            .iter()
            .find(|f| f.path == "ephemeral.txt")
            .unwrap();
        assert_eq!(
            file.lines_removed, 3,
            "three lines without trailing newline: 'one\\ntwo\\nthree'"
        );
        assert_eq!(file.change_type, ChangeType::Deleted);
    }

    #[test]
    fn it_counts_lines_for_multiline_file_with_trailing_newline() {
        let (_dir, path) = create_repo_with_two_commits();
        commit_file(&path, "twolines.txt", "one\ntwo\n", "add two-line file");
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        let file = diff
            .files
            .iter()
            .find(|f| f.path == "twolines.txt")
            .unwrap();
        assert_eq!(file.lines_added, 2, "'one\\ntwo\\n' is 2 lines, not 3");
    }

    #[test]
    fn it_returns_nonzero_stats_for_a_valid_commit_diff() {
        let (_dir, path) = create_repo_with_two_commits();
        let reader = RepoReader::open(&path).unwrap();
        let result = reader.diff_commits("HEAD~1", "HEAD");
        assert!(
            result.is_ok(),
            "diff_commits with valid refs should succeed"
        );
        let diff = result.unwrap();
        let file = &diff.files[0];
        assert_eq!(file.size_after, 9);
        assert_eq!(file.lines_added, 1);
    }

    #[test]
    fn it_flags_binary_files() {
        let (_dir, path) = create_repo_with_two_commits();
        commit_file(
            &path,
            "image.png",
            [0x89, 0x50, 0x4E, 0x47, 0x00, 0x00],
            "add binary file",
        );
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        let binary = diff.files.iter().find(|f| f.path == "image.png").unwrap();
        assert!(binary.is_binary);
        assert_eq!(binary.lines_added, 0);
    }

    #[test]
    fn it_sets_committed_scope_on_diff_commits() {
        let (_dir, path) = create_repo_with_two_commits();
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        for file in &diff.files {
            assert_eq!(file.change_scope, ChangeScope::Committed);
        }
    }

    // ---------------------------------------------------------------------
    // count_lines
    // ---------------------------------------------------------------------

    #[test]
    fn count_lines_returns_zero_for_empty_data() {
        assert_eq!(count_lines(b""), 0);
    }

    #[test]
    fn count_lines_counts_single_line_without_trailing_newline() {
        assert_eq!(count_lines(b"hello"), 1);
    }

    #[test]
    fn count_lines_counts_single_line_with_trailing_newline() {
        assert_eq!(count_lines(b"hello\n"), 1);
    }

    #[test]
    fn count_lines_counts_multiple_lines_with_trailing_newline() {
        assert_eq!(count_lines(b"one\ntwo\n"), 2);
    }

    #[test]
    fn count_lines_counts_multiple_lines_without_trailing_newline() {
        assert_eq!(count_lines(b"one\ntwo\nthree"), 3);
    }

    #[test]
    fn change_scope_serializes_as_snake_case() {
        let json = serde_json::to_value(ChangeScope::Staged).unwrap();
        assert_eq!(json, "staged");
        let json = serde_json::to_value(ChangeScope::Unstaged).unwrap();
        assert_eq!(json, "unstaged");
        let json = serde_json::to_value(ChangeScope::Committed).unwrap();
        assert_eq!(json, "committed");
    }

    // ---------------------------------------------------------------------
    // diff_commits: binary detection on modifications
    // ---------------------------------------------------------------------

    #[test]
    fn it_detects_binary_when_only_old_blob_is_binary() {
        let (_dir, path) = create_repo_with_two_commits();
        commit_file(&path, "data.bin", [0x00, 0x01, 0x02], "add binary file");
        commit_file(&path, "data.bin", "now text\n", "replace binary with text");
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        let file = diff.files.iter().find(|f| f.path == "data.bin").unwrap();
        assert!(
            file.is_binary,
            "should be binary when old blob contains null bytes"
        );
        assert_eq!(file.lines_added, 0);
        assert_eq!(file.lines_removed, 0);
    }

    #[test]
    fn it_detects_binary_when_only_new_blob_is_binary() {
        let (_dir, path) = create_repo_with_two_commits();
        commit_file(&path, "data.bin", "text content\n", "add text file");
        commit_file(
            &path,
            "data.bin",
            [0x89, 0x50, 0x00, 0x47],
            "replace text with binary",
        );
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        let file = diff.files.iter().find(|f| f.path == "data.bin").unwrap();
        assert!(
            file.is_binary,
            "should be binary when new blob contains null bytes"
        );
        assert_eq!(file.lines_added, 0);
        assert_eq!(file.lines_removed, 0);
    }

    // ---------------------------------------------------------------------
    // count_line_changes
    // ---------------------------------------------------------------------

    #[test]
    fn it_counts_exact_line_changes_for_modification() {
        let old = b"line1\nline2\nline3\n";
        let new = b"line1\nmodified\nline3\nextra\n";
        let (added, removed) = count_line_changes(Some(old), Some(new));
        assert_eq!(added, 2, "expected 2 lines added");
        assert_eq!(removed, 1, "expected 1 line removed");
    }

    #[test]
    fn it_counts_zero_changes_for_identical_content() {
        let data = b"same\ncontent\n";
        let (added, removed) = count_line_changes(Some(data), Some(data));
        assert_eq!(added, 0);
        assert_eq!(removed, 0);
    }

    #[test]
    fn it_counts_only_additions_when_old_is_empty() {
        let (added, removed) = count_line_changes(Some(b""), Some(b"one\ntwo\n"));
        assert_eq!(added, 2);
        assert_eq!(removed, 0);
    }

    #[test]
    fn it_counts_only_removals_when_new_is_empty() {
        let (added, removed) = count_line_changes(Some(b"one\ntwo\nthree\n"), Some(b""));
        assert_eq!(added, 0);
        assert_eq!(removed, 3);
    }

    // ---------------------------------------------------------------------
    // diff_commits: submodules, symlinks and other special entries
    // ---------------------------------------------------------------------

    #[test]
    fn it_filters_submodule_addition_in_committed_diff() {
        let (_dir, path) = create_repo_with_two_commits();
        let (_submod_dir, sha) = create_submodule_commit();
        add_gitlink(&path, &sha, "sub");
        commit(&path, "add submodule");
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        assert!(
            !diff.files.iter().any(|f| f.path == "sub"),
            "submodule addition should be filtered from committed diff"
        );
    }

    #[test]
    fn it_preserves_binary_png_in_committed_diff() {
        let (_dir, path) = create_repo_with_two_commits();
        commit_file(
            &path,
            "image.png",
            [0x89, 0x50, 0x4E, 0x47, 0x00, 0x00],
            "add png",
        );
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        assert!(
            diff.files.iter().any(|f| f.path == "image.png"),
            "binary PNG should be preserved in committed diff"
        );
    }

    #[test]
    fn it_filters_submodule_replaced_by_file_in_committed_diff() {
        let (_dir, path) = create_repo_with_two_commits();
        let (_submod_dir, sha) = create_submodule_commit();
        add_gitlink(&path, &sha, "sub");
        commit(&path, "add submodule");
        git(&path, &["rm", "-f", "sub"]);
        commit_file(
            &path,
            "sub",
            "now a regular file\n",
            "replace submodule with file",
        );
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        let sub_entries: Vec<_> = diff.files.iter().filter(|f| f.path == "sub").collect();
        assert!(
            sub_entries.len() <= 1,
            "expected at most one 'sub' entry; got {:?}",
            sub_entries
        );
        if let Some(f) = sub_entries.first() {
            if f.change_type == ChangeType::Modified {
                assert_eq!(
                    f.size_before, 20,
                    "size_before should be old file size, not submodule commit object text size"
                );
            }
        }
    }

    #[test]
    fn it_preserves_symlinks_in_committed_diff() {
        let (_dir, path) = create_repo_with_two_commits();
        std::fs::write(path.join("target.txt"), "target content\n").unwrap();
        #[cfg(unix)]
        {
            use std::os::unix::fs::symlink;
            symlink("target.txt", path.join("link.txt")).unwrap();
        }
        // Symlink creation is only exercised on unix; elsewhere the test is a no-op.
        #[cfg(not(unix))]
        {
            return;
        }
        git(&path, &["add", "target.txt", "link.txt"]);
        commit(&path, "add symlink");
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        assert!(
            diff.files.iter().any(|f| f.path == "link.txt"),
            "symlink should be preserved in committed diff, not filtered as gitlink"
        );
    }

    #[test]
    fn it_reports_deletion_when_file_replaced_by_submodule() {
        let (_dir, path) = create_repo_with_two_commits();
        let (_submod_dir, sha) = create_submodule_commit();
        commit_file(
            &path,
            "was_a_file.txt",
            "regular file content\n",
            "add regular file",
        );
        git(&path, &["rm", "was_a_file.txt"]);
        add_gitlink(&path, &sha, "was_a_file.txt");
        commit(&path, "replace file with submodule");
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        let file_entries: Vec<_> = diff
            .files
            .iter()
            .filter(|f| f.path == "was_a_file.txt")
            .collect();
        assert!(
            !file_entries.is_empty(),
            "file-to-submodule transition must report the file deletion"
        );
        assert_eq!(
            file_entries[0].change_type,
            ChangeType::Deleted,
            "file-to-submodule transition must report a Deletion"
        );
        assert_eq!(
            file_entries[0].size_before, 21,
            "size_before should be the regular file's size (21 bytes)"
        );
    }

    #[test]
    fn it_reports_addition_when_submodule_replaced_by_file() {
        let (_dir, path) = create_repo_with_two_commits();
        let (_submod_dir, sha) = create_submodule_commit();
        add_gitlink(&path, &sha, "lib");
        commit(&path, "add submodule");
        git(&path, &["rm", "-f", "lib"]);
        commit_file(
            &path,
            "lib",
            "now a regular file\n",
            "replace submodule with file",
        );
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        let lib_entries: Vec<_> = diff.files.iter().filter(|f| f.path == "lib").collect();
        assert!(
            !lib_entries.is_empty(),
            "submodule-to-file transition must report the new file"
        );
        assert_eq!(
            lib_entries[0].change_type,
            ChangeType::Added,
            "submodule-to-file transition must report an Addition"
        );
        assert_eq!(
            lib_entries[0].size_after, 19,
            "size_after should be the new file's size (19 bytes)"
        );
    }

    #[test]
    fn it_filters_submodule_deletion_in_committed_diff() {
        let (_dir, path) = create_repo_with_two_commits();
        let (_submod_dir, sha) = create_submodule_commit();
        add_gitlink(&path, &sha, "sub");
        commit(&path, "add submodule");
        git(&path, &["rm", "-f", "sub"]);
        commit(&path, "delete submodule");
        let reader = RepoReader::open(&path).unwrap();
        let diff = reader.diff_commits("HEAD~1", "HEAD").unwrap();
        assert!(
            !diff.files.iter().any(|f| f.path == "sub"),
            "submodule deletion should be filtered from committed diff"
        );
    }

    #[test]
    fn it_counts_multiple_removals_and_additions() {
        let old = b"a\nb\nc\nd\ne\n";
        let new = b"a\nx\ny\nz\ne\n";
        let (added, removed) = count_line_changes(Some(old), Some(new));
        assert_eq!(added, 3);
        assert_eq!(removed, 3);
    }

    // ---------------------------------------------------------------------
    // extract_hunks
    // ---------------------------------------------------------------------

    #[test]
    fn it_extracts_hunk_boundaries_for_single_modification() {
        let old = b"line1\nline2\nline3\n";
        let new = b"line1\nmodified\nline3\n";
        let hunks = extract_hunks(old, new);
        assert_eq!(hunks.len(), 1);
        let h = &hunks[0];
        assert_eq!(h.old_start, 2);
        assert_eq!(h.old_lines, 1);
        assert_eq!(h.new_start, 2);
        assert_eq!(h.new_lines, 1);
    }

    #[test]
    fn it_extracts_hunk_boundaries_for_appended_line() {
        let old = b"line1\nline2\n";
        let new = b"line1\nline2\nline3\n";
        let hunks = extract_hunks(old, new);
        assert_eq!(hunks.len(), 1);
        let h = &hunks[0];
        assert_eq!(
            h.new_lines,
            h.old_lines + 1,
            "appending one line should increase new_lines by exactly 1"
        );
    }

    #[test]
    fn it_extracts_hunk_boundaries_for_bulk_line_changes() {
        let old = b"a\nb\nc\nd\ne\n";
        let new = b"a\nx\ny\nz\ne\n";
        let hunks = extract_hunks(old, new);
        assert_eq!(hunks.len(), 1);
        let h = &hunks[0];
        assert_eq!(h.old_start, 2);
        assert_eq!(h.old_lines, 3);
        assert_eq!(h.new_start, 2);
        assert_eq!(h.new_lines, 3);
    }

    #[test]
    fn it_returns_empty_for_identical_content() {
        let data = b"same\ncontent\n";
        let hunks = extract_hunks(data, data);
        assert!(hunks.is_empty());
    }

    #[test]
    fn it_extracts_hunk_for_added_content_from_empty_old() {
        let hunks = extract_hunks(b"", b"something\n");
        assert_eq!(hunks.len(), 1);
        let h = &hunks[0];
        assert_eq!(h.old_start, 1);
        assert_eq!(h.old_lines, 0);
        assert_eq!(h.new_start, 1);
        assert_eq!(h.new_lines, 1);
    }

    #[test]
    fn it_extracts_hunk_for_emptied_file() {
        let hunks = extract_hunks(b"something\n", b"");
        assert_eq!(hunks.len(), 1);
        let h = &hunks[0];
        assert_eq!(h.old_start, 1);
        assert_eq!(h.old_lines, 1);
        assert_eq!(h.new_start, 1);
        assert_eq!(h.new_lines, 0);
    }

    #[test]
    fn it_returns_empty_for_binary_content() {
        let hunks = extract_hunks(b"text\x00binary", b"text\n");
        assert!(hunks.is_empty());
    }

    #[test]
    fn it_extracts_hunk_boundaries_for_prepended_line() {
        let old = b"line2\nline3\n";
        let new = b"line1\nline2\nline3\n";
        let hunks = extract_hunks(old, new);
        assert_eq!(hunks.len(), 1);
        let h = &hunks[0];
        assert_eq!(
            h.new_lines,
            h.old_lines + 1,
            "prepending one line should increase new_lines by exactly 1"
        );
    }

    #[test]
    fn it_extracts_hunk_boundaries_for_single_line_change() {
        let old = b"hello\n";
        let new = b"world\n";
        let hunks = extract_hunks(old, new);
        assert_eq!(hunks.len(), 1);
        let h = &hunks[0];
        assert_eq!(h.old_start, 1);
        assert_eq!(h.old_lines, 1);
        assert_eq!(h.new_start, 1);
        assert_eq!(h.new_lines, 1);
    }

    #[test]
    fn it_extracts_hunk_boundaries_with_unicode_content() {
        let old: &[u8] = b"cafe\n";
        let new: &[u8] = b"caf\xc3\xa9\n";
        let hunks = extract_hunks(old, new);
        assert_eq!(
            hunks.len(),
            1,
            "unicode content change should produce one hunk"
        );
        let h = &hunks[0];
        assert_eq!(h.old_lines, 1);
        assert_eq!(h.new_lines, 1);
    }

    #[test]
    fn it_returns_empty_hunks_for_unchanged_single_line() {
        let data = b"hello\n";
        let hunks = extract_hunks(data, data);
        assert!(
            hunks.is_empty(),
            "identical single line should produce no hunks"
        );
    }
}