travelagent-core 1.10.3

Core library for travelagent code review tool
Documentation
use git2::{Delta, Diff, DiffFindOptions, DiffOptions, Repository};
use std::path::PathBuf;

use crate::error::{Result, TrvError};
use crate::model::{DiffFile, DiffHunk, DiffLine, FileStatus, LineOrigin};

/// Rewrite `diff` in place so that similar delete+add pairs are reported as
/// renames (or copies) instead of as two unrelated entries.
///
/// A rename that also edits the file's contents is the common case for
/// agent-driven refactors. Without this pass it shows up as a deletion plus
/// an addition, and every comment attached to the old path is orphaned when
/// comments are re-anchored.
///
/// Only rename and copy detection are switched on; no similarity threshold
/// is configured here, so the library default applies. Similarity scoring
/// compares files pairwise, which costs real time on enormous trees — but
/// the diffs we render are already bounded by user-visible review scope.
///
/// # Errors
///
/// Propagates any error returned by `Diff::find_similar`.
fn apply_rename_detection(diff: &mut Diff) -> Result<()> {
    let mut find_opts = DiffFindOptions::new();
    find_opts.renames(true);
    find_opts.copies(true);

    diff.find_similar(Some(&mut find_opts))?;
    Ok(())
}

/// Get the diff between HEAD's tree and the working directory (via the index).
///
/// Untracked files are included, untracked directories are recursed into,
/// and the content of untracked files is shown. Rename detection is applied
/// before the diff is parsed.
///
/// # Errors
///
/// Fails if HEAD cannot be resolved or peeled to a tree, if the diff cannot
/// be computed, or with whatever `parse_diff` reports (including
/// `TrvError::NoChanges` for an empty diff).
pub fn get_working_tree_diff(repo: &Repository) -> Result<Vec<DiffFile>> {
    let head_tree = repo.head()?.peel_to_tree()?;

    let mut diff_opts = DiffOptions::new();
    diff_opts
        .include_untracked(true)
        .show_untracked_content(true)
        .recurse_untracked_dirs(true);

    let mut diff =
        repo.diff_tree_to_workdir_with_index(Some(&head_tree), Some(&mut diff_opts))?;
    apply_rename_detection(&mut diff)?;

    parse_diff(&diff)
}

/// Get the staged diff (index vs HEAD).
///
/// If HEAD cannot be resolved or peeled to a tree — for example on a repo
/// with no commits (unborn HEAD) — no base tree is passed, so the index is
/// diffed against an empty tree.
///
/// # Errors
///
/// Fails if the index cannot be opened, if the diff cannot be computed, or
/// with whatever `parse_diff` reports (including `TrvError::NoChanges`).
pub fn get_staged_diff(repo: &Repository) -> Result<Vec<DiffFile>> {
    // Any failure to reach a HEAD tree is deliberately treated as "no base".
    let head_tree = match repo.head() {
        Ok(reference) => reference.peel_to_tree().ok(),
        Err(_) => None,
    };
    let index = repo.index()?;

    let mut diff = repo.diff_tree_to_index(head_tree.as_ref(), Some(&index), None)?;
    apply_rename_detection(&mut diff)?;

    parse_diff(&diff)
}

/// Get the unstaged diff (working tree vs index).
///
/// Untracked files are included, untracked directories are recursed into,
/// and the content of untracked files is shown. Rename detection is applied
/// before the diff is parsed.
///
/// # Errors
///
/// Fails if the index cannot be opened, if the diff cannot be computed, or
/// with whatever `parse_diff` reports (including `TrvError::NoChanges`).
pub fn get_unstaged_diff(repo: &Repository) -> Result<Vec<DiffFile>> {
    let index = repo.index()?;

    let mut diff_opts = DiffOptions::new();
    diff_opts
        .include_untracked(true)
        .show_untracked_content(true)
        .recurse_untracked_dirs(true);

    let mut diff = repo.diff_index_to_workdir(Some(&index), Some(&mut diff_opts))?;
    apply_rename_detection(&mut diff)?;

    parse_diff(&diff)
}

/// Get the diff for a range of commits.
///
/// `commit_ids` should be ordered from oldest to newest. The diff compares
/// the first parent of the oldest commit (or an empty tree when that commit
/// has no parents) to the tree of the newest commit. Rename detection is
/// applied before the diff is parsed.
///
/// # Errors
///
/// Returns `TrvError::NoChanges` when `commit_ids` is empty. Also fails if
/// an id is not a valid object id, if a commit or tree cannot be loaded, if
/// the diff cannot be computed, or with whatever `parse_diff` reports.
pub fn get_commit_range_diff(repo: &Repository, commit_ids: &[String]) -> Result<Vec<DiffFile>> {
    // The list is ordered oldest to newest: the oldest commit is the FIRST
    // entry and the newest is the LAST. An empty list has neither.
    let oldest_id = commit_ids.first().ok_or(TrvError::NoChanges)?;
    let newest_id = commit_ids.last().ok_or(TrvError::NoChanges)?;

    let oldest_commit = repo.find_commit(git2::Oid::from_str(oldest_id)?)?;
    let newest_commit = repo.find_commit(git2::Oid::from_str(newest_id)?)?;

    // Get the parent of the oldest commit, or use an empty tree (`None`) if
    // it's the initial commit.
    let old_tree = if oldest_commit.parent_count() > 0 {
        Some(oldest_commit.parent(0)?.tree()?)
    } else {
        None
    };

    let new_tree = newest_commit.tree()?;

    let mut diff = repo.diff_tree_to_tree(old_tree.as_ref(), Some(&new_tree), None)?;
    apply_rename_detection(&mut diff)?;

    parse_diff(&diff)
}

/// Get a combined diff from the parent of the oldest commit through to the
/// working tree, so committed and working tree changes appear in one diff.
///
/// `commit_ids` is read oldest-first; only its first entry is used. The base
/// is that commit's first parent, or an empty tree when it has no parents.
/// Untracked files are included with their content, and rename detection is
/// applied before the diff is parsed.
///
/// # Errors
///
/// Returns `TrvError::NoChanges` when `commit_ids` is empty. Also fails if
/// the id is not a valid object id, if the commit or its parent tree cannot
/// be loaded, if the diff cannot be computed, or with whatever `parse_diff`
/// reports.
pub fn get_working_tree_with_commits_diff(
    repo: &Repository,
    commit_ids: &[String],
) -> Result<Vec<DiffFile>> {
    // Oldest commit is the first entry; an empty list means nothing to diff.
    let first_id = commit_ids.first().ok_or(TrvError::NoChanges)?;
    let oldest_commit = repo.find_commit(git2::Oid::from_str(first_id)?)?;

    // A root commit has no parent, so diff against an empty tree (`None`).
    let base_tree = match oldest_commit.parent_count() {
        0 => None,
        _ => Some(oldest_commit.parent(0)?.tree()?),
    };

    let mut diff_opts = DiffOptions::new();
    diff_opts
        .include_untracked(true)
        .show_untracked_content(true)
        .recurse_untracked_dirs(true);

    let mut diff =
        repo.diff_tree_to_workdir_with_index(base_tree.as_ref(), Some(&mut diff_opts))?;
    apply_rename_detection(&mut diff)?;

    parse_diff(&diff)
}

/// Convert every delta in `diff` into a `DiffFile`, in delta order.
///
/// Hunks are parsed for each delta unless either side is flagged binary or
/// the delta is an untracked file above the size cap. Those entries are
/// still returned, with an empty hunk list and the matching flag set.
/// `is_commit_message` is always `false` for entries produced here.
///
/// # Errors
///
/// Returns `TrvError::NoChanges` when the diff has no deltas, and stops at
/// the first error reported by `parse_hunks`.
fn parse_diff(diff: &Diff) -> Result<Vec<DiffFile>> {
    // Untracked files larger than this are shown in the file list but their
    // content is not parsed — they are likely logs, dumps, or build artefacts.
    const MAX_UNTRACKED_FILE_SIZE: u64 = 10 * 1_024 * 1_024;

    let files = diff
        .deltas()
        .enumerate()
        .map(|(delta_idx, delta)| -> Result<DiffFile> {
            let delta_status = delta.status();
            let old_file = delta.old_file();
            let new_file = delta.new_file();

            let is_binary = old_file.is_binary() || new_file.is_binary();
            let is_too_large =
                delta_status == Delta::Untracked && new_file.size() > MAX_UNTRACKED_FILE_SIZE;

            let hunks = if is_binary || is_too_large {
                Vec::new()
            } else {
                parse_hunks(diff, delta_idx)?
            };

            // Any delta kind without a dedicated status is reported as a
            // modification.
            let status = match delta_status {
                Delta::Added | Delta::Untracked => FileStatus::Added,
                Delta::Deleted => FileStatus::Deleted,
                Delta::Modified => FileStatus::Modified,
                Delta::Renamed => FileStatus::Renamed,
                Delta::Copied => FileStatus::Copied,
                _ => FileStatus::Modified,
            };

            Ok(DiffFile {
                old_path: old_file.path().map(PathBuf::from),
                new_path: new_file.path().map(PathBuf::from),
                status,
                hunks,
                is_binary,
                is_too_large,
                is_commit_message: false,
            })
        })
        .collect::<Result<Vec<_>>>()?;

    if files.is_empty() {
        Err(TrvError::NoChanges)
    } else {
        Ok(files)
    }
}

/// Extract the hunks of the delta at `delta_idx` in `diff`.
///
/// Returns an empty list when git2 yields no patch for the delta. Each line
/// has its trailing `\n` and `\r` characters stripped and every tab expanded
/// to four spaces; non-UTF-8 bytes are replaced lossily. Lines whose origin
/// is neither `+` nor `-` are recorded as context.
///
/// # Errors
///
/// Propagates any error from building the patch or from reading a hunk or
/// line out of it.
fn parse_hunks(diff: &Diff, delta_idx: usize) -> Result<Vec<DiffHunk>> {
    let patch = match git2::Patch::from_diff(diff, delta_idx)? {
        Some(patch) => patch,
        None => return Ok(Vec::new()),
    };

    let hunk_count = patch.num_hunks();
    let mut hunks: Vec<DiffHunk> = Vec::with_capacity(hunk_count);

    for hunk_idx in 0..hunk_count {
        let (hunk, _) = patch.hunk(hunk_idx)?;

        let header = String::from_utf8_lossy(hunk.header()).trim().to_string();
        let old_start = hunk.old_start();
        let old_count = hunk.old_lines();
        let new_start = hunk.new_start();
        let new_count = hunk.new_lines();

        let line_count = patch.num_lines_in_hunk(hunk_idx)?;
        let mut lines: Vec<DiffLine> = Vec::with_capacity(line_count);

        for line_idx in 0..line_count {
            let line = patch.line_in_hunk(hunk_idx, line_idx)?;

            // Everything that is not an explicit addition or deletion is
            // treated as context.
            let origin = match line.origin() {
                '+' => LineOrigin::Addition,
                '-' => LineOrigin::Deletion,
                _ => LineOrigin::Context,
            };

            // `replace` already yields an owned `String`; no further copy
            // is needed.
            let content = String::from_utf8_lossy(line.content())
                .trim_end_matches('\n')
                .trim_end_matches('\r')
                .replace('\t', "    ");

            lines.push(DiffLine {
                origin,
                content,
                old_lineno: line.old_lineno(),
                new_lineno: line.new_lineno(),
                highlighted_spans: None,
            });
        }

        hunks.push(DiffHunk {
            header,
            lines,
            old_start,
            old_count,
            new_start,
            new_count,
        });
    }

    Ok(hunks)
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::Path;

    /// Write `content` to `file_name` in the repo's working directory, stage
    /// it, and create a parentless commit named "initial" on HEAD.
    fn create_initial_commit(repo: &Repository, file_name: &str, content: &str) {
        fs::write(repo.workdir().unwrap().join(file_name), content)
            .expect("failed to write initial file");

        let mut index = repo.index().expect("failed to open index");
        index
            .add_path(Path::new(file_name))
            .expect("failed to add file to index");
        index.write().expect("failed to write index");

        let tree_id = index.write_tree().expect("failed to write tree");
        let tree = repo.find_tree(tree_id).expect("failed to find tree");
        let sig = git2::Signature::now("Test User", "test@example.com")
            .expect("failed to create signature");

        repo.commit(Some("HEAD"), &sig, &sig, "initial", &tree, &[])
            .expect("failed to create commit");
    }

    #[test]
    fn should_return_no_changes_for_clean_repo() {
        // given
        // Use a throwaway repo rather than discovering the ambient checkout,
        // so the test does not depend on where (or whether) the crate is
        // checked out under git.
        let temp_dir = tempfile::tempdir().expect("failed to create temp dir");
        let repo = Repository::init(temp_dir.path()).expect("failed to init repo");
        create_initial_commit(&repo, "file.txt", "content\n");

        let head = repo
            .head()
            .expect("failed to resolve HEAD")
            .peel_to_tree()
            .expect("failed to peel HEAD to tree");
        let diff = repo
            .diff_tree_to_tree(Some(&head), Some(&head), None)
            .expect("failed to diff tree against itself");

        // when
        let result = parse_diff(&diff);

        // then
        assert!(matches!(result, Err(TrvError::NoChanges)));
    }

    #[test]
    fn should_expand_tabs_to_spaces_in_git_hunks() {
        let temp_dir = tempfile::tempdir().expect("failed to create temp dir");
        let repo = Repository::init(temp_dir.path()).expect("failed to init repo");

        create_initial_commit(&repo, "file.txt", "old\n");

        // Explicit `\t` escape so the fixture cannot be silently altered by
        // editors or formatters that convert tabs to spaces.
        fs::write(temp_dir.path().join("file.txt"), "\tnew\n").expect("failed to update file");

        let files = get_working_tree_diff(&repo).expect("failed to get diff");

        assert_eq!(files.len(), 1);
        let lines = &files[0].hunks[0].lines;

        assert!(
            lines.iter().any(|l| l.content == "    new"),
            "expected tab-expanded content in git diff lines"
        );
        assert!(lines.iter().all(|l| !l.content.contains('\t')));
    }

    #[test]
    fn should_detect_rename_as_single_diff_entry() {
        let temp_dir = tempfile::tempdir().expect("failed to create temp dir");
        let repo = Repository::init(temp_dir.path()).expect("failed to init repo");

        // Seed with enough content that a rename + small edit still scores
        // above git2's default similarity threshold.
        let body = (0..40).map(|i| format!("line {i}\n")).collect::<String>();
        create_initial_commit(&repo, "old.txt", &body);

        fs::remove_file(temp_dir.path().join("old.txt")).expect("failed to remove old file");
        let mut tweaked = body.clone();
        tweaked.push_str("line 40 (added)\n");
        fs::write(temp_dir.path().join("new.txt"), &tweaked).expect("failed to write new file");

        let mut index = repo.index().expect("failed to open index");
        index
            .remove_path(Path::new("old.txt"))
            .expect("failed to remove old path from index");
        index
            .add_path(Path::new("new.txt"))
            .expect("failed to add new path to index");
        index.write().expect("failed to write index");

        let files = get_working_tree_diff(&repo).expect("failed to get diff");

        assert_eq!(
            files.len(),
            1,
            "rename should collapse into one diff entry, got {files:?}",
        );
        let file = &files[0];
        assert_eq!(file.status, FileStatus::Renamed);
        assert_eq!(file.old_path.as_deref(), Some(Path::new("old.txt")));
        assert_eq!(file.new_path.as_deref(), Some(Path::new("new.txt")));
    }

    #[test]
    fn should_separate_staged_and_unstaged_diffs() {
        let temp_dir = tempfile::tempdir().expect("failed to create temp dir");
        let repo = Repository::init(temp_dir.path()).expect("failed to init repo");

        create_initial_commit(&repo, "file.txt", "base\n");

        fs::write(temp_dir.path().join("file.txt"), "unstaged\n").expect("failed to update file");

        // Before staging: the edit is visible only in the unstaged diff.
        let unstaged = get_unstaged_diff(&repo).expect("unstaged diff failed");
        assert_eq!(unstaged.len(), 1);
        assert!(matches!(get_staged_diff(&repo), Err(TrvError::NoChanges)));

        let mut index = repo.index().expect("failed to open index");
        index
            .add_path(Path::new("file.txt"))
            .expect("failed to add file to index");
        index.write().expect("failed to write index");

        // After staging: the edit moves to the staged diff.
        let staged = get_staged_diff(&repo).expect("staged diff failed");
        assert_eq!(staged.len(), 1);
        assert!(matches!(get_unstaged_diff(&repo), Err(TrvError::NoChanges)));
    }
}