Skip to main content

ralph_workflow/git_helpers/repo/
diff.rs

1use std::io;
2
3use crate::git_helpers::git2_to_io_error;
4use crate::workspace::Workspace;
5
6/// Get the diff of all changes (unstaged and staged).
7///
8/// Returns a formatted diff string suitable for LLM analysis.
9/// This is similar to `git diff HEAD`.
10///
11/// Handles the case of an empty repository (no commits yet) by
12/// diffing against an empty tree using a read-only approach.
13pub fn git_diff() -> io::Result<String> {
14    let repo = git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?;
15    git_diff_impl(&repo)
16}
17
18/// Generate a diff from a specific starting commit.
19///
20/// Takes a starting commit OID and generates a diff between that commit
21/// and the current working tree. Returns a formatted diff string suitable
22/// for LLM analysis.
23pub fn git_diff_from(start_oid: &str) -> io::Result<String> {
24    let repo = git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?;
25
26    // Parse the starting OID.
27    let oid = git2::Oid::from_str(start_oid).map_err(|_| {
28        io::Error::new(
29            io::ErrorKind::InvalidInput,
30            format!("Invalid commit OID: {start_oid}"),
31        )
32    })?;
33
34    git_diff_from_oid(&repo, oid)
35}
36
37/// Get the git diff from the starting commit.
38///
39/// Uses the saved starting commit from `.agent/start_commit` to generate
40/// an incremental diff. Falls back to diffing from HEAD if no start commit
41/// file exists.
42pub fn get_git_diff_from_start() -> io::Result<String> {
43    use crate::git_helpers::start_commit::{load_start_point, save_start_commit, StartPoint};
44
45    // Ensure a valid starting point exists. This is expected to persist across runs,
46    // but we also repair missing/corrupt files opportunistically for robustness.
47    save_start_commit()?;
48
49    let repo = git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?;
50
51    match load_start_point()? {
52        StartPoint::Commit(oid) => git_diff_from(&oid.to_string()),
53        StartPoint::EmptyRepo => git_diff_from_empty_tree(&repo),
54    }
55}
56
57/// Get the git diff from the starting commit (workspace-aware).
58///
59/// This uses `.agent/start_commit` as the baseline and generates a diff between that baseline
60/// and the current state on disk, including staged + unstaged changes and untracked files.
61///
62/// Unlike [`get_git_diff_from_start`], this does not rely on the process CWD.
63pub fn get_git_diff_from_start_with_workspace(workspace: &dyn Workspace) -> io::Result<String> {
64    use crate::git_helpers::start_commit::{
65        load_start_point_with_workspace, save_start_commit_with_workspace, StartPoint,
66    };
67
68    // NOTE: We intentionally discover the repository from the process CWD.
69    // The pipeline sets CWD to the repo root early, and many test harnesses use a
70    // mock workspace root that doesn't exist on disk.
71    //
72    // Prefer the *actual* repo root (workdir) when it exists on disk so tests remain
73    // deterministic even when the process CWD happens to be a real git checkout.
74    //
75    // We still fall back to discovering from CWD when `.git` exists there.
76    let repo_root =
77        crate::git_helpers::get_repo_root().unwrap_or_else(|_| std::path::PathBuf::from("."));
78    let repo = if workspace.exists(std::path::Path::new(".git")) {
79        git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?
80    } else {
81        git2::Repository::discover(&repo_root).map_err(|e| git2_to_io_error(&e))?
82    };
83
84    if !workspace.exists(std::path::Path::new(".git")) {
85        // If the caller's workspace doesn't correspond to a real on-disk repo root (e.g.
86        // MemoryWorkspace tests), don't attempt to discover/emit a diff from the process CWD.
87        return Err(io::Error::new(
88            io::ErrorKind::NotFound,
89            "Workspace has no on-disk git repository",
90        ));
91    }
92
93    // Ensure a valid start point exists. This is expected to persist across runs, but we also
94    // repair missing/corrupt files opportunistically for robustness.
95    save_start_commit_with_workspace(workspace, &repo)?;
96
97    match load_start_point_with_workspace(workspace, &repo)? {
98        StartPoint::Commit(oid) => git_diff_from_oid(&repo, oid),
99        StartPoint::EmptyRepo => git_diff_from_empty_tree(&repo),
100    }
101}
102
103/// Get the diff content that should be shown to reviewers.
104///
105/// Baseline selection:
106/// - If `.agent/review_baseline.txt` is set, diff from that commit.
107/// - Otherwise, diff from `.agent/start_commit` (the initial pipeline baseline).
108///
109/// The diff is always generated against the current state on disk (staged + unstaged + untracked).
110///
111/// Returns `(diff, baseline_oid_for_prompts)` where `baseline_oid_for_prompts` is the commit hash
112/// to mention in fallback instructions (or empty for empty repo baseline).
113pub fn get_git_diff_for_review_with_workspace(
114    workspace: &dyn Workspace,
115) -> io::Result<(String, String)> {
116    use crate::git_helpers::review_baseline::{
117        load_review_baseline_with_workspace, ReviewBaseline,
118    };
119    use crate::git_helpers::start_commit::{
120        load_start_point_with_workspace, save_start_commit_with_workspace, StartPoint,
121    };
122
123    // NOTE: See comment in get_git_diff_from_start_with_workspace.
124    let repo = git2::Repository::discover(".").map_err(|e| git2_to_io_error(&e))?;
125
126    let baseline = load_review_baseline_with_workspace(workspace).unwrap_or(ReviewBaseline::NotSet);
127    match baseline {
128        ReviewBaseline::Commit(oid) => {
129            let diff = git_diff_from_oid(&repo, oid)?;
130            Ok((diff, oid.to_string()))
131        }
132        ReviewBaseline::NotSet => {
133            // Ensure a valid start point exists.
134            save_start_commit_with_workspace(workspace, &repo)?;
135
136            match load_start_point_with_workspace(workspace, &repo)? {
137                StartPoint::Commit(oid) => {
138                    let diff = git_diff_from_oid(&repo, oid)?;
139                    Ok((diff, oid.to_string()))
140                }
141                StartPoint::EmptyRepo => Ok((git_diff_from_empty_tree(&repo)?, String::new())),
142            }
143        }
144    }
145}
146
147/// Implementation of git diff.
148fn git_diff_impl(repo: &git2::Repository) -> io::Result<String> {
149    // Try to get HEAD tree.
150    let head_tree = match repo.head() {
151        Ok(head) => Some(head.peel_to_tree().map_err(|e| git2_to_io_error(&e))?),
152        Err(ref e) if e.code() == git2::ErrorCode::UnbornBranch => {
153            // No commits yet: diff an empty tree against the workdir.
154            let mut diff_opts = git2::DiffOptions::new();
155            diff_opts.include_untracked(true);
156            diff_opts.recurse_untracked_dirs(true);
157
158            let diff = repo
159                .diff_tree_to_workdir_with_index(None, Some(&mut diff_opts))
160                .map_err(|e| git2_to_io_error(&e))?;
161
162            let mut result = Vec::new();
163            diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
164                result.extend_from_slice(line.content());
165                true
166            })
167            .map_err(|e| git2_to_io_error(&e))?;
168
169            return Ok(String::from_utf8_lossy(&result).to_string());
170        }
171        Err(e) => return Err(git2_to_io_error(&e)),
172    };
173
174    // For repos with commits, diff HEAD against working tree (staged + unstaged + untracked).
175    let mut diff_opts = git2::DiffOptions::new();
176    diff_opts.include_untracked(true);
177    diff_opts.recurse_untracked_dirs(true);
178
179    let diff = repo
180        .diff_tree_to_workdir_with_index(head_tree.as_ref(), Some(&mut diff_opts))
181        .map_err(|e| git2_to_io_error(&e))?;
182
183    let mut result = Vec::new();
184    diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
185        result.extend_from_slice(line.content());
186        true
187    })
188    .map_err(|e| git2_to_io_error(&e))?;
189
190    Ok(String::from_utf8_lossy(&result).to_string())
191}
192
193fn git_diff_from_oid(repo: &git2::Repository, oid: git2::Oid) -> io::Result<String> {
194    let start_commit = repo.find_commit(oid).map_err(|e| git2_to_io_error(&e))?;
195    let start_tree = start_commit.tree().map_err(|e| git2_to_io_error(&e))?;
196
197    let mut diff_opts = git2::DiffOptions::new();
198    diff_opts.include_untracked(true);
199    diff_opts.recurse_untracked_dirs(true);
200
201    let diff = repo
202        .diff_tree_to_workdir_with_index(Some(&start_tree), Some(&mut diff_opts))
203        .map_err(|e| git2_to_io_error(&e))?;
204
205    let mut result = Vec::new();
206    diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
207        result.extend_from_slice(line.content());
208        true
209    })
210    .map_err(|e| git2_to_io_error(&e))?;
211
212    Ok(String::from_utf8_lossy(&result).to_string())
213}
214
215/// Generate a diff from the empty tree (initial commit).
216fn git_diff_from_empty_tree(repo: &git2::Repository) -> io::Result<String> {
217    let mut diff_opts = git2::DiffOptions::new();
218    diff_opts.include_untracked(true);
219    diff_opts.recurse_untracked_dirs(true);
220
221    let diff = repo
222        .diff_tree_to_workdir_with_index(None, Some(&mut diff_opts))
223        .map_err(|e| git2_to_io_error(&e))?;
224
225    let mut result = Vec::new();
226    diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
227        result.extend_from_slice(line.content());
228        true
229    })
230    .map_err(|e| git2_to_io_error(&e))?;
231
232    Ok(String::from_utf8_lossy(&result).to_string())
233}