Skip to main content

ralph_workflow/git_helpers/
review_baseline.rs

1//! Per-review-cycle baseline tracking.
2//!
3//! This module manages the baseline commit for each review cycle, ensuring that
4//! reviewers only see changes from the current cycle rather than cumulative changes
5//! from previous fix commits.
6//!
7//! # Overview
8//!
9//! During the review-fix phase, each cycle should:
10//! 1. Capture baseline before review (current HEAD)
11//! 2. Review sees diff from that baseline
12//! 3. Fixer makes changes
13//! 4. Baseline is updated after fix pass
14//! 5. Next review cycle sees only new changes
15//!
16//! This prevents "diff scope creep" where previous fix commits pollute
17//! subsequent review passes.
18
19use std::io;
20use std::path::Path;
21
22use crate::workspace::{Workspace, WorkspaceFs};
23
24use super::start_commit::get_current_head_oid;
25
/// Workspace-relative location of the persisted review baseline.
///
/// The file (`.agent/review_baseline.txt`) holds the OID (SHA) of the commit
/// that the current review cycle diffs against.
const REVIEW_BASELINE_FILE: &str = ".agent/review_baseline.txt";

/// File content that is interpreted as "no baseline has been set" on load.
const BASELINE_NOT_SET: &str = "__BASELINE_NOT_SET__";
34
35/// Review baseline state.
36#[derive(Debug, Clone, PartialEq, Eq)]
37pub enum ReviewBaseline {
38    /// A concrete commit OID to diff from.
39    Commit(git2::Oid),
40    /// Baseline not set (first review cycle).
41    NotSet,
42}
43
44/// Update the review baseline to current HEAD.
45///
46/// This should be called AFTER each fix pass to update the baseline so
47/// the next review cycle sees only new changes.
48///
49/// # Errors
50///
51/// Returns an error if:
52/// - The current HEAD cannot be determined
53/// - The file cannot be written
54///
55/// **Note:** This function uses the current working directory to discover the repo.
56/// For explicit path control, use [`update_review_baseline_at`] instead.
57pub fn update_review_baseline() -> io::Result<()> {
58    let oid = get_current_head_oid()?;
59    write_review_baseline_cwd(&oid)
60}
61
62/// Load the review baseline.
63///
64/// Returns the baseline commit for the current review cycle.
65///
66/// # Errors
67///
68/// Returns an error if:
69/// - The file cannot be read
70/// - The file content is invalid
71///
72pub fn load_review_baseline() -> io::Result<ReviewBaseline> {
73    // Use CWD as workspace root since this is a relative path
74    let workspace = WorkspaceFs::new(std::env::current_dir()?);
75    load_review_baseline_with_workspace(&workspace)
76}
77
78/// Get information about the current review baseline.
79///
80/// Returns a tuple of (baseline_oid, commits_since_baseline, is_stale).
81/// - `baseline_oid`: The OID of the baseline commit (or None if not set)
82/// - `commits_since_baseline`: Number of commits since baseline
83/// - `is_stale`: true if baseline is old (>10 commits behind)
84///
85pub fn get_review_baseline_info() -> io::Result<(Option<String>, usize, bool)> {
86    let repo = git2::Repository::discover(".").map_err(|e| to_io_error(&e))?;
87    get_review_baseline_info_impl(&repo, load_review_baseline()?)
88}
89
90/// Implementation of get_review_baseline_info.
91fn get_review_baseline_info_impl(
92    repo: &git2::Repository,
93    baseline: ReviewBaseline,
94) -> io::Result<(Option<String>, usize, bool)> {
95    let baseline_oid = match baseline {
96        ReviewBaseline::Commit(oid) => Some(oid.to_string()),
97        ReviewBaseline::NotSet => None,
98    };
99
100    let commits_since = if let Some(ref oid) = baseline_oid {
101        count_commits_since(repo, oid)?
102    } else {
103        0
104    };
105
106    let is_stale = commits_since > 10;
107
108    Ok((baseline_oid, commits_since, is_stale))
109}
110
111/// Write the review baseline to disk (CWD-based, for backward compatibility).
112fn write_review_baseline_cwd(oid: &str) -> io::Result<()> {
113    let workspace = WorkspaceFs::new(std::env::current_dir()?);
114    write_review_baseline_with_workspace(&workspace, oid)
115}
116
117/// Write the review baseline using workspace abstraction.
118///
119/// This is the workspace-aware version for pipeline code.
120fn write_review_baseline_with_workspace(workspace: &dyn Workspace, oid: &str) -> io::Result<()> {
121    workspace.write(Path::new(REVIEW_BASELINE_FILE), oid)
122}
123
124/// Load the review baseline using workspace abstraction.
125///
126/// This is the workspace-aware version for pipeline code.
127pub fn load_review_baseline_with_workspace(
128    workspace: &dyn Workspace,
129) -> io::Result<ReviewBaseline> {
130    let path = Path::new(REVIEW_BASELINE_FILE);
131
132    if !workspace.exists(path) {
133        return Ok(ReviewBaseline::NotSet);
134    }
135
136    let content = workspace.read(path)?;
137    let raw = content.trim();
138
139    if raw.is_empty() || raw == BASELINE_NOT_SET {
140        return Ok(ReviewBaseline::NotSet);
141    }
142
143    // Parse the OID
144    let oid = git2::Oid::from_str(raw).map_err(|_| {
145        io::Error::new(
146            io::ErrorKind::InvalidData,
147            format!(
148                "Invalid OID format in {}: '{}'. The review baseline will be reset. \
149                 Run 'ralph --reset-start-commit' if this persists.",
150                REVIEW_BASELINE_FILE, raw
151            ),
152        )
153    })?;
154
155    Ok(ReviewBaseline::Commit(oid))
156}
157
158/// Update the review baseline to current HEAD using workspace abstraction.
159///
160/// This should be called AFTER each fix pass to update the baseline.
161pub fn update_review_baseline_with_workspace(workspace: &dyn Workspace) -> io::Result<()> {
162    let oid = get_current_head_oid()?;
163    write_review_baseline_with_workspace(workspace, &oid)
164}
165
166/// Count commits since a given baseline.
167fn count_commits_since(repo: &git2::Repository, baseline_oid: &str) -> io::Result<usize> {
168    let oid = git2::Oid::from_str(baseline_oid).map_err(|_| {
169        io::Error::new(
170            io::ErrorKind::InvalidInput,
171            format!("Invalid baseline OID: {baseline_oid}"),
172        )
173    })?;
174
175    let baseline = repo.find_commit(oid).map_err(|e| to_io_error(&e))?;
176
177    // Try to get HEAD and count commits
178    match repo.head() {
179        Ok(head) => {
180            let head_commit = head.peel_to_commit().map_err(|e| to_io_error(&e))?;
181
182            // Use revwalk to count commits
183            let mut revwalk = repo.revwalk().map_err(|e| to_io_error(&e))?;
184            revwalk
185                .push(head_commit.id())
186                .map_err(|e| to_io_error(&e))?;
187
188            let mut count = 0;
189            for commit_id in revwalk {
190                let commit_id = commit_id.map_err(|e| to_io_error(&e))?;
191                if commit_id == baseline.id() {
192                    break;
193                }
194                count += 1;
195                // Safety limit to prevent infinite loops
196                if count > 1000 {
197                    break;
198                }
199            }
200            Ok(count)
201        }
202        Err(_) => Ok(0),
203    }
204}
205
/// Diff statistics for the changes since baseline.
#[derive(Debug, Clone, Default)]
pub struct DiffStats {
    /// Number of files changed.
    pub files_changed: usize,
    /// Number of lines added.
    pub lines_added: usize,
    /// Number of lines deleted.
    pub lines_deleted: usize,
    /// List of changed file paths (up to 10 for display).
    pub changed_files: Vec<String>,
}

/// Baseline summary information for display.
#[derive(Debug, Clone)]
pub struct BaselineSummary {
    /// The baseline OID as a string, or `None` when no baseline is set.
    ///
    /// The formatting methods shorten it to its first 8 characters.
    pub baseline_oid: Option<String>,
    /// Number of commits since baseline.
    pub commits_since: usize,
    /// Whether the baseline is stale (>10 commits behind).
    pub is_stale: bool,
    /// Diff statistics for changes since baseline.
    pub diff_stats: DiffStats,
}

impl BaselineSummary {
    /// Number of leading characters of the OID shown in formatted output.
    const SHORT_OID_LEN: usize = 8;

    /// Return at most the first [`Self::SHORT_OID_LEN`] characters of `oid`.
    ///
    /// The cut is made on a character boundary, so an arbitrary (non-ASCII)
    /// string in the public `baseline_oid` field cannot cause a slicing panic.
    fn short_oid(oid: &str) -> &str {
        match oid.char_indices().nth(Self::SHORT_OID_LEN) {
            Some((end, _)) => &oid[..end],
            None => oid,
        }
    }

    /// Format a compact, single-line version for inline display.
    ///
    /// - Stale baseline: commit count (prefixed with `+`) and files changed.
    /// - Baseline with commits since: commit count and files changed.
    /// - Otherwise (including no baseline): files changed and added/deleted lines.
    pub fn format_compact(&self) -> String {
        let stats = &self.diff_stats;
        match self.baseline_oid.as_deref() {
            Some(oid) => {
                let short_oid = Self::short_oid(oid);
                if self.is_stale {
                    format!(
                        "Baseline: {} (+{} commits since, {} files changed)",
                        short_oid, self.commits_since, stats.files_changed
                    )
                } else if self.commits_since > 0 {
                    format!(
                        "Baseline: {} ({} commits since, {} files changed)",
                        short_oid, self.commits_since, stats.files_changed
                    )
                } else {
                    format!(
                        "Baseline: {} ({} files: +{}/-{} lines)",
                        short_oid, stats.files_changed, stats.lines_added, stats.lines_deleted
                    )
                }
            }
            None => format!(
                "Baseline: start_commit ({} files: +{}/-{} lines)",
                stats.files_changed, stats.lines_added, stats.lines_deleted
            ),
        }
    }

    /// Format a detailed, multi-line version for verbose display.
    ///
    /// Lists the baseline commit, the diff statistics, the recorded changed
    /// files (with an "... and N more" line when the list is shorter than
    /// `files_changed`), and a warning when the baseline is stale.
    pub fn format_detailed(&self) -> String {
        let stats = &self.diff_stats;
        let mut lines = Vec::new();

        lines.push("Review Baseline Summary:".to_string());
        lines.push("─".repeat(40));

        match self.baseline_oid.as_deref() {
            Some(oid) => {
                lines.push(format!("  Commit: {}", Self::short_oid(oid)));
                if self.commits_since > 0 {
                    lines.push(format!("  Commits since baseline: {}", self.commits_since));
                }
            }
            None => {
                lines.push("  Commit: start_commit (initial baseline)".to_string());
            }
        }

        lines.push(format!("  Files changed: {}", stats.files_changed));
        lines.push(format!("  Lines added: {}", stats.lines_added));
        lines.push(format!("  Lines deleted: {}", stats.lines_deleted));

        if !stats.changed_files.is_empty() {
            lines.push(String::new());
            lines.push("  Changed files:".to_string());
            lines.extend(
                stats
                    .changed_files
                    .iter()
                    .map(|file| format!("    - {}", file)),
            );
            // The guard also keeps the subtraction from underflowing.
            if stats.changed_files.len() < stats.files_changed {
                let remaining = stats.files_changed - stats.changed_files.len();
                lines.push(format!("    ... and {} more", remaining));
            }
        }

        if self.is_stale {
            lines.push(String::new());
            lines.push(
                "  ⚠ WARNING: Baseline is stale. Consider updating with --reset-start-commit."
                    .to_string(),
            );
        }

        lines.join("\n")
    }
}
322
323/// Get a summary of the baseline state for display.
324///
325/// Returns a `BaselineSummary` containing information about the current
326/// baseline, commits since baseline, staleness, and diff statistics.
327///
328pub fn get_baseline_summary() -> io::Result<BaselineSummary> {
329    let repo = git2::Repository::discover(".").map_err(|e| to_io_error(&e))?;
330    get_baseline_summary_impl(&repo, load_review_baseline()?)
331}
332
333/// Implementation of get_baseline_summary.
334fn get_baseline_summary_impl(
335    repo: &git2::Repository,
336    baseline: ReviewBaseline,
337) -> io::Result<BaselineSummary> {
338    let baseline_oid = match baseline {
339        ReviewBaseline::Commit(oid) => Some(oid.to_string()),
340        ReviewBaseline::NotSet => None,
341    };
342
343    let commits_since = if let Some(ref oid) = baseline_oid {
344        count_commits_since(repo, oid)?
345    } else {
346        0
347    };
348
349    let is_stale = commits_since > 10;
350
351    // Get diff statistics
352    let diff_stats = get_diff_stats(repo, &baseline_oid)?;
353
354    Ok(BaselineSummary {
355        baseline_oid,
356        commits_since,
357        is_stale,
358        diff_stats,
359    })
360}
361
/// Count the lines in a blob's content.
///
/// Every `\n` terminates one line; a final run of bytes without a trailing
/// newline counts as one more line. Empty content has 0 lines.
///
/// Counting "newlines + 1" unconditionally would report one line too many for
/// content that ends with a newline (e.g. `"a\nb\n"` is 2 lines, not 3).
fn count_lines_in_blob(content: &[u8]) -> usize {
    let newlines = content.iter().filter(|&&byte| byte == b'\n').count();
    match content.last() {
        // Empty content, or content whose last line is newline-terminated.
        None | Some(&b'\n') => newlines,
        // Unterminated final line.
        Some(_) => newlines + 1,
    }
}
376
377/// Get diff statistics for changes since the baseline.
378fn get_diff_stats(repo: &git2::Repository, baseline_oid: &Option<String>) -> io::Result<DiffStats> {
379    let baseline_tree = match baseline_oid {
380        Some(oid) => {
381            let oid = git2::Oid::from_str(oid).map_err(|_| {
382                io::Error::new(
383                    io::ErrorKind::InvalidInput,
384                    format!("Invalid baseline OID: {}", oid),
385                )
386            })?;
387            let commit = repo.find_commit(oid).map_err(|e| to_io_error(&e))?;
388            commit.tree().map_err(|e| to_io_error(&e))?
389        }
390        None => {
391            // No baseline set, use empty tree
392            repo.find_tree(git2::Oid::zero())
393                .map_err(|e| to_io_error(&e))?
394        }
395    };
396
397    // Get the current HEAD tree
398    let head_tree = match repo.head() {
399        Ok(head) => {
400            let commit = head.peel_to_commit().map_err(|e| to_io_error(&e))?;
401            commit.tree().map_err(|e| to_io_error(&e))?
402        }
403        Err(_) => {
404            // No HEAD yet, use empty tree
405            repo.find_tree(git2::Oid::zero())
406                .map_err(|e| to_io_error(&e))?
407        }
408    };
409
410    // Generate diff
411    let diff = repo
412        .diff_tree_to_tree(Some(&baseline_tree), Some(&head_tree), None)
413        .map_err(|e| to_io_error(&e))?;
414
415    // Collect statistics
416    let mut stats = DiffStats::default();
417    let mut delta_ids = Vec::new();
418
419    diff.foreach(
420        &mut |delta, _progress| {
421            use git2::Delta;
422
423            stats.files_changed += 1;
424
425            if let Some(path) = delta.new_file().path() {
426                let path_str = path.to_string_lossy().to_string();
427                if stats.changed_files.len() < 10 {
428                    stats.changed_files.push(path_str);
429                }
430            } else if let Some(path) = delta.old_file().path() {
431                let path_str = path.to_string_lossy().to_string();
432                if stats.changed_files.len() < 10 {
433                    stats.changed_files.push(path_str);
434                }
435            }
436
437            match delta.status() {
438                Delta::Added => {
439                    delta_ids.push((delta.new_file().id(), true));
440                }
441                Delta::Deleted => {
442                    delta_ids.push((delta.old_file().id(), false));
443                }
444                Delta::Modified => {
445                    delta_ids.push((delta.new_file().id(), true));
446                }
447                _ => {}
448            }
449
450            true
451        },
452        None,
453        None,
454        None,
455    )
456    .map_err(|e| to_io_error(&e))?;
457
458    // Count lines added/deleted
459    for (blob_id, is_new_or_modified) in delta_ids {
460        if let Ok(blob) = repo.find_blob(blob_id) {
461            let line_count = count_lines_in_blob(blob.content());
462
463            if is_new_or_modified {
464                stats.lines_added += line_count;
465            } else {
466                stats.lines_deleted += line_count;
467            }
468        }
469    }
470
471    Ok(stats)
472}
473
474/// Convert git2 error to `io::Error`.
475fn to_io_error(err: &git2::Error) -> io::Error {
476    io::Error::other(err.to_string())
477}
478
#[cfg(test)]
mod tests {
    use super::*;
    use crate::workspace::MemoryWorkspace;

    #[test]
    fn test_review_baseline_file_path_defined() {
        assert_eq!(REVIEW_BASELINE_FILE, ".agent/review_baseline.txt");
    }

    // The next two tests are smoke tests: the result depends on the directory
    // the tests run in, so they only check that the call returns.

    #[test]
    fn test_load_review_baseline_returns_result() {
        let outcome = load_review_baseline();
        assert!(outcome.is_ok() || outcome.is_err());
    }

    #[test]
    fn test_get_review_baseline_info_returns_result() {
        let outcome = get_review_baseline_info();
        assert!(outcome.is_ok() || outcome.is_err());
    }

    // =========================================================================
    // Workspace-aware function tests
    // =========================================================================

    #[test]
    fn test_load_review_baseline_with_workspace_not_set() {
        // No baseline file at all.
        let workspace = MemoryWorkspace::new_test();

        let loaded = load_review_baseline_with_workspace(&workspace).unwrap();
        assert_eq!(loaded, ReviewBaseline::NotSet);
    }

    #[test]
    fn test_load_review_baseline_with_workspace_sentinel() {
        let workspace =
            MemoryWorkspace::new_test().with_file(".agent/review_baseline.txt", BASELINE_NOT_SET);

        let loaded = load_review_baseline_with_workspace(&workspace).unwrap();
        assert_eq!(loaded, ReviewBaseline::NotSet);
    }

    #[test]
    fn test_load_review_baseline_with_workspace_empty() {
        let workspace = MemoryWorkspace::new_test().with_file(".agent/review_baseline.txt", "");

        let loaded = load_review_baseline_with_workspace(&workspace).unwrap();
        assert_eq!(loaded, ReviewBaseline::NotSet);
    }

    #[test]
    fn test_load_review_baseline_with_workspace_valid_oid() {
        let workspace = MemoryWorkspace::new_test().with_file(
            ".agent/review_baseline.txt",
            "abcd1234abcd1234abcd1234abcd1234abcd1234",
        );

        let expected_oid = git2::Oid::from_str("abcd1234abcd1234abcd1234abcd1234abcd1234").unwrap();
        let loaded = load_review_baseline_with_workspace(&workspace).unwrap();
        assert_eq!(loaded, ReviewBaseline::Commit(expected_oid));
    }

    #[test]
    fn test_load_review_baseline_with_workspace_invalid_oid() {
        let workspace =
            MemoryWorkspace::new_test().with_file(".agent/review_baseline.txt", "invalid");

        let outcome = load_review_baseline_with_workspace(&workspace);
        assert!(outcome.is_err());
        assert_eq!(outcome.unwrap_err().kind(), io::ErrorKind::InvalidData);
    }
}