Skip to main content

ralph_workflow/git_helpers/
review_baseline.rs

1//! Per-review-cycle baseline tracking.
2//!
3//! This module manages the baseline commit for each review cycle, ensuring that
4//! reviewers only see changes from the current cycle rather than cumulative changes
5//! from previous fix commits.
6//!
7//! # Overview
8//!
9//! During the review-fix phase, each cycle should:
10//! 1. Capture baseline before review (current HEAD)
11//! 2. Review sees diff from that baseline
12//! 3. Fixer makes changes
13//! 4. Baseline is updated after fix pass
14//! 5. Next review cycle sees only new changes
15//!
16//! This prevents "diff scope creep" where previous fix commits pollute
17//! subsequent review passes.
18
19use std::fs;
20use std::io;
21use std::path::Path;
22
23#[cfg(any(test, feature = "test-utils"))]
24use crate::workspace::Workspace;
25
26use super::start_commit::get_current_head_oid;
27
/// Path to the review baseline file.
///
/// Stored in `.agent/review_baseline.txt`, this file contains the OID (SHA) of the
/// commit that serves as the baseline for the current review cycle.
///
/// The path is relative: the CWD-based helpers in this module hand it to `std::fs`
/// as-is, while the workspace-aware helpers pass it to the `Workspace` they are given.
const REVIEW_BASELINE_FILE: &str = ".agent/review_baseline.txt";

/// Sentinel value when review baseline is not set.
///
/// A baseline file whose trimmed content equals this marker (or is empty) is
/// loaded as [`ReviewBaseline::NotSet`].
const BASELINE_NOT_SET: &str = "__BASELINE_NOT_SET__";
36
/// Review baseline state.
///
/// Produced by the `load_review_baseline*` functions in this module: baseline
/// content that parses as an OID yields [`ReviewBaseline::Commit`]; a missing
/// file, empty content, or the not-set sentinel yields [`ReviewBaseline::NotSet`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReviewBaseline {
    /// A concrete commit OID to diff from.
    Commit(git2::Oid),
    /// Baseline not set (first review cycle).
    NotSet,
}
45
46/// Update the review baseline to current HEAD.
47///
48/// This should be called AFTER each fix pass to update the baseline so
49/// the next review cycle sees only new changes.
50///
51/// # Errors
52///
53/// Returns an error if:
54/// - The current HEAD cannot be determined
55/// - The file cannot be written
56///
57/// **Note:** This function uses the current working directory to discover the repo.
58/// For explicit path control, use [`update_review_baseline_at`] instead.
59pub fn update_review_baseline() -> io::Result<()> {
60    let oid = get_current_head_oid()?;
61    write_review_baseline_cwd(&oid)
62}
63
64/// Load the review baseline.
65///
66/// Returns the baseline commit for the current review cycle.
67///
68/// # Errors
69///
70/// Returns an error if:
71/// - The file cannot be read
72/// - The file content is invalid
73///
74pub fn load_review_baseline() -> io::Result<ReviewBaseline> {
75    let path = Path::new(REVIEW_BASELINE_FILE);
76    load_review_baseline_impl(path)
77}
78
79/// Implementation of load_review_baseline.
80fn load_review_baseline_impl(path: &Path) -> io::Result<ReviewBaseline> {
81    if !path.exists() {
82        return Ok(ReviewBaseline::NotSet);
83    }
84
85    let content = fs::read_to_string(path)?;
86    let raw = content.trim();
87
88    if raw.is_empty() || raw == BASELINE_NOT_SET {
89        return Ok(ReviewBaseline::NotSet);
90    }
91
92    // Parse the OID
93    let oid = git2::Oid::from_str(raw).map_err(|_| {
94        io::Error::new(
95            io::ErrorKind::InvalidData,
96            format!("Invalid OID format in {}: '{}'. The review baseline will be reset. Run 'ralph --reset-start-commit' if this persists.", REVIEW_BASELINE_FILE, raw),
97        )
98    })?;
99
100    Ok(ReviewBaseline::Commit(oid))
101}
102
103/// Get information about the current review baseline.
104///
105/// Returns a tuple of (baseline_oid, commits_since_baseline, is_stale).
106/// - `baseline_oid`: The OID of the baseline commit (or None if not set)
107/// - `commits_since_baseline`: Number of commits since baseline
108/// - `is_stale`: true if baseline is old (>10 commits behind)
109///
110pub fn get_review_baseline_info() -> io::Result<(Option<String>, usize, bool)> {
111    let repo = git2::Repository::discover(".").map_err(|e| to_io_error(&e))?;
112    get_review_baseline_info_impl(&repo, load_review_baseline()?)
113}
114
115/// Implementation of get_review_baseline_info.
116fn get_review_baseline_info_impl(
117    repo: &git2::Repository,
118    baseline: ReviewBaseline,
119) -> io::Result<(Option<String>, usize, bool)> {
120    let baseline_oid = match baseline {
121        ReviewBaseline::Commit(oid) => Some(oid.to_string()),
122        ReviewBaseline::NotSet => None,
123    };
124
125    let commits_since = if let Some(ref oid) = baseline_oid {
126        count_commits_since(repo, oid)?
127    } else {
128        0
129    };
130
131    let is_stale = commits_since > 10;
132
133    Ok((baseline_oid, commits_since, is_stale))
134}
135
136/// Write the review baseline to disk (CWD-based, for backward compatibility).
137fn write_review_baseline_cwd(oid: &str) -> io::Result<()> {
138    let path = Path::new(REVIEW_BASELINE_FILE);
139    if let Some(parent) = path.parent() {
140        fs::create_dir_all(parent)?;
141    }
142    fs::write(path, oid)?;
143    Ok(())
144}
145
/// Persist `oid` as the review baseline through the workspace abstraction.
///
/// Workspace-aware counterpart of the CWD-based writer; the baseline path is
/// handed to `workspace` unchanged. Only compiled for tests or with the
/// `test-utils` feature.
#[cfg(any(test, feature = "test-utils"))]
fn write_review_baseline_with_workspace(workspace: &dyn Workspace, oid: &str) -> io::Result<()> {
    let baseline_path = Path::new(REVIEW_BASELINE_FILE);
    workspace.write(baseline_path, oid)
}
153
/// Load the review baseline through the workspace abstraction.
///
/// Workspace-aware counterpart of [`load_review_baseline`]. Yields
/// [`ReviewBaseline::NotSet`] when the workspace has no baseline file, or when
/// its trimmed content is empty or equal to the not-set sentinel.
///
/// # Errors
///
/// Returns an error if the workspace read fails, or an
/// [`io::ErrorKind::InvalidData`] error if the content is not a valid OID.
#[cfg(any(test, feature = "test-utils"))]
pub fn load_review_baseline_with_workspace(
    workspace: &dyn Workspace,
) -> io::Result<ReviewBaseline> {
    let baseline_path = Path::new(REVIEW_BASELINE_FILE);
    if !workspace.exists(baseline_path) {
        return Ok(ReviewBaseline::NotSet);
    }

    let content = workspace.read(baseline_path)?;
    match content.trim() {
        // Blank file or explicit sentinel: no baseline recorded yet.
        "" | BASELINE_NOT_SET => Ok(ReviewBaseline::NotSet),
        raw => git2::Oid::from_str(raw)
            .map(ReviewBaseline::Commit)
            .map_err(|_| {
                io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!(
                        "Invalid OID format in {}: '{}'. The review baseline will be reset. \
                         Run 'ralph --reset-start-commit' if this persists.",
                        REVIEW_BASELINE_FILE, raw
                    ),
                )
            }),
    }
}
188
/// Record the current HEAD as the review baseline via the workspace abstraction.
///
/// Call this AFTER each fix pass so the next review cycle starts from the new
/// HEAD. Only compiled for tests or with the `test-utils` feature.
///
/// # Errors
///
/// Returns an error if HEAD cannot be determined or the workspace write fails.
#[cfg(any(test, feature = "test-utils"))]
pub fn update_review_baseline_with_workspace(workspace: &dyn Workspace) -> io::Result<()> {
    let head_oid = get_current_head_oid()?;
    write_review_baseline_with_workspace(workspace, &head_oid)?;
    Ok(())
}
197
198/// Count commits since a given baseline.
199fn count_commits_since(repo: &git2::Repository, baseline_oid: &str) -> io::Result<usize> {
200    let oid = git2::Oid::from_str(baseline_oid).map_err(|_| {
201        io::Error::new(
202            io::ErrorKind::InvalidInput,
203            format!("Invalid baseline OID: {baseline_oid}"),
204        )
205    })?;
206
207    let baseline = repo.find_commit(oid).map_err(|e| to_io_error(&e))?;
208
209    // Try to get HEAD and count commits
210    match repo.head() {
211        Ok(head) => {
212            let head_commit = head.peel_to_commit().map_err(|e| to_io_error(&e))?;
213
214            // Use revwalk to count commits
215            let mut revwalk = repo.revwalk().map_err(|e| to_io_error(&e))?;
216            revwalk
217                .push(head_commit.id())
218                .map_err(|e| to_io_error(&e))?;
219
220            let mut count = 0;
221            for commit_id in revwalk {
222                let commit_id = commit_id.map_err(|e| to_io_error(&e))?;
223                if commit_id == baseline.id() {
224                    break;
225                }
226                count += 1;
227                // Safety limit to prevent infinite loops
228                if count > 1000 {
229                    break;
230                }
231            }
232            Ok(count)
233        }
234        Err(_) => Ok(0),
235    }
236}
237
/// Diff statistics for the changes since baseline.
#[derive(Debug, Clone, Default)]
pub struct DiffStats {
    /// Number of files changed.
    pub files_changed: usize,
    /// Number of lines added.
    pub lines_added: usize,
    /// Number of lines deleted.
    pub lines_deleted: usize,
    /// List of changed file paths (up to 10 for display).
    pub changed_files: Vec<String>,
}

/// Baseline summary information for display.
#[derive(Debug, Clone)]
pub struct BaselineSummary {
    /// The baseline OID, or `None` when no baseline is set.
    ///
    /// Any length is accepted; the formatters abbreviate it to its first
    /// 8 characters for display.
    pub baseline_oid: Option<String>,
    /// Number of commits since baseline.
    pub commits_since: usize,
    /// Whether the baseline is stale (>10 commits behind).
    pub is_stale: bool,
    /// Diff statistics for changes since baseline.
    pub diff_stats: DiffStats,
}

impl BaselineSummary {
    /// Number of leading characters of an OID shown in formatted output.
    const SHORT_OID_LEN: usize = 8;

    /// Return at most the first [`Self::SHORT_OID_LEN`] characters of `oid`.
    ///
    /// The cut is made on a character boundary, so arbitrary (even non-ASCII)
    /// strings stored in the public `baseline_oid` field can never cause a
    /// slicing panic. For ordinary hex OIDs this is the first 8 bytes.
    fn abbreviate_oid(oid: &str) -> &str {
        match oid.char_indices().nth(Self::SHORT_OID_LEN) {
            Some((end, _)) => &oid[..end],
            None => oid,
        }
    }

    /// Format a compact version for inline display.
    ///
    /// The shape of the line depends on the state:
    /// - no baseline: `Baseline: start_commit (N files: +A/-D lines)`
    /// - stale baseline: commit count is shown with a leading `+`
    /// - commits since baseline: commit count and files changed
    /// - otherwise: files changed with added/deleted line counts
    pub fn format_compact(&self) -> String {
        let stats = &self.diff_stats;

        let oid = match &self.baseline_oid {
            Some(oid) => oid,
            None => {
                return format!(
                    "Baseline: start_commit ({} files: +{}/-{} lines)",
                    stats.files_changed, stats.lines_added, stats.lines_deleted
                );
            }
        };

        let short_oid = Self::abbreviate_oid(oid);
        if self.is_stale {
            format!(
                "Baseline: {} (+{} commits since, {} files changed)",
                short_oid, self.commits_since, stats.files_changed
            )
        } else if self.commits_since > 0 {
            format!(
                "Baseline: {} ({} commits since, {} files changed)",
                short_oid, self.commits_since, stats.files_changed
            )
        } else {
            format!(
                "Baseline: {} ({} files: +{}/-{} lines)",
                short_oid, stats.files_changed, stats.lines_added, stats.lines_deleted
            )
        }
    }

    /// Format a detailed version for verbose display.
    ///
    /// Produces a multi-line report (joined with `\n`, no trailing newline):
    /// header, baseline commit, counters, the recorded changed files with an
    /// "... and N more" line when the list is shorter than `files_changed`,
    /// and a warning when the baseline is stale.
    pub fn format_detailed(&self) -> String {
        let stats = &self.diff_stats;
        let mut lines = Vec::new();

        lines.push("Review Baseline Summary:".to_string());
        lines.push("─".repeat(40));

        match &self.baseline_oid {
            Some(oid) => {
                lines.push(format!("  Commit: {}", Self::abbreviate_oid(oid)));
                if self.commits_since > 0 {
                    lines.push(format!("  Commits since baseline: {}", self.commits_since));
                }
            }
            None => {
                lines.push("  Commit: start_commit (initial baseline)".to_string());
            }
        }

        lines.push(format!("  Files changed: {}", stats.files_changed));
        lines.push(format!("  Lines added: {}", stats.lines_added));
        lines.push(format!("  Lines deleted: {}", stats.lines_deleted));

        if !stats.changed_files.is_empty() {
            lines.push(String::new());
            lines.push("  Changed files:".to_string());
            for file in &stats.changed_files {
                lines.push(format!("    - {}", file));
            }
            // The file list may be truncated; say how many entries are not shown.
            if stats.changed_files.len() < stats.files_changed {
                let remaining = stats.files_changed - stats.changed_files.len();
                lines.push(format!("    ... and {} more", remaining));
            }
        }

        if self.is_stale {
            lines.push(String::new());
            lines.push(
                "  ⚠ WARNING: Baseline is stale. Consider updating with --reset-start-commit."
                    .to_string(),
            );
        }

        lines.join("\n")
    }
}
354
355/// Get a summary of the baseline state for display.
356///
357/// Returns a `BaselineSummary` containing information about the current
358/// baseline, commits since baseline, staleness, and diff statistics.
359///
360pub fn get_baseline_summary() -> io::Result<BaselineSummary> {
361    let repo = git2::Repository::discover(".").map_err(|e| to_io_error(&e))?;
362    get_baseline_summary_impl(&repo, load_review_baseline()?)
363}
364
365/// Implementation of get_baseline_summary.
366fn get_baseline_summary_impl(
367    repo: &git2::Repository,
368    baseline: ReviewBaseline,
369) -> io::Result<BaselineSummary> {
370    let baseline_oid = match baseline {
371        ReviewBaseline::Commit(oid) => Some(oid.to_string()),
372        ReviewBaseline::NotSet => None,
373    };
374
375    let commits_since = if let Some(ref oid) = baseline_oid {
376        count_commits_since(repo, oid)?
377    } else {
378        0
379    };
380
381    let is_stale = commits_since > 10;
382
383    // Get diff statistics
384    let diff_stats = get_diff_stats(repo, &baseline_oid)?;
385
386    Ok(BaselineSummary {
387        baseline_oid,
388        commits_since,
389        is_stale,
390        diff_stats,
391    })
392}
393
/// Count lines in a blob content.
///
/// Empty content has 0 lines. Any other content is reported as the number of
/// `\n` bytes plus 1, so content without a trailing newline still counts its
/// last line. As a consequence, content that *does* end in a newline is
/// reported as one more than its visible line count (e.g. `"a\n"` gives 2);
/// this matches the previous behavior and is preserved deliberately.
fn count_lines_in_blob(content: &[u8]) -> usize {
    match content {
        [] => 0,
        bytes => 1 + bytes.iter().filter(|byte| **byte == b'\n').count(),
    }
}
408
409/// Get diff statistics for changes since the baseline.
410fn get_diff_stats(repo: &git2::Repository, baseline_oid: &Option<String>) -> io::Result<DiffStats> {
411    let baseline_tree = match baseline_oid {
412        Some(oid) => {
413            let oid = git2::Oid::from_str(oid).map_err(|_| {
414                io::Error::new(
415                    io::ErrorKind::InvalidInput,
416                    format!("Invalid baseline OID: {}", oid),
417                )
418            })?;
419            let commit = repo.find_commit(oid).map_err(|e| to_io_error(&e))?;
420            commit.tree().map_err(|e| to_io_error(&e))?
421        }
422        None => {
423            // No baseline set, use empty tree
424            repo.find_tree(git2::Oid::zero())
425                .map_err(|e| to_io_error(&e))?
426        }
427    };
428
429    // Get the current HEAD tree
430    let head_tree = match repo.head() {
431        Ok(head) => {
432            let commit = head.peel_to_commit().map_err(|e| to_io_error(&e))?;
433            commit.tree().map_err(|e| to_io_error(&e))?
434        }
435        Err(_) => {
436            // No HEAD yet, use empty tree
437            repo.find_tree(git2::Oid::zero())
438                .map_err(|e| to_io_error(&e))?
439        }
440    };
441
442    // Generate diff
443    let diff = repo
444        .diff_tree_to_tree(Some(&baseline_tree), Some(&head_tree), None)
445        .map_err(|e| to_io_error(&e))?;
446
447    // Collect statistics
448    let mut stats = DiffStats::default();
449    let mut delta_ids = Vec::new();
450
451    diff.foreach(
452        &mut |delta, _progress| {
453            use git2::Delta;
454
455            stats.files_changed += 1;
456
457            if let Some(path) = delta.new_file().path() {
458                let path_str = path.to_string_lossy().to_string();
459                if stats.changed_files.len() < 10 {
460                    stats.changed_files.push(path_str);
461                }
462            } else if let Some(path) = delta.old_file().path() {
463                let path_str = path.to_string_lossy().to_string();
464                if stats.changed_files.len() < 10 {
465                    stats.changed_files.push(path_str);
466                }
467            }
468
469            match delta.status() {
470                Delta::Added => {
471                    delta_ids.push((delta.new_file().id(), true));
472                }
473                Delta::Deleted => {
474                    delta_ids.push((delta.old_file().id(), false));
475                }
476                Delta::Modified => {
477                    delta_ids.push((delta.new_file().id(), true));
478                }
479                _ => {}
480            }
481
482            true
483        },
484        None,
485        None,
486        None,
487    )
488    .map_err(|e| to_io_error(&e))?;
489
490    // Count lines added/deleted
491    for (blob_id, is_new_or_modified) in delta_ids {
492        if let Ok(blob) = repo.find_blob(blob_id) {
493            let line_count = count_lines_in_blob(blob.content());
494
495            if is_new_or_modified {
496                stats.lines_added += line_count;
497            } else {
498                stats.lines_deleted += line_count;
499            }
500        }
501    }
502
503    Ok(stats)
504}
505
506/// Convert git2 error to `io::Error`.
507fn to_io_error(err: &git2::Error) -> io::Error {
508    io::Error::other(err.to_string())
509}
510
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_review_baseline_file_path_defined() {
        assert_eq!(REVIEW_BASELINE_FILE, ".agent/review_baseline.txt");
    }

    #[test]
    fn test_load_review_baseline_returns_result() {
        // Smoke test: the outcome depends on the working directory the test
        // runs in, so the only requirement is that the call does not panic.
        let _ = load_review_baseline();
    }

    #[test]
    fn test_get_review_baseline_info_returns_result() {
        // Smoke test: may succeed or fail depending on the surrounding
        // repository state, but must never panic.
        let _ = get_review_baseline_info();
    }

    #[test]
    fn test_load_review_baseline_impl_missing_file_is_not_set() {
        let missing = std::env::temp_dir()
            .join(format!("ralph_review_baseline_missing_{}", std::process::id()))
            .join("review_baseline.txt");

        let result = load_review_baseline_impl(&missing).unwrap();
        assert_eq!(result, ReviewBaseline::NotSet);
    }

    // =========================================================================
    // Pure helper tests
    // =========================================================================

    #[test]
    fn test_count_lines_in_blob() {
        assert_eq!(count_lines_in_blob(b""), 0);
        assert_eq!(count_lines_in_blob(b"single line"), 1);
        assert_eq!(count_lines_in_blob(b"a\nb"), 2);
        // A trailing newline counts as starting one more line.
        assert_eq!(count_lines_in_blob(b"a\nb\n"), 3);
    }

    #[test]
    fn test_format_compact_variants() {
        let mut summary = BaselineSummary {
            baseline_oid: Some("abcd1234abcd1234abcd1234abcd1234abcd1234".to_string()),
            commits_since: 0,
            is_stale: false,
            diff_stats: DiffStats {
                files_changed: 3,
                lines_added: 5,
                lines_deleted: 1,
                changed_files: Vec::new(),
            },
        };
        assert_eq!(
            summary.format_compact(),
            "Baseline: abcd1234 (3 files: +5/-1 lines)"
        );

        summary.commits_since = 2;
        assert_eq!(
            summary.format_compact(),
            "Baseline: abcd1234 (2 commits since, 3 files changed)"
        );

        summary.commits_since = 12;
        summary.is_stale = true;
        assert_eq!(
            summary.format_compact(),
            "Baseline: abcd1234 (+12 commits since, 3 files changed)"
        );

        summary.baseline_oid = None;
        assert_eq!(
            summary.format_compact(),
            "Baseline: start_commit (3 files: +5/-1 lines)"
        );
    }

    // =========================================================================
    // Workspace-aware function tests
    // =========================================================================

    #[test]
    fn test_load_review_baseline_with_workspace_not_set() {
        use crate::workspace::MemoryWorkspace;

        let workspace = MemoryWorkspace::new_test();

        let result = load_review_baseline_with_workspace(&workspace).unwrap();
        assert_eq!(result, ReviewBaseline::NotSet);
    }

    #[test]
    fn test_load_review_baseline_with_workspace_sentinel() {
        use crate::workspace::MemoryWorkspace;

        let workspace =
            MemoryWorkspace::new_test().with_file(".agent/review_baseline.txt", BASELINE_NOT_SET);

        let result = load_review_baseline_with_workspace(&workspace).unwrap();
        assert_eq!(result, ReviewBaseline::NotSet);
    }

    #[test]
    fn test_load_review_baseline_with_workspace_empty() {
        use crate::workspace::MemoryWorkspace;

        let workspace = MemoryWorkspace::new_test().with_file(".agent/review_baseline.txt", "");

        let result = load_review_baseline_with_workspace(&workspace).unwrap();
        assert_eq!(result, ReviewBaseline::NotSet);
    }

    #[test]
    fn test_load_review_baseline_with_workspace_valid_oid() {
        use crate::workspace::MemoryWorkspace;

        let workspace = MemoryWorkspace::new_test().with_file(
            ".agent/review_baseline.txt",
            "abcd1234abcd1234abcd1234abcd1234abcd1234",
        );

        let result = load_review_baseline_with_workspace(&workspace).unwrap();
        let expected_oid = git2::Oid::from_str("abcd1234abcd1234abcd1234abcd1234abcd1234").unwrap();
        assert_eq!(result, ReviewBaseline::Commit(expected_oid));
    }

    #[test]
    fn test_load_review_baseline_with_workspace_invalid_oid() {
        use crate::workspace::MemoryWorkspace;

        let workspace =
            MemoryWorkspace::new_test().with_file(".agent/review_baseline.txt", "invalid");

        let result = load_review_baseline_with_workspace(&workspace);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().kind(), io::ErrorKind::InvalidData);
    }

    #[test]
    fn test_write_review_baseline_with_workspace() {
        use crate::workspace::MemoryWorkspace;

        let workspace = MemoryWorkspace::new_test();

        write_review_baseline_with_workspace(&workspace, "abc123").unwrap();

        let content = workspace.get_file(".agent/review_baseline.txt").unwrap();
        assert_eq!(content, "abc123");
    }
}