Skip to main content

ralph_workflow/checkpoint/
validation.rs

1//! Checkpoint validation for resume functionality.
2//!
3//! This module provides validation for checkpoint state before resuming,
4//! ensuring the environment matches the checkpoint and detecting configuration changes.
5
6use crate::agents::AgentRegistry;
7use crate::checkpoint::state::{
8    calculate_file_checksum_with_workspace, AgentConfigSnapshot, PipelineCheckpoint,
9};
10use crate::config::Config;
11use crate::workspace::Workspace;
12use std::path::Path;
13
/// Outcome of validating a checkpoint prior to resuming from it.
///
/// Findings are split into blocking `errors` and informational `warnings`;
/// `is_valid` carries the overall verdict.
#[derive(Debug)]
pub struct ValidationResult {
    /// `true` when the checkpoint may be resumed.
    pub is_valid: bool,
    /// Non-blocking notices that should be surfaced to the user.
    pub warnings: Vec<String>,
    /// Blocking problems that make a resume impossible.
    pub errors: Vec<String>,
}

impl ValidationResult {
    /// Build a passing result that carries no warnings and no errors.
    #[must_use]
    pub const fn ok() -> Self {
        Self {
            errors: Vec::new(),
            warnings: Vec::new(),
            is_valid: true,
        }
    }

    /// Build a failing result that carries exactly one error message.
    pub fn error(msg: impl Into<String>) -> Self {
        let mut failed = Self::ok();
        failed.is_valid = false;
        failed.errors.push(msg.into());
        failed
    }

    /// Append a warning and hand the result back.
    ///
    /// The validity flag is left untouched: warnings never block a resume.
    #[must_use]
    pub fn with_warning(mut self, msg: impl Into<String>) -> Self {
        let text: String = msg.into();
        self.warnings.push(text);
        self
    }

    /// Fold `other` into `self`.
    ///
    /// The combined result is valid only if both inputs were valid. Warnings
    /// and errors from `other` are appended after those already in `self`.
    #[must_use]
    pub fn merge(mut self, other: Self) -> Self {
        let Self {
            is_valid,
            warnings,
            errors,
        } = other;

        self.is_valid = self.is_valid && is_valid;
        self.warnings.extend(warnings);
        self.errors.extend(errors);
        self
    }
}
63
64/// Validate a checkpoint before resuming.
65///
66/// Performs comprehensive validation to ensure the checkpoint can be safely resumed:
67/// - Working directory matches
68/// - PROMPT.md hasn't changed (if checksum available)
69/// - Agent configurations are compatible
70///
71/// Note: File system state validation is handled separately with recovery strategy
72/// in the resume flow (see `validate_file_system_state_with_strategy`).
73///
74/// # Arguments
75///
76/// * `checkpoint` - The checkpoint to validate
77/// * `current_config` - Current configuration to compare against
78/// * `registry` - Agent registry for agent validation
79/// * `workspace` - Workspace for explicit path resolution
80///
81/// # Returns
82///
83/// A `ValidationResult` with any warnings or errors found.
84pub fn validate_checkpoint(
85    checkpoint: &PipelineCheckpoint,
86    current_config: &Config,
87    registry: &AgentRegistry,
88    workspace: &dyn Workspace,
89) -> ValidationResult {
90    let mut result = ValidationResult::ok();
91
92    // Validate working directory
93    result = result.merge(validate_working_directory(checkpoint, workspace));
94
95    // Validate PROMPT.md checksum
96    result = result.merge(validate_prompt_md(checkpoint, workspace));
97
98    // Validate agent configurations
99    result = result.merge(validate_agent_config(
100        &checkpoint.developer_agent_config,
101        &checkpoint.developer_agent,
102        registry,
103    ));
104    result = result.merge(validate_agent_config(
105        &checkpoint.reviewer_agent_config,
106        &checkpoint.reviewer_agent,
107        registry,
108    ));
109
110    // Check for iteration count mismatches (warning only)
111    result = result.merge(validate_iteration_counts(checkpoint, current_config));
112
113    // Note: File system state validation is NOT included here because it requires
114    // recovery strategy handling. It's called separately in the resume flow.
115
116    result
117}
118
119/// Validate that the working directory matches the checkpoint.
120///
121/// Uses the workspace root for current working directory comparison.
122/// Rejects legacy checkpoints that have no working directory.
123pub fn validate_working_directory(
124    checkpoint: &PipelineCheckpoint,
125    workspace: &dyn Workspace,
126) -> ValidationResult {
127    if checkpoint.working_dir.is_empty() {
128        return ValidationResult::error(
129            "Checkpoint has no working directory recorded. Legacy checkpoints are not supported. \
130             Delete the checkpoint and restart the pipeline."
131                .to_string(),
132        );
133    }
134
135    let current_dir = workspace.root().to_string_lossy().to_string();
136
137    if current_dir != checkpoint.working_dir {
138        return ValidationResult::error(format!(
139            "Working directory mismatch: checkpoint was created in '{}', but current directory is '{}'",
140            checkpoint.working_dir, current_dir
141        ));
142    }
143
144    ValidationResult::ok()
145}
146
147/// Validate that PROMPT.md hasn't changed since checkpoint.
148///
149/// Rejects legacy checkpoints that have no PROMPT.md checksum.
150pub fn validate_prompt_md(
151    checkpoint: &PipelineCheckpoint,
152    workspace: &dyn Workspace,
153) -> ValidationResult {
154    let Some(ref saved_checksum) = checkpoint.prompt_md_checksum else {
155        return ValidationResult::error(
156            "Checkpoint has no PROMPT.md checksum. Legacy checkpoints are not supported. \
157             Delete the checkpoint and restart the pipeline."
158                .to_string(),
159        );
160    };
161
162    let current_checksum =
163        calculate_file_checksum_with_workspace(workspace, Path::new("PROMPT.md"));
164
165    match current_checksum {
166        Some(current) if current == *saved_checksum => ValidationResult::ok(),
167        Some(current) => ValidationResult::ok().with_warning(format!(
168            "PROMPT.md has changed since checkpoint was created (checksum: {} -> {})",
169            &saved_checksum[..8],
170            &current[..8]
171        )),
172        None => ValidationResult::ok()
173            .with_warning("PROMPT.md not found or unreadable - cannot verify integrity"),
174    }
175}
176
177/// Validate that an agent configuration matches the current registry.
178///
179/// Rejects legacy checkpoints that have empty agent commands.
180#[must_use]
181pub fn validate_agent_config(
182    saved_config: &AgentConfigSnapshot,
183    agent_name: &str,
184    registry: &AgentRegistry,
185) -> ValidationResult {
186    // Reject legacy checkpoints with empty commands
187    if saved_config.cmd.is_empty() {
188        return ValidationResult::error(format!(
189            "Checkpoint has empty agent command for '{agent_name}'. Legacy checkpoints are not supported. \
190             Delete the checkpoint and restart the pipeline."
191        ));
192    }
193
194    let Some(current_config) = registry.resolve_config(agent_name) else {
195        return ValidationResult::ok().with_warning(format!(
196            "Agent '{agent_name}' not found in current registry (may have been removed)"
197        ));
198    };
199
200    let mut result = ValidationResult::ok();
201
202    // Check command
203    if current_config.cmd != saved_config.cmd {
204        result = result.with_warning(format!(
205            "Agent '{}' command changed: '{}' -> '{}'",
206            agent_name, saved_config.cmd, current_config.cmd
207        ));
208    }
209
210    // Check output flag
211    if current_config.output_flag != saved_config.output_flag {
212        result = result.with_warning(format!(
213            "Agent '{}' output flag changed: '{}' -> '{}'",
214            agent_name, saved_config.output_flag, current_config.output_flag
215        ));
216    }
217
218    // Check can_commit flag
219    if current_config.can_commit != saved_config.can_commit {
220        result = result.with_warning(format!(
221            "Agent '{}' can_commit flag changed: {} -> {}",
222            agent_name, saved_config.can_commit, current_config.can_commit
223        ));
224    }
225
226    result
227}
228
229/// Validate iteration counts between checkpoint and current config.
230///
231/// This is a soft validation - mismatches generate warnings but don't block resume.
232/// The checkpoint values take precedence during resume.
233#[must_use]
234pub fn validate_iteration_counts(
235    checkpoint: &PipelineCheckpoint,
236    current_config: &Config,
237) -> ValidationResult {
238    let mut result = ValidationResult::ok();
239
240    // Check developer iterations
241    let saved_dev_iters = checkpoint.cli_args.developer_iters;
242    if saved_dev_iters > 0 && saved_dev_iters != current_config.developer_iters {
243        result = result.with_warning(format!(
244            "Developer iterations changed: {} (checkpoint) vs {} (current config). Using checkpoint value.",
245            saved_dev_iters, current_config.developer_iters
246        ));
247    }
248
249    // Check reviewer reviews
250    let saved_rev_reviews = checkpoint.cli_args.reviewer_reviews;
251    if saved_rev_reviews > 0 && saved_rev_reviews != current_config.reviewer_reviews {
252        result = result.with_warning(format!(
253            "Reviewer reviews changed: {} (checkpoint) vs {} (current config). Using checkpoint value.",
254            saved_rev_reviews, current_config.reviewer_reviews
255        ));
256    }
257
258    result
259}
260
#[cfg(test)]
mod tests {
    use super::*;
    use crate::checkpoint::state::{CheckpointParams, CliArgsSnapshot, PipelinePhase, RebaseState};
    use crate::workspace::MemoryWorkspace;

    /// Fixture: a checkpoint taken in the development phase (iteration 2 of 5)
    /// with a recorded working directory and no PROMPT.md checksum.
    fn make_test_checkpoint() -> PipelineCheckpoint {
        let run_id = uuid::Uuid::new_v4().to_string();
        let cli_args = CliArgsSnapshot::new(5, 2, None, true, 2, false, None);
        let developer_agent_config =
            AgentConfigSnapshot::new("claude".into(), "claude".into(), "-p".into(), None, true);
        let reviewer_agent_config =
            AgentConfigSnapshot::new("codex".into(), "codex".into(), "-p".into(), None, true);

        let params = CheckpointParams {
            phase: PipelinePhase::Development,
            iteration: 2,
            total_iterations: 5,
            reviewer_pass: 0,
            total_reviewer_passes: 2,
            developer_agent: "claude",
            reviewer_agent: "codex",
            cli_args,
            developer_agent_config,
            reviewer_agent_config,
            rebase_state: RebaseState::default(),
            git_user_name: None,
            git_user_email: None,
            run_id: &run_id,
            parent_run_id: None,
            resume_count: 0,
            actual_developer_runs: 2,
            actual_reviewer_runs: 0,
            working_dir: "/test/repo".to_string(),
            prompt_md_checksum: None,
            config_path: None,
            config_checksum: None,
        };

        PipelineCheckpoint::from_params(params)
    }

    // --- ValidationResult -------------------------------------------------

    #[test]
    fn test_validation_result_ok() {
        let ValidationResult {
            is_valid,
            warnings,
            errors,
        } = ValidationResult::ok();

        assert!(is_valid);
        assert!(warnings.is_empty());
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validation_result_error() {
        let failed = ValidationResult::error("test error");

        assert!(!failed.is_valid);
        assert!(failed.warnings.is_empty());
        assert_eq!(failed.errors, ["test error"]);
    }

    #[test]
    fn test_validation_result_with_warning() {
        let warned = ValidationResult::ok().with_warning("test warning");

        assert!(warned.is_valid);
        assert_eq!(warned.warnings, ["test warning"]);
    }

    #[test]
    fn test_validation_result_merge() {
        let first = ValidationResult::ok().with_warning("warning 1");
        let second = ValidationResult::ok().with_warning("warning 2");

        let combined = first.merge(second);

        assert!(combined.is_valid);
        assert_eq!(combined.warnings.len(), 2);
    }

    #[test]
    fn test_validation_result_merge_with_error() {
        let combined = ValidationResult::ok().merge(ValidationResult::error("error"));

        assert!(!combined.is_valid);
        assert_eq!(combined.errors.len(), 1);
    }

    // --- validate_working_directory ---------------------------------------

    #[test]
    fn test_validate_working_directory_empty_rejects_legacy() {
        let workspace = MemoryWorkspace::new_test();
        let mut checkpoint = make_test_checkpoint();
        checkpoint.working_dir = String::new();

        let outcome = validate_working_directory(&checkpoint, &workspace);

        assert!(
            !outcome.is_valid,
            "Empty working_dir should reject legacy checkpoint"
        );
        assert_eq!(outcome.errors.len(), 1);
        assert!(outcome.errors[0].contains("Legacy checkpoints are not supported"));
    }

    #[test]
    fn test_validate_working_directory_mismatch() {
        let workspace = MemoryWorkspace::new_test();
        let mut checkpoint = make_test_checkpoint();
        checkpoint.working_dir = "/some/other/directory".to_string();

        let outcome = validate_working_directory(&checkpoint, &workspace);

        assert!(
            !outcome.is_valid,
            "Should fail validation on working_dir mismatch"
        );
        assert_eq!(outcome.errors.len(), 1);
        assert!(outcome.errors[0].contains("Working directory mismatch"));
    }

    // --- validate_prompt_md -----------------------------------------------

    #[test]
    fn test_validate_prompt_md_no_checksum_rejects_legacy() {
        let workspace = MemoryWorkspace::new_test();
        let mut checkpoint = make_test_checkpoint();
        // Set explicitly so the test does not depend on the fixture's default.
        checkpoint.prompt_md_checksum = None;

        let outcome = validate_prompt_md(&checkpoint, &workspace);

        assert!(
            !outcome.is_valid,
            "Missing PROMPT.md checksum should reject legacy checkpoint"
        );
        assert_eq!(outcome.errors.len(), 1);
        assert!(outcome.errors[0].contains("Legacy checkpoints are not supported"));
    }
}