Skip to main content

ralph_workflow/checkpoint/
validation.rs

1//! Checkpoint validation for resume functionality.
2//!
3//! This module provides validation for checkpoint state before resuming,
4//! ensuring the environment matches the checkpoint and detecting configuration changes.
5
6use crate::agents::AgentRegistry;
7use crate::checkpoint::state::{
8    calculate_file_checksum_with_workspace, AgentConfigSnapshot, PipelineCheckpoint,
9};
10use crate::config::Config;
11use crate::workspace::Workspace;
12use std::path::Path;
13
/// Outcome of validating a checkpoint before resuming.
///
/// `errors` are problems that prevent a resume, `warnings` are shown to the
/// user but do not block it. `is_valid` is set by the constructors
/// ([`ValidationResult::ok`] → `true`, [`ValidationResult::error`] → `false`)
/// and is cleared by [`ValidationResult::merge`] when the merged-in result is
/// invalid.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationResult {
    /// Whether the checkpoint is valid for resume.
    pub is_valid: bool,
    /// Warnings that don't prevent resume but should be shown.
    pub warnings: Vec<String>,
    /// Errors that prevent resume.
    pub errors: Vec<String>,
}

impl ValidationResult {
    /// Create a successful validation result with no warnings and no errors.
    #[must_use]
    pub fn ok() -> Self {
        Self {
            is_valid: true,
            warnings: Vec::new(),
            errors: Vec::new(),
        }
    }

    /// Create a failed validation result carrying exactly one error message.
    #[must_use]
    pub fn error(msg: impl Into<String>) -> Self {
        Self {
            is_valid: false,
            warnings: Vec::new(),
            errors: vec![msg.into()],
        }
    }

    /// Append a warning and return the updated result.
    ///
    /// Validity is not affected. The method consumes `self`, so the returned
    /// value must be kept or the warning is lost.
    #[must_use]
    pub fn with_warning(mut self, msg: impl Into<String>) -> Self {
        self.warnings.push(msg.into());
        self
    }

    /// Merge another validation result into this one and return the combination.
    ///
    /// The combined result is valid only if both inputs are valid. Warnings and
    /// errors of `other` are appended after the ones already present. The method
    /// consumes `self`, so the returned value must be kept.
    #[must_use]
    pub fn merge(mut self, other: ValidationResult) -> Self {
        // Validity can only ever be lost by merging, never regained.
        self.is_valid &= other.is_valid;
        self.warnings.extend(other.warnings);
        self.errors.extend(other.errors);
        self
    }
}
60
61/// Validate a checkpoint before resuming.
62///
63/// Performs comprehensive validation to ensure the checkpoint can be safely resumed:
64/// - Working directory matches
65/// - PROMPT.md hasn't changed (if checksum available)
66/// - Agent configurations are compatible
67///
68/// Note: File system state validation is handled separately with recovery strategy
69/// in the resume flow (see validate_file_system_state_with_strategy).
70///
71/// # Arguments
72///
73/// * `checkpoint` - The checkpoint to validate
74/// * `current_config` - Current configuration to compare against
75/// * `registry` - Agent registry for agent validation
76/// * `workspace` - Workspace for explicit path resolution
77///
78/// # Returns
79///
80/// A `ValidationResult` with any warnings or errors found.
81pub fn validate_checkpoint(
82    checkpoint: &PipelineCheckpoint,
83    current_config: &Config,
84    registry: &AgentRegistry,
85    workspace: &dyn Workspace,
86) -> ValidationResult {
87    let mut result = ValidationResult::ok();
88
89    // Validate working directory
90    result = result.merge(validate_working_directory(checkpoint, workspace));
91
92    // Validate PROMPT.md checksum
93    result = result.merge(validate_prompt_md(checkpoint, workspace));
94
95    // Validate agent configurations
96    result = result.merge(validate_agent_config(
97        &checkpoint.developer_agent_config,
98        &checkpoint.developer_agent,
99        registry,
100    ));
101    result = result.merge(validate_agent_config(
102        &checkpoint.reviewer_agent_config,
103        &checkpoint.reviewer_agent,
104        registry,
105    ));
106
107    // Check for iteration count mismatches (warning only)
108    result = result.merge(validate_iteration_counts(checkpoint, current_config));
109
110    // Note: File system state validation is NOT included here because it requires
111    // recovery strategy handling. It's called separately in the resume flow.
112
113    result
114}
115
116/// Validate that the working directory matches the checkpoint.
117///
118/// Uses the workspace root for current working directory comparison.
119/// Rejects legacy checkpoints that have no working directory.
120pub fn validate_working_directory(
121    checkpoint: &PipelineCheckpoint,
122    workspace: &dyn Workspace,
123) -> ValidationResult {
124    if checkpoint.working_dir.is_empty() {
125        return ValidationResult::error(
126            "Checkpoint has no working directory recorded. Legacy checkpoints are not supported. \
127             Delete the checkpoint and restart the pipeline."
128                .to_string(),
129        );
130    }
131
132    let current_dir = workspace.root().to_string_lossy().to_string();
133
134    if current_dir != checkpoint.working_dir {
135        return ValidationResult::error(format!(
136            "Working directory mismatch: checkpoint was created in '{}', but current directory is '{}'",
137            checkpoint.working_dir, current_dir
138        ));
139    }
140
141    ValidationResult::ok()
142}
143
144/// Validate that PROMPT.md hasn't changed since checkpoint.
145///
146/// Rejects legacy checkpoints that have no PROMPT.md checksum.
147pub fn validate_prompt_md(
148    checkpoint: &PipelineCheckpoint,
149    workspace: &dyn Workspace,
150) -> ValidationResult {
151    let Some(ref saved_checksum) = checkpoint.prompt_md_checksum else {
152        return ValidationResult::error(
153            "Checkpoint has no PROMPT.md checksum. Legacy checkpoints are not supported. \
154             Delete the checkpoint and restart the pipeline."
155                .to_string(),
156        );
157    };
158
159    let current_checksum =
160        calculate_file_checksum_with_workspace(workspace, Path::new("PROMPT.md"));
161
162    match current_checksum {
163        Some(current) if current == *saved_checksum => ValidationResult::ok(),
164        Some(current) => ValidationResult::ok().with_warning(format!(
165            "PROMPT.md has changed since checkpoint was created (checksum: {} -> {})",
166            &saved_checksum[..8],
167            &current[..8]
168        )),
169        None => ValidationResult::ok()
170            .with_warning("PROMPT.md not found or unreadable - cannot verify integrity"),
171    }
172}
173
174/// Validate that an agent configuration matches the current registry.
175///
176/// Rejects legacy checkpoints that have empty agent commands.
177pub fn validate_agent_config(
178    saved_config: &AgentConfigSnapshot,
179    agent_name: &str,
180    registry: &AgentRegistry,
181) -> ValidationResult {
182    // Reject legacy checkpoints with empty commands
183    if saved_config.cmd.is_empty() {
184        return ValidationResult::error(format!(
185            "Checkpoint has empty agent command for '{}'. Legacy checkpoints are not supported. \
186             Delete the checkpoint and restart the pipeline.",
187            agent_name
188        ));
189    }
190
191    let Some(current_config) = registry.resolve_config(agent_name) else {
192        return ValidationResult::ok().with_warning(format!(
193            "Agent '{}' not found in current registry (may have been removed)",
194            agent_name
195        ));
196    };
197
198    let mut result = ValidationResult::ok();
199
200    // Check command
201    if current_config.cmd != saved_config.cmd {
202        result = result.with_warning(format!(
203            "Agent '{}' command changed: '{}' -> '{}'",
204            agent_name, saved_config.cmd, current_config.cmd
205        ));
206    }
207
208    // Check output flag
209    if current_config.output_flag != saved_config.output_flag {
210        result = result.with_warning(format!(
211            "Agent '{}' output flag changed: '{}' -> '{}'",
212            agent_name, saved_config.output_flag, current_config.output_flag
213        ));
214    }
215
216    // Check can_commit flag
217    if current_config.can_commit != saved_config.can_commit {
218        result = result.with_warning(format!(
219            "Agent '{}' can_commit flag changed: {} -> {}",
220            agent_name, saved_config.can_commit, current_config.can_commit
221        ));
222    }
223
224    result
225}
226
227/// Validate iteration counts between checkpoint and current config.
228///
229/// This is a soft validation - mismatches generate warnings but don't block resume.
230/// The checkpoint values take precedence during resume.
231pub fn validate_iteration_counts(
232    checkpoint: &PipelineCheckpoint,
233    current_config: &Config,
234) -> ValidationResult {
235    let mut result = ValidationResult::ok();
236
237    // Check developer iterations
238    let saved_dev_iters = checkpoint.cli_args.developer_iters;
239    if saved_dev_iters > 0 && saved_dev_iters != current_config.developer_iters {
240        result = result.with_warning(format!(
241            "Developer iterations changed: {} (checkpoint) vs {} (current config). Using checkpoint value.",
242            saved_dev_iters, current_config.developer_iters
243        ));
244    }
245
246    // Check reviewer reviews
247    let saved_rev_reviews = checkpoint.cli_args.reviewer_reviews;
248    if saved_rev_reviews > 0 && saved_rev_reviews != current_config.reviewer_reviews {
249        result = result.with_warning(format!(
250            "Reviewer reviews changed: {} (checkpoint) vs {} (current config). Using checkpoint value.",
251            saved_rev_reviews, current_config.reviewer_reviews
252        ));
253    }
254
255    result
256}
257
#[cfg(test)]
mod tests {
    use super::*;
    use crate::checkpoint::state::{CheckpointParams, CliArgsSnapshot, PipelinePhase, RebaseState};
    use crate::workspace::MemoryWorkspace;

    /// Build a checkpoint in the development phase with working directory
    /// `/test/repo`, agents `claude` / `codex`, and no PROMPT.md checksum.
    fn make_test_checkpoint() -> PipelineCheckpoint {
        let run_id = uuid::Uuid::new_v4().to_string();

        PipelineCheckpoint::from_params(CheckpointParams {
            phase: PipelinePhase::Development,
            iteration: 2,
            total_iterations: 5,
            reviewer_pass: 0,
            total_reviewer_passes: 2,
            developer_agent: "claude",
            reviewer_agent: "codex",
            cli_args: CliArgsSnapshot::new(5, 2, None, true, 2, false, None),
            developer_agent_config: AgentConfigSnapshot::new(
                "claude".into(),
                "claude".into(),
                "-p".into(),
                None,
                true,
            ),
            reviewer_agent_config: AgentConfigSnapshot::new(
                "codex".into(),
                "codex".into(),
                "-p".into(),
                None,
                true,
            ),
            rebase_state: RebaseState::default(),
            git_user_name: None,
            git_user_email: None,
            run_id: &run_id,
            parent_run_id: None,
            resume_count: 0,
            actual_developer_runs: 2,
            actual_reviewer_runs: 0,
            working_dir: "/test/repo".to_string(),
            prompt_md_checksum: None,
            config_path: None,
            config_checksum: None,
        })
    }

    /// Assert that `result` carries exactly one error and return its text.
    fn sole_error(result: &ValidationResult) -> &str {
        assert_eq!(result.errors.len(), 1);
        &result.errors[0]
    }

    #[test]
    fn test_validation_result_ok() {
        let ValidationResult {
            is_valid,
            warnings,
            errors,
        } = ValidationResult::ok();

        assert!(is_valid);
        assert!(warnings.is_empty());
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validation_result_error() {
        let failed = ValidationResult::error("test error");

        assert!(!failed.is_valid);
        assert!(failed.warnings.is_empty());
        assert_eq!(sole_error(&failed), "test error");
    }

    #[test]
    fn test_validation_result_with_warning() {
        let warned = ValidationResult::ok().with_warning("test warning");

        assert!(warned.is_valid);
        assert_eq!(warned.warnings.len(), 1);
        assert_eq!(warned.warnings[0], "test warning");
    }

    #[test]
    fn test_validation_result_merge() {
        let merged = ValidationResult::ok()
            .with_warning("warning 1")
            .merge(ValidationResult::ok().with_warning("warning 2"));

        assert!(merged.is_valid);
        assert_eq!(merged.warnings.len(), 2);
    }

    #[test]
    fn test_validation_result_merge_with_error() {
        let merged = ValidationResult::ok().merge(ValidationResult::error("error"));

        assert!(!merged.is_valid);
        assert_eq!(merged.errors.len(), 1);
    }

    #[test]
    fn test_validate_working_directory_empty_rejects_legacy() {
        let mut legacy = make_test_checkpoint();
        legacy.working_dir = String::new();
        let workspace = MemoryWorkspace::new_test();

        let outcome = validate_working_directory(&legacy, &workspace);

        assert!(
            !outcome.is_valid,
            "Empty working_dir should reject legacy checkpoint"
        );
        assert!(sole_error(&outcome).contains("Legacy checkpoints are not supported"));
    }

    #[test]
    fn test_validate_working_directory_mismatch() {
        let mut elsewhere = make_test_checkpoint();
        elsewhere.working_dir = "/some/other/directory".to_string();
        let workspace = MemoryWorkspace::new_test();

        let outcome = validate_working_directory(&elsewhere, &workspace);

        assert!(
            !outcome.is_valid,
            "Should fail validation on working_dir mismatch"
        );
        assert!(sole_error(&outcome).contains("Working directory mismatch"));
    }

    #[test]
    fn test_validate_prompt_md_no_checksum_rejects_legacy() {
        let mut legacy = make_test_checkpoint();
        legacy.prompt_md_checksum = None;
        let workspace = MemoryWorkspace::new_test();

        let outcome = validate_prompt_md(&legacy, &workspace);

        assert!(
            !outcome.is_valid,
            "Missing PROMPT.md checksum should reject legacy checkpoint"
        );
        assert!(sole_error(&outcome).contains("Legacy checkpoints are not supported"));
    }
}