ralph_workflow/checkpoint/
execution_history.rs

1//! Execution history tracking for checkpoint state.
2//!
3//! This module provides structures for tracking the execution history of a pipeline,
4//! enabling idempotent recovery and validation of state.
5
6use crate::checkpoint::timestamp;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
/// Outcome of an execution step.
///
/// No serde tagging attribute is applied, so the enum uses serde's default
/// externally tagged representation (e.g. `{"Success": {...}}` in JSON).
/// Fields marked `#[serde(default)]` may be absent from serialized data and
/// then deserialize to their `Default` value (`None` / empty vector).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum StepOutcome {
    /// Step completed successfully
    Success {
        /// Optional output data (for small outputs)
        output: Option<String>,
        /// Files modified by this step
        files_modified: Vec<String>,
        /// Exit code if applicable (0 for success).
        /// `None` when the field is missing from serialized data.
        #[serde(default)]
        exit_code: Option<i32>,
    },
    /// Step failed with error
    Failure {
        /// Error message
        error: String,
        /// Whether this is recoverable
        recoverable: bool,
        /// Exit code if applicable (non-zero for failure).
        /// `None` when the field is missing from serialized data.
        #[serde(default)]
        exit_code: Option<i32>,
        /// Signals received during execution (e.g., "SIGINT", "SIGTERM").
        /// Empty when the field is missing from serialized data.
        #[serde(default)]
        signals: Vec<String>,
    },
    /// Step partially completed (may need retry)
    Partial {
        /// What was completed
        completed: String,
        /// What remains
        remaining: String,
        /// Exit code if applicable.
        /// `None` when the field is missing from serialized data.
        #[serde(default)]
        exit_code: Option<i32>,
    },
    /// Step was skipped (e.g., already done)
    Skipped {
        /// Reason for skipping
        reason: String,
    },
}
52
53impl StepOutcome {
54    /// Create a Success outcome with default values.
55    pub fn success(output: Option<String>, files_modified: Vec<String>) -> Self {
56        Self::Success {
57            output,
58            files_modified,
59            exit_code: Some(0),
60        }
61    }
62
63    /// Create a Failure outcome with default values.
64    pub fn failure(error: String, recoverable: bool) -> Self {
65        Self::Failure {
66            error,
67            recoverable,
68            exit_code: None,
69            signals: Vec::new(),
70        }
71    }
72
73    /// Create a Partial outcome with default values.
74    pub fn partial(completed: String, remaining: String) -> Self {
75        Self::Partial {
76            completed,
77            remaining,
78            exit_code: None,
79        }
80    }
81
82    /// Create a Skipped outcome.
83    pub fn skipped(reason: String) -> Self {
84        Self::Skipped { reason }
85    }
86}
87
/// Detailed information about files modified in a step.
///
/// Every list is marked `#[serde(default)]`, so a list that is missing from
/// serialized data deserializes as empty. `Default` yields three empty lists.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct ModifiedFilesDetail {
    /// Files added
    #[serde(default)]
    pub added: Vec<String>,
    /// Files modified
    #[serde(default)]
    pub modified: Vec<String>,
    /// Files deleted
    #[serde(default)]
    pub deleted: Vec<String>,
}
101
/// Summary of issues found and fixed during a step.
///
/// Every field is marked `#[serde(default)]`, so missing counters deserialize
/// as `0` and a missing description as `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct IssuesSummary {
    /// Number of issues found
    #[serde(default)]
    pub found: u32,
    /// Number of issues fixed
    #[serde(default)]
    pub fixed: u32,
    /// Description of issues (e.g., "3 clippy warnings, 2 test failures")
    #[serde(default)]
    pub description: Option<String>,
}
115
/// A single execution step in the pipeline history.
///
/// The fields from `checkpoint_saved_at` onwards are marked
/// `#[serde(default)]`, so serialized steps that lack them still deserialize,
/// with those fields set to `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ExecutionStep {
    /// Phase this step belongs to
    pub phase: String,
    /// Iteration number (for development/review iterations)
    pub iteration: u32,
    /// Type of step (e.g., "review", "fix", "commit")
    pub step_type: String,
    /// When this step was executed (ISO 8601 format string).
    /// `ExecutionStep::new` fills this from `crate::checkpoint::timestamp()`.
    pub timestamp: String,
    /// Outcome of the step
    pub outcome: StepOutcome,
    /// Agent that executed this step
    pub agent: Option<String>,
    /// Duration in seconds (if available)
    pub duration_secs: Option<u64>,
    /// When a checkpoint was saved during this step (ISO 8601 format string)
    #[serde(default)]
    pub checkpoint_saved_at: Option<String>,
    /// Git commit OID created during this step (if any)
    #[serde(default)]
    pub git_commit_oid: Option<String>,
    /// Detailed information about files modified
    #[serde(default)]
    pub modified_files_detail: Option<ModifiedFilesDetail>,
    /// The prompt text used for this step (for deterministic replay)
    #[serde(default)]
    pub prompt_used: Option<String>,
    /// Issues summary (found and fixed counts)
    #[serde(default)]
    pub issues_summary: Option<IssuesSummary>,
}
149
150impl ExecutionStep {
151    /// Create a new execution step.
152    pub fn new(phase: &str, iteration: u32, step_type: &str, outcome: StepOutcome) -> Self {
153        Self {
154            phase: phase.to_string(),
155            iteration,
156            step_type: step_type.to_string(),
157            timestamp: timestamp(),
158            outcome,
159            agent: None,
160            duration_secs: None,
161            checkpoint_saved_at: None,
162            git_commit_oid: None,
163            modified_files_detail: None,
164            prompt_used: None,
165            issues_summary: None,
166        }
167    }
168
169    /// Set the agent that executed this step.
170    pub fn with_agent(mut self, agent: &str) -> Self {
171        self.agent = Some(agent.to_string());
172        self
173    }
174
175    /// Set the duration of this step.
176    pub fn with_duration(mut self, duration_secs: u64) -> Self {
177        self.duration_secs = Some(duration_secs);
178        self
179    }
180
181    /// Set the git commit OID created during this step.
182    pub fn with_git_commit_oid(mut self, oid: &str) -> Self {
183        self.git_commit_oid = Some(oid.to_string());
184        self
185    }
186}
187
/// Default threshold for storing file content in snapshots (10KB).
///
/// Files strictly smaller than this threshold will have their full content
/// stored in the checkpoint for automatic recovery on resume.
const DEFAULT_CONTENT_THRESHOLD: u64 = 10 * 1024;

/// Maximum file size that will be compressed in snapshots (100KB).
///
/// Key files (PROMPT.md, PLAN.md, ISSUES.md, NOTES.md) that are too large for
/// plain storage (size at or above the content threshold in use) but strictly
/// smaller than this limit are gzip-compressed and base64-encoded before
/// storing.
const MAX_COMPRESS_SIZE: u64 = 100 * 1024;
199
/// Snapshot of a file's state at a point in time.
///
/// At most one of `content` and `compressed_content` is populated by the
/// constructors in this module; both are `None` when the file did not exist,
/// was too large, or could not be read.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct FileSnapshot {
    /// Path to the file
    pub path: String,
    /// SHA-256 checksum of file contents
    pub checksum: String,
    /// File size in bytes
    pub size: u64,
    /// For small files (< 10KB by default), store full content.
    /// `None` if the file could not be read as UTF-8 text.
    pub content: Option<String>,
    /// Compressed content (base64-encoded gzip) for larger key files
    pub compressed_content: Option<String>,
    /// Whether the file existed
    pub exists: bool,
}
216
217impl FileSnapshot {
218    /// Create a new file snapshot with the default content threshold (10KB).
219    pub fn new(path: &str, checksum: String, size: u64, exists: bool) -> Self {
220        Self::with_max_size(path, checksum, size, exists, DEFAULT_CONTENT_THRESHOLD)
221    }
222
223    /// Create a new file snapshot with a custom content threshold.
224    ///
225    /// Files smaller than `max_size` bytes will have their content stored.
226    /// Key files (PROMPT.md, PLAN.md, ISSUES.md) may be compressed if they
227    /// are between max_size and MAX_COMPRESS_SIZE.
228    pub fn with_max_size(
229        path: &str,
230        checksum: String,
231        size: u64,
232        exists: bool,
233        max_size: u64,
234    ) -> Self {
235        let mut content = None;
236        let mut compressed_content = None;
237
238        if exists {
239            let is_key_file = path.contains("PROMPT.md")
240                || path.contains("PLAN.md")
241                || path.contains("ISSUES.md")
242                || path.contains("NOTES.md");
243
244            if size < max_size {
245                // For small files, read and store content directly
246                content = std::fs::read_to_string(path).ok();
247            } else if is_key_file && size < MAX_COMPRESS_SIZE {
248                // For larger key files, compress the content
249                if let Ok(data) = std::fs::read(path) {
250                    compressed_content = compress_data(&data).ok();
251                }
252            }
253        }
254
255        Self {
256            path: path.to_string(),
257            checksum,
258            size,
259            content,
260            compressed_content,
261            exists,
262        }
263    }
264
265    /// Get the file content, decompressing if necessary.
266    pub fn get_content(&self) -> Option<String> {
267        if let Some(ref content) = self.content {
268            Some(content.clone())
269        } else if let Some(ref compressed) = self.compressed_content {
270            decompress_data(compressed).ok()
271        } else {
272            None
273        }
274    }
275
276    /// Create a snapshot for a non-existent file.
277    pub fn not_found(path: &str) -> Self {
278        Self {
279            path: path.to_string(),
280            checksum: String::new(),
281            size: 0,
282            content: None,
283            compressed_content: None,
284            exists: false,
285        }
286    }
287
288    /// Verify that the current file state matches this snapshot.
289    pub fn verify(&self) -> bool {
290        if !self.exists {
291            return !std::path::Path::new(&self.path).exists();
292        }
293
294        let Ok(content) = std::fs::read(&self.path) else {
295            return false;
296        };
297
298        if content.len() as u64 != self.size {
299            return false;
300        }
301
302        let checksum =
303            crate::checkpoint::state::calculate_file_checksum(std::path::Path::new(&self.path));
304
305        match checksum {
306            Some(actual) => actual == self.checksum,
307            None => false,
308        }
309    }
310}
311
312/// Compress data using gzip and encode as base64.
313///
314/// This is used to store larger file content in checkpoints without
315/// bloating the checkpoint file size too much.
316fn compress_data(data: &[u8]) -> Result<String, std::io::Error> {
317    use base64::{engine::general_purpose::STANDARD, Engine};
318    use flate2::write::GzEncoder;
319    use flate2::Compression;
320    use std::io::Write;
321
322    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
323    encoder.write_all(data)?;
324    let compressed = encoder.finish()?;
325
326    Ok(STANDARD.encode(&compressed))
327}
328
329/// Decompress data that was compressed with compress_data.
330fn decompress_data(encoded: &str) -> Result<String, std::io::Error> {
331    use base64::{engine::general_purpose::STANDARD, Engine};
332    use flate2::read::GzDecoder;
333    use std::io::Read;
334
335    let compressed = STANDARD.decode(encoded).map_err(|e| {
336        std::io::Error::new(
337            std::io::ErrorKind::InvalidData,
338            format!("Base64 decode error: {}", e),
339        )
340    })?;
341
342    let mut decoder = GzDecoder::new(compressed.as_slice());
343    let mut decompressed = Vec::new();
344    decoder.read_to_end(&mut decompressed)?;
345
346    String::from_utf8(decompressed).map_err(|e| {
347        std::io::Error::new(
348            std::io::ErrorKind::InvalidData,
349            format!("UTF-8 decode error: {}", e),
350        )
351    })
352}
353
/// Execution history tracking.
///
/// `Default` yields an empty history: no steps and no file snapshots.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct ExecutionHistory {
    /// All execution steps in order
    /// (`add_step` appends, so this is insertion order).
    pub steps: Vec<ExecutionStep>,
    /// File snapshots for key files at checkpoint time
    // NOTE(review): keys are presumably file paths — confirm against callers.
    pub file_snapshots: HashMap<String, FileSnapshot>,
}
362
363impl ExecutionHistory {
364    /// Create a new execution history.
365    pub fn new() -> Self {
366        Self::default()
367    }
368
369    /// Add an execution step.
370    pub fn add_step(&mut self, step: ExecutionStep) {
371        self.steps.push(step);
372    }
373}
374
#[cfg(test)]
mod tests {
    use super::*;

    /// Path that is expected not to exist on any machine running the tests.
    const MISSING_PATH: &str = "this/path/should/not/exist/ralph-execution-history-test.txt";

    #[test]
    fn test_execution_step_new() {
        let outcome = StepOutcome::success(None, vec!["test.txt".to_string()]);

        let step = ExecutionStep::new("Development", 1, "dev_run", outcome);

        assert_eq!(step.phase, "Development");
        assert_eq!(step.iteration, 1);
        assert_eq!(step.step_type, "dev_run");
        assert!(step.agent.is_none());
        assert!(step.duration_secs.is_none());
        // Verify new fields are None by default
        assert!(step.git_commit_oid.is_none());
        assert!(step.modified_files_detail.is_none());
        assert!(step.prompt_used.is_none());
        assert!(step.issues_summary.is_none());
    }

    #[test]
    fn test_execution_step_with_agent() {
        let outcome = StepOutcome::success(None, vec![]);

        let step = ExecutionStep::new("Development", 1, "dev_run", outcome)
            .with_agent("claude")
            .with_duration(120);

        assert_eq!(step.agent, Some("claude".to_string()));
        assert_eq!(step.duration_secs, Some(120));
    }

    #[test]
    fn test_execution_step_with_git_commit_oid() {
        let outcome = StepOutcome::success(None, vec![]);

        let step =
            ExecutionStep::new("Development", 1, "commit", outcome).with_git_commit_oid("abc123");

        assert_eq!(step.git_commit_oid, Some("abc123".to_string()));
    }

    #[test]
    fn test_execution_step_new_fields_default() {
        let outcome = StepOutcome::success(None, vec![]);
        let step = ExecutionStep::new("Development", 1, "dev_run", outcome);

        // Verify new fields are None by default
        assert!(step.checkpoint_saved_at.is_none());
        assert!(step.git_commit_oid.is_none());
        assert!(step.modified_files_detail.is_none());
        assert!(step.prompt_used.is_none());
        assert!(step.issues_summary.is_none());
    }

    #[test]
    fn test_step_outcome_constructor_defaults() {
        assert_eq!(
            StepOutcome::success(None, vec![]),
            StepOutcome::Success {
                output: None,
                files_modified: vec![],
                exit_code: Some(0),
            }
        );
        assert_eq!(
            StepOutcome::failure("boom".to_string(), true),
            StepOutcome::Failure {
                error: "boom".to_string(),
                recoverable: true,
                exit_code: None,
                signals: vec![],
            }
        );
        assert_eq!(
            StepOutcome::partial("a".to_string(), "b".to_string()),
            StepOutcome::Partial {
                completed: "a".to_string(),
                remaining: "b".to_string(),
                exit_code: None,
            }
        );
        assert_eq!(
            StepOutcome::skipped("done".to_string()),
            StepOutcome::Skipped {
                reason: "done".to_string(),
            }
        );
    }

    #[test]
    fn test_modified_files_detail_default() {
        let detail = ModifiedFilesDetail::default();
        assert!(detail.added.is_empty());
        assert!(detail.modified.is_empty());
        assert!(detail.deleted.is_empty());
    }

    #[test]
    fn test_issues_summary_default() {
        let summary = IssuesSummary::default();
        assert_eq!(summary.found, 0);
        assert_eq!(summary.fixed, 0);
        assert!(summary.description.is_none());
    }

    #[test]
    fn test_file_snapshot() {
        let snapshot = FileSnapshot::new("test.txt", "abc123".to_string(), 100, true);

        assert_eq!(snapshot.path, "test.txt");
        assert_eq!(snapshot.checksum, "abc123");
        assert_eq!(snapshot.size, 100);
        assert!(snapshot.exists);
    }

    #[test]
    fn test_file_snapshot_not_found() {
        let snapshot = FileSnapshot::not_found("missing.txt");

        assert_eq!(snapshot.path, "missing.txt");
        assert!(!snapshot.exists);
        assert_eq!(snapshot.size, 0);
        assert!(snapshot.get_content().is_none());
    }

    #[test]
    fn test_file_snapshot_verify_missing_file() {
        // A snapshot of a missing file verifies while the file stays missing.
        assert!(FileSnapshot::not_found(MISSING_PATH).verify());

        // A snapshot claiming the file existed must fail when it is gone.
        let expected_present = FileSnapshot {
            exists: true,
            ..FileSnapshot::not_found(MISSING_PATH)
        };
        assert!(!expected_present.verify());
    }

    #[test]
    fn test_compress_roundtrip() {
        let original = "# PLAN\n- step\n".repeat(64);

        let encoded = compress_data(original.as_bytes()).unwrap();

        assert_eq!(decompress_data(&encoded).unwrap(), original);
    }

    #[test]
    fn test_decompress_rejects_invalid_base64() {
        assert!(decompress_data("not base64!!").is_err());
    }

    #[test]
    fn test_get_content_prefers_plain_then_compressed() {
        let compressed = compress_data(b"from gzip").unwrap();

        let compressed_only = FileSnapshot {
            compressed_content: Some(compressed.clone()),
            ..FileSnapshot::not_found("PLAN.md")
        };
        assert_eq!(
            compressed_only.get_content(),
            Some("from gzip".to_string())
        );

        let both = FileSnapshot {
            content: Some("plain".to_string()),
            compressed_content: Some(compressed),
            ..FileSnapshot::not_found("PLAN.md")
        };
        assert_eq!(both.get_content(), Some("plain".to_string()));
    }

    #[test]
    fn test_execution_history_add_step() {
        let mut history = ExecutionHistory::new();
        let outcome = StepOutcome::success(None, vec![]);

        let step = ExecutionStep::new("Development", 1, "dev_run", outcome);

        history.add_step(step);

        assert_eq!(history.steps.len(), 1);
        assert_eq!(history.steps[0].phase, "Development");
        assert_eq!(history.steps[0].iteration, 1);
    }

    #[test]
    fn test_execution_step_serialization_with_new_fields() {
        // Deserialize a step whose JSON carries every newer field populated.
        let json_str = r#"{
            "phase": "Review",
            "iteration": 1,
            "step_type": "review",
            "timestamp": "2025-01-20 12:00:00",
            "outcome": {"Success": {"output": null, "files_modified": [], "exit_code": 0}},
            "agent": null,
            "duration_secs": null,
            "checkpoint_saved_at": null,
            "git_commit_oid": "abc123",
            "modified_files_detail": {
                "added": ["a.rs"],
                "modified": [],
                "deleted": []
            },
            "prompt_used": "Fix issues",
            "issues_summary": {
                "found": 2,
                "fixed": 2,
                "description": "All fixed"
            }
        }"#;

        let deserialized: ExecutionStep = serde_json::from_str(json_str).unwrap();

        assert_eq!(deserialized.git_commit_oid, Some("abc123".to_string()));
        assert_eq!(
            deserialized.modified_files_detail.as_ref().unwrap().added,
            vec!["a.rs"]
        );
        assert_eq!(deserialized.prompt_used, Some("Fix issues".to_string()));
        assert_eq!(deserialized.issues_summary.as_ref().unwrap().found, 2);
    }

    #[test]
    fn test_execution_step_deserializes_legacy_json_without_new_fields() {
        // Backward compatibility: a payload lacking every `#[serde(default)]`
        // field (including `exit_code` inside the outcome) must still load.
        let json_str = r#"{
            "phase": "Review",
            "iteration": 2,
            "step_type": "review",
            "timestamp": "2025-01-20 12:00:00",
            "outcome": {"Success": {"output": null, "files_modified": []}},
            "agent": null,
            "duration_secs": null
        }"#;

        let deserialized: ExecutionStep = serde_json::from_str(json_str).unwrap();

        assert_eq!(deserialized.phase, "Review");
        assert_eq!(deserialized.iteration, 2);
        assert_eq!(
            deserialized.outcome,
            StepOutcome::Success {
                output: None,
                files_modified: vec![],
                exit_code: None,
            }
        );
        assert!(deserialized.checkpoint_saved_at.is_none());
        assert!(deserialized.git_commit_oid.is_none());
        assert!(deserialized.modified_files_detail.is_none());
        assert!(deserialized.prompt_used.is_none());
        assert!(deserialized.issues_summary.is_none());
    }
}