Skip to main content

ralph_workflow/checkpoint/
execution_history.rs

//! Execution history tracking for checkpoint state.
//!
//! This module provides structures for tracking the execution history of a pipeline,
//! enabling idempotent recovery and validation of state.
5
6use crate::checkpoint::timestamp;
7use crate::workspace::Workspace;
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use std::path::Path;
11
12/// Outcome of an execution step.
13#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
14pub enum StepOutcome {
15    /// Step completed successfully
16    Success {
17        /// Optional output data (for small outputs)
18        output: Option<String>,
19        /// Files modified by this step
20        files_modified: Vec<String>,
21        /// Exit code if applicable (0 for success)
22        #[serde(default)]
23        exit_code: Option<i32>,
24    },
25    /// Step failed with error
26    Failure {
27        /// Error message
28        error: String,
29        /// Whether this is recoverable
30        recoverable: bool,
31        /// Exit code if applicable (non-zero for failure)
32        #[serde(default)]
33        exit_code: Option<i32>,
34        /// Signals received during execution (e.g., "SIGINT", "SIGTERM")
35        #[serde(default)]
36        signals: Vec<String>,
37    },
38    /// Step partially completed (may need retry)
39    Partial {
40        /// What was completed
41        completed: String,
42        /// What remains
43        remaining: String,
44        /// Exit code if applicable
45        #[serde(default)]
46        exit_code: Option<i32>,
47    },
48    /// Step was skipped (e.g., already done)
49    Skipped {
50        /// Reason for skipping
51        reason: String,
52    },
53}
54
55impl StepOutcome {
56    /// Create a Success outcome with default values.
57    pub fn success(output: Option<String>, files_modified: Vec<String>) -> Self {
58        Self::Success {
59            output,
60            files_modified,
61            exit_code: Some(0),
62        }
63    }
64
65    /// Create a Failure outcome with default values.
66    pub fn failure(error: String, recoverable: bool) -> Self {
67        Self::Failure {
68            error,
69            recoverable,
70            exit_code: None,
71            signals: Vec::new(),
72        }
73    }
74
75    /// Create a Partial outcome with default values.
76    pub fn partial(completed: String, remaining: String) -> Self {
77        Self::Partial {
78            completed,
79            remaining,
80            exit_code: None,
81        }
82    }
83
84    /// Create a Skipped outcome.
85    pub fn skipped(reason: String) -> Self {
86        Self::Skipped { reason }
87    }
88}
89
90/// Detailed information about files modified in a step.
91#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
92pub struct ModifiedFilesDetail {
93    /// Files added
94    #[serde(default)]
95    pub added: Vec<String>,
96    /// Files modified
97    #[serde(default)]
98    pub modified: Vec<String>,
99    /// Files deleted
100    #[serde(default)]
101    pub deleted: Vec<String>,
102}
103
104/// Summary of issues found and fixed during a step.
105#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
106pub struct IssuesSummary {
107    /// Number of issues found
108    #[serde(default)]
109    pub found: u32,
110    /// Number of issues fixed
111    #[serde(default)]
112    pub fixed: u32,
113    /// Description of issues (e.g., "3 clippy warnings, 2 test failures")
114    #[serde(default)]
115    pub description: Option<String>,
116}
117
118/// A single execution step in the pipeline history.
119#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
120pub struct ExecutionStep {
121    /// Phase this step belongs to
122    pub phase: String,
123    /// Iteration number (for development/review iterations)
124    pub iteration: u32,
125    /// Type of step (e.g., "review", "fix", "commit")
126    pub step_type: String,
127    /// When this step was executed (ISO 8601 format string)
128    pub timestamp: String,
129    /// Outcome of the step
130    pub outcome: StepOutcome,
131    /// Agent that executed this step
132    pub agent: Option<String>,
133    /// Duration in seconds (if available)
134    pub duration_secs: Option<u64>,
135    /// When a checkpoint was saved during this step (ISO 8601 format string)
136    #[serde(default)]
137    pub checkpoint_saved_at: Option<String>,
138    /// Git commit OID created during this step (if any)
139    #[serde(default)]
140    pub git_commit_oid: Option<String>,
141    /// Detailed information about files modified
142    #[serde(default)]
143    pub modified_files_detail: Option<ModifiedFilesDetail>,
144    /// The prompt text used for this step (for deterministic replay)
145    #[serde(default)]
146    pub prompt_used: Option<String>,
147    /// Issues summary (found and fixed counts)
148    #[serde(default)]
149    pub issues_summary: Option<IssuesSummary>,
150}
151
152impl ExecutionStep {
153    /// Create a new execution step.
154    pub fn new(phase: &str, iteration: u32, step_type: &str, outcome: StepOutcome) -> Self {
155        Self {
156            phase: phase.to_string(),
157            iteration,
158            step_type: step_type.to_string(),
159            timestamp: timestamp(),
160            outcome,
161            agent: None,
162            duration_secs: None,
163            checkpoint_saved_at: None,
164            git_commit_oid: None,
165            modified_files_detail: None,
166            prompt_used: None,
167            issues_summary: None,
168        }
169    }
170
171    /// Set the agent that executed this step.
172    pub fn with_agent(mut self, agent: &str) -> Self {
173        self.agent = Some(agent.to_string());
174        self
175    }
176
177    /// Set the duration of this step.
178    pub fn with_duration(mut self, duration_secs: u64) -> Self {
179        self.duration_secs = Some(duration_secs);
180        self
181    }
182
183    /// Set the git commit OID created during this step.
184    pub fn with_git_commit_oid(mut self, oid: &str) -> Self {
185        self.git_commit_oid = Some(oid.to_string());
186        self
187    }
188}
189
/// Default size limit, in bytes, below which a snapshot stores file content inline (10KB).
///
/// A file strictly smaller than this has its full content kept in the
/// checkpoint so it can be restored automatically on resume.
const DEFAULT_CONTENT_THRESHOLD: u64 = 10 * 1024;

/// Upper size limit, in bytes, for storing compressed content in a snapshot (100KB).
///
/// Key files (PROMPT.md, PLAN.md, ISSUES.md, NOTES.md) that are too large to
/// store inline but strictly smaller than this limit are compressed before
/// being stored.
const MAX_COMPRESS_SIZE: u64 = 100 * 1024;
201
202/// Snapshot of a file's state at a point in time.
203#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
204pub struct FileSnapshot {
205    /// Path to the file
206    pub path: String,
207    /// SHA-256 checksum of file contents
208    pub checksum: String,
209    /// File size in bytes
210    pub size: u64,
211    /// For small files (< 10KB by default), store full content
212    pub content: Option<String>,
213    /// Compressed content (base64-encoded gzip) for larger key files
214    pub compressed_content: Option<String>,
215    /// Whether the file existed
216    pub exists: bool,
217}
218
219impl FileSnapshot {
220    /// Create a new file snapshot with the default content threshold (10KB).
221    ///
222    /// This version does not capture file content (content and compressed_content will be None).
223    /// Use `from_workspace` to create a snapshot with content from a workspace.
224    pub fn new(path: &str, checksum: String, size: u64, exists: bool) -> Self {
225        Self {
226            path: path.to_string(),
227            checksum,
228            size,
229            content: None,
230            compressed_content: None,
231            exists,
232        }
233    }
234
235    /// Create a file snapshot from a workspace using the default content threshold (10KB).
236    ///
237    /// Files smaller than 10KB will have their content stored.
238    /// Key files (PROMPT.md, PLAN.md, ISSUES.md, NOTES.md) may be compressed if they
239    /// are between 10KB and 100KB.
240    pub fn from_workspace_default(
241        workspace: &dyn Workspace,
242        path: &str,
243        checksum: String,
244        size: u64,
245        exists: bool,
246    ) -> Self {
247        Self::from_workspace(
248            workspace,
249            path,
250            checksum,
251            size,
252            exists,
253            DEFAULT_CONTENT_THRESHOLD,
254        )
255    }
256
257    /// Create a file snapshot from a workspace, optionally capturing content.
258    ///
259    /// Files smaller than `max_size` bytes will have their content stored.
260    /// Key files (PROMPT.md, PLAN.md, ISSUES.md, NOTES.md) may be compressed if they
261    /// are between max_size and MAX_COMPRESS_SIZE.
262    pub fn from_workspace(
263        workspace: &dyn Workspace,
264        path: &str,
265        checksum: String,
266        size: u64,
267        exists: bool,
268        max_size: u64,
269    ) -> Self {
270        let mut content = None;
271        let mut compressed_content = None;
272
273        if exists {
274            let is_key_file = path.contains("PROMPT.md")
275                || path.contains("PLAN.md")
276                || path.contains("ISSUES.md")
277                || path.contains("NOTES.md");
278
279            let path_ref = Path::new(path);
280
281            if size < max_size {
282                // For small files, read and store content directly
283                content = workspace.read(path_ref).ok();
284            } else if is_key_file && size < MAX_COMPRESS_SIZE {
285                // For larger key files, compress the content
286                if let Ok(data) = workspace.read_bytes(path_ref) {
287                    compressed_content = compress_data(&data).ok();
288                }
289            }
290        }
291
292        Self {
293            path: path.to_string(),
294            checksum,
295            size,
296            content,
297            compressed_content,
298            exists,
299        }
300    }
301
302    /// Get the file content, decompressing if necessary.
303    pub fn get_content(&self) -> Option<String> {
304        if let Some(ref content) = self.content {
305            Some(content.clone())
306        } else if let Some(ref compressed) = self.compressed_content {
307            decompress_data(compressed).ok()
308        } else {
309            None
310        }
311    }
312
313    /// Create a snapshot for a non-existent file.
314    pub fn not_found(path: &str) -> Self {
315        Self {
316            path: path.to_string(),
317            checksum: String::new(),
318            size: 0,
319            content: None,
320            compressed_content: None,
321            exists: false,
322        }
323    }
324
325    /// Verify that the current file state matches this snapshot using a workspace.
326    pub fn verify_with_workspace(&self, workspace: &dyn Workspace) -> bool {
327        let path = Path::new(&self.path);
328
329        if !self.exists {
330            return !workspace.exists(path);
331        }
332
333        let Ok(content) = workspace.read_bytes(path) else {
334            return false;
335        };
336
337        if content.len() as u64 != self.size {
338            return false;
339        }
340
341        let checksum = crate::checkpoint::state::calculate_checksum_from_bytes(&content);
342        checksum == self.checksum
343    }
344}
345
346/// Compress data using gzip and encode as base64.
347///
348/// This is used to store larger file content in checkpoints without
349/// bloating the checkpoint file size too much.
350fn compress_data(data: &[u8]) -> Result<String, std::io::Error> {
351    use base64::{engine::general_purpose::STANDARD, Engine};
352    use flate2::write::GzEncoder;
353    use flate2::Compression;
354    use std::io::Write;
355
356    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
357    encoder.write_all(data)?;
358    let compressed = encoder.finish()?;
359
360    Ok(STANDARD.encode(&compressed))
361}
362
363/// Decompress data that was compressed with compress_data.
364fn decompress_data(encoded: &str) -> Result<String, std::io::Error> {
365    use base64::{engine::general_purpose::STANDARD, Engine};
366    use flate2::read::GzDecoder;
367    use std::io::Read;
368
369    let compressed = STANDARD.decode(encoded).map_err(|e| {
370        std::io::Error::new(
371            std::io::ErrorKind::InvalidData,
372            format!("Base64 decode error: {}", e),
373        )
374    })?;
375
376    let mut decoder = GzDecoder::new(compressed.as_slice());
377    let mut decompressed = Vec::new();
378    decoder.read_to_end(&mut decompressed)?;
379
380    String::from_utf8(decompressed).map_err(|e| {
381        std::io::Error::new(
382            std::io::ErrorKind::InvalidData,
383            format!("UTF-8 decode error: {}", e),
384        )
385    })
386}
387
388/// Execution history tracking.
389#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
390pub struct ExecutionHistory {
391    /// All execution steps in order
392    pub steps: Vec<ExecutionStep>,
393    /// File snapshots for key files at checkpoint time
394    pub file_snapshots: HashMap<String, FileSnapshot>,
395}
396
397impl ExecutionHistory {
398    /// Create a new execution history.
399    pub fn new() -> Self {
400        Self::default()
401    }
402
403    /// Add an execution step.
404    pub fn add_step(&mut self, step: ExecutionStep) {
405        self.steps.push(step);
406    }
407}
408
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_execution_step_new() {
        let outcome = StepOutcome::success(None, vec!["test.txt".to_string()]);

        let step = ExecutionStep::new("Development", 1, "dev_run", outcome);

        assert_eq!(step.phase, "Development");
        assert_eq!(step.iteration, 1);
        assert_eq!(step.step_type, "dev_run");
        assert!(step.agent.is_none());
        assert!(step.duration_secs.is_none());
        // Verify new fields are None by default
        assert!(step.git_commit_oid.is_none());
        assert!(step.modified_files_detail.is_none());
        assert!(step.prompt_used.is_none());
        assert!(step.issues_summary.is_none());
    }

    #[test]
    fn test_execution_step_with_agent() {
        let outcome = StepOutcome::success(None, vec![]);

        let step = ExecutionStep::new("Development", 1, "dev_run", outcome)
            .with_agent("claude")
            .with_duration(120);

        assert_eq!(step.agent, Some("claude".to_string()));
        assert_eq!(step.duration_secs, Some(120));
    }

    #[test]
    fn test_execution_step_new_fields_default() {
        let outcome = StepOutcome::success(None, vec![]);
        let step = ExecutionStep::new("Development", 1, "dev_run", outcome);

        // Verify new fields are None by default
        assert!(step.git_commit_oid.is_none());
        assert!(step.modified_files_detail.is_none());
        assert!(step.prompt_used.is_none());
        assert!(step.issues_summary.is_none());
    }

    #[test]
    fn test_modified_files_detail_default() {
        let detail = ModifiedFilesDetail::default();
        assert!(detail.added.is_empty());
        assert!(detail.modified.is_empty());
        assert!(detail.deleted.is_empty());
    }

    #[test]
    fn test_issues_summary_default() {
        let summary = IssuesSummary::default();
        assert_eq!(summary.found, 0);
        assert_eq!(summary.fixed, 0);
        assert!(summary.description.is_none());
    }

    #[test]
    fn test_file_snapshot() {
        let snapshot = FileSnapshot::new("test.txt", "abc123".to_string(), 100, true);

        assert_eq!(snapshot.path, "test.txt");
        assert_eq!(snapshot.checksum, "abc123");
        assert_eq!(snapshot.size, 100);
        assert!(snapshot.exists);
    }

    #[test]
    fn test_file_snapshot_not_found() {
        let snapshot = FileSnapshot::not_found("missing.txt");

        assert_eq!(snapshot.path, "missing.txt");
        assert!(!snapshot.exists);
        assert_eq!(snapshot.size, 0);
    }

    #[test]
    fn test_execution_history_add_step() {
        let mut history = ExecutionHistory::new();
        let outcome = StepOutcome::success(None, vec![]);

        let step = ExecutionStep::new("Development", 1, "dev_run", outcome);

        history.add_step(step);

        assert_eq!(history.steps.len(), 1);
        assert_eq!(history.steps[0].phase, "Development");
        assert_eq!(history.steps[0].iteration, 1);
    }

    #[test]
    fn test_execution_step_serialization_with_new_fields() {
        // Deserialize a payload in which every newer optional field is present
        // and check that each one is populated.
        let json_str = r#"{
            "phase": "Review",
            "iteration": 1,
            "step_type": "review",
            "timestamp": "2025-01-20 12:00:00",
            "outcome": {"Success": {"output": null, "files_modified": [], "exit_code": 0}},
            "agent": null,
            "duration_secs": null,
            "checkpoint_saved_at": null,
            "git_commit_oid": "abc123",
            "modified_files_detail": {
                "added": ["a.rs"],
                "modified": [],
                "deleted": []
            },
            "prompt_used": "Fix issues",
            "issues_summary": {
                "found": 2,
                "fixed": 2,
                "description": "All fixed"
            }
        }"#;

        let deserialized: ExecutionStep = serde_json::from_str(json_str).unwrap();

        assert_eq!(deserialized.git_commit_oid, Some("abc123".to_string()));
        assert_eq!(
            deserialized.modified_files_detail.as_ref().unwrap().added,
            vec!["a.rs"]
        );
        assert_eq!(deserialized.prompt_used, Some("Fix issues".to_string()));
        assert_eq!(deserialized.issues_summary.as_ref().unwrap().found, 2);
    }

    #[test]
    fn test_execution_step_deserialization_without_new_fields() {
        // Backward compatibility: a legacy payload that omits every field marked
        // `#[serde(default)]` must still deserialize, with those fields defaulted.
        let json_str = r#"{
            "phase": "Review",
            "iteration": 2,
            "step_type": "review",
            "timestamp": "2025-01-20 12:00:00",
            "outcome": {"Success": {"output": null, "files_modified": []}},
            "agent": null,
            "duration_secs": null
        }"#;

        let deserialized: ExecutionStep = serde_json::from_str(json_str).unwrap();

        assert_eq!(deserialized.phase, "Review");
        assert_eq!(deserialized.iteration, 2);
        assert_eq!(
            deserialized.outcome,
            StepOutcome::Success {
                output: None,
                files_modified: vec![],
                exit_code: None,
            }
        );
        assert!(deserialized.checkpoint_saved_at.is_none());
        assert!(deserialized.git_commit_oid.is_none());
        assert!(deserialized.modified_files_detail.is_none());
        assert!(deserialized.prompt_used.is_none());
        assert!(deserialized.issues_summary.is_none());
    }

    #[test]
    fn test_compressed_content_round_trip() {
        // A snapshot holding only compressed content must hand back the original text.
        let original = "# PLAN\n\nStep one.\nStep two.\n";
        let encoded = compress_data(original.as_bytes()).unwrap();

        assert_eq!(decompress_data(&encoded).unwrap(), original);

        let mut snapshot =
            FileSnapshot::new("PLAN.md", "abc123".to_string(), original.len() as u64, true);
        assert!(snapshot.get_content().is_none());

        snapshot.compressed_content = Some(encoded);
        assert_eq!(snapshot.get_content(), Some(original.to_string()));
    }
}