ralph_workflow/checkpoint/execution_history.rs

//! Execution history tracking for checkpoint state.
//!
//! This module provides structures for tracking the execution history of a pipeline,
//! enabling idempotent recovery and validation of state.

use crate::checkpoint::timestamp;
use crate::workspace::Workspace;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::Path;

/// Outcome of an execution step.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum StepOutcome {
    /// Step completed successfully
    Success {
        output: Option<String>,
        files_modified: Vec<String>,
        #[serde(default)]
        exit_code: Option<i32>,
    },
    /// Step failed with error
    Failure {
        error: String,
        recoverable: bool,
        #[serde(default)]
        exit_code: Option<i32>,
        #[serde(default)]
        signals: Vec<String>,
    },
    /// Step partially completed (may need retry)
    Partial {
        completed: String,
        remaining: String,
        #[serde(default)]
        exit_code: Option<i32>,
    },
    /// Step was skipped (e.g., already done)
    Skipped { reason: String },
}

impl StepOutcome {
    /// Create a Success outcome with default values.
    pub fn success(output: Option<String>, files_modified: Vec<String>) -> Self {
        Self::Success {
            output,
            files_modified,
            exit_code: Some(0),
        }
    }

    /// Create a Failure outcome with default values.
    pub fn failure(error: String, recoverable: bool) -> Self {
        Self::Failure {
            error,
            recoverable,
            exit_code: None,
            signals: Vec::new(),
        }
    }

    /// Create a Partial outcome with default values.
    pub fn partial(completed: String, remaining: String) -> Self {
        Self::Partial {
            completed,
            remaining,
            exit_code: None,
        }
    }

    /// Create a Skipped outcome.
    pub fn skipped(reason: String) -> Self {
        Self::Skipped { reason }
    }
}

/// Detailed information about files modified in a step.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct ModifiedFilesDetail {
    #[serde(default)]
    pub added: Vec<String>,
    #[serde(default)]
    pub modified: Vec<String>,
    #[serde(default)]
    pub deleted: Vec<String>,
}

/// Summary of issues found and fixed during a step.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct IssuesSummary {
    /// Number of issues found
    #[serde(default)]
    pub found: u32,
    /// Number of issues fixed
    #[serde(default)]
    pub fixed: u32,
    /// Description of issues (e.g., "3 clippy warnings, 2 test failures")
    #[serde(default)]
    pub description: Option<String>,
}

/// A single execution step in the pipeline history.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ExecutionStep {
    /// Phase this step belongs to
    pub phase: String,
    /// Iteration number (for development/review iterations)
    pub iteration: u32,
    /// Type of step (e.g., "review", "fix", "commit")
    pub step_type: String,
    /// When this step was executed (ISO 8601 format string)
    pub timestamp: String,
    /// Outcome of the step
    pub outcome: StepOutcome,
    /// Agent that executed this step
    pub agent: Option<String>,
    /// Duration in seconds (if available)
    pub duration_secs: Option<u64>,
    /// When a checkpoint was saved during this step (ISO 8601 format string)
    #[serde(default)]
    pub checkpoint_saved_at: Option<String>,
    /// Git commit OID created during this step (if any)
    #[serde(default)]
    pub git_commit_oid: Option<String>,
    /// Detailed information about files modified
    #[serde(default)]
    pub modified_files_detail: Option<ModifiedFilesDetail>,
    /// The prompt text used for this step (for deterministic replay)
    #[serde(default)]
    pub prompt_used: Option<String>,
    /// Issues summary (found and fixed counts)
    #[serde(default)]
    pub issues_summary: Option<IssuesSummary>,
}

impl ExecutionStep {
    /// Create a new execution step.
    pub fn new(phase: &str, iteration: u32, step_type: &str, outcome: StepOutcome) -> Self {
        Self {
            phase: phase.to_string(),
            iteration,
            step_type: step_type.to_string(),
            timestamp: timestamp(),
            outcome,
            agent: None,
            duration_secs: None,
            checkpoint_saved_at: None,
            git_commit_oid: None,
            modified_files_detail: None,
            prompt_used: None,
            issues_summary: None,
        }
    }

    /// Set the agent that executed this step.
    pub fn with_agent(mut self, agent: &str) -> Self {
        self.agent = Some(agent.to_string());
        self
    }

    /// Set the duration of this step.
    pub fn with_duration(mut self, duration_secs: u64) -> Self {
        self.duration_secs = Some(duration_secs);
        self
    }

    /// Set the git commit OID created during this step.
    pub fn with_git_commit_oid(mut self, oid: &str) -> Self {
        self.git_commit_oid = Some(oid.to_string());
        self
    }
}

/// Default threshold for storing file content in snapshots (10KB).
///
/// Files smaller than this threshold will have their full content stored
/// in the checkpoint for automatic recovery on resume.
const DEFAULT_CONTENT_THRESHOLD: u64 = 10 * 1024;

/// Maximum file size that will be compressed in snapshots (100KB).
///
/// Files between `DEFAULT_CONTENT_THRESHOLD` and this size that are key files
/// (PROMPT.md, PLAN.md, ISSUES.md, NOTES.md) will be compressed before storing.
const MAX_COMPRESS_SIZE: u64 = 100 * 1024;

/// Snapshot of a file's state at a point in time.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct FileSnapshot {
    /// Path to the file
    pub path: String,
    /// SHA-256 checksum of file contents
    pub checksum: String,
    /// File size in bytes
    pub size: u64,
    /// For small files (< 10KB by default), store full content
    pub content: Option<String>,
    /// Compressed content (base64-encoded gzip) for larger key files
    pub compressed_content: Option<String>,
    /// Whether the file existed
    pub exists: bool,
}

impl FileSnapshot {
    /// Create a new file snapshot without capturing file content.
    ///
    /// Both `content` and `compressed_content` will be `None`.
    /// Use `from_workspace` to create a snapshot that captures content from a workspace.
    pub fn new(path: &str, checksum: String, size: u64, exists: bool) -> Self {
        Self {
            path: path.to_string(),
            checksum,
            size,
            content: None,
            compressed_content: None,
            exists,
        }
    }

    /// Create a file snapshot from a workspace using the default content threshold (10KB).
    ///
    /// Files smaller than 10KB will have their content stored.
    /// Key files (PROMPT.md, PLAN.md, ISSUES.md, NOTES.md) may be compressed if they
    /// are between 10KB and 100KB.
    pub fn from_workspace_default(
        workspace: &dyn Workspace,
        path: &str,
        checksum: String,
        size: u64,
        exists: bool,
    ) -> Self {
        Self::from_workspace(
            workspace,
            path,
            checksum,
            size,
            exists,
            DEFAULT_CONTENT_THRESHOLD,
        )
    }

    /// Create a file snapshot from a workspace, optionally capturing content.
    ///
    /// Files smaller than `max_size` bytes will have their content stored.
    /// Key files (PROMPT.md, PLAN.md, ISSUES.md, NOTES.md) may be compressed if they
    /// are between `max_size` and `MAX_COMPRESS_SIZE`.
    pub fn from_workspace(
        workspace: &dyn Workspace,
        path: &str,
        checksum: String,
        size: u64,
        exists: bool,
        max_size: u64,
    ) -> Self {
        let mut content = None;
        let mut compressed_content = None;

        if exists {
            let is_key_file = path.contains("PROMPT.md")
                || path.contains("PLAN.md")
                || path.contains("ISSUES.md")
                || path.contains("NOTES.md");

            let path_ref = Path::new(path);

            if size < max_size {
                // For small files, read and store content directly
                content = workspace.read(path_ref).ok();
            } else if is_key_file && size < MAX_COMPRESS_SIZE {
                // For larger key files, compress the content
                if let Ok(data) = workspace.read_bytes(path_ref) {
                    compressed_content = compress_data(&data).ok();
                }
            }
        }

        Self {
            path: path.to_string(),
            checksum,
            size,
            content,
            compressed_content,
            exists,
        }
    }

    /// Get the file content, decompressing if necessary.
    pub fn get_content(&self) -> Option<String> {
        if let Some(ref content) = self.content {
            Some(content.clone())
        } else if let Some(ref compressed) = self.compressed_content {
            decompress_data(compressed).ok()
        } else {
            None
        }
    }

    /// Create a snapshot for a non-existent file.
    pub fn not_found(path: &str) -> Self {
        Self {
            path: path.to_string(),
            checksum: String::new(),
            size: 0,
            content: None,
            compressed_content: None,
            exists: false,
        }
    }

    /// Verify that the current file state matches this snapshot using a workspace.
    pub fn verify_with_workspace(&self, workspace: &dyn Workspace) -> bool {
        let path = Path::new(&self.path);

        if !self.exists {
            return !workspace.exists(path);
        }

        let Ok(content) = workspace.read_bytes(path) else {
            return false;
        };

        if content.len() as u64 != self.size {
            return false;
        }

        let checksum = crate::checkpoint::state::calculate_checksum_from_bytes(&content);
        checksum == self.checksum
    }
}

/// Compress data using gzip and encode as base64.
///
/// This is used to store larger file content in checkpoints without
/// bloating the checkpoint file size too much.
fn compress_data(data: &[u8]) -> Result<String, std::io::Error> {
    use base64::{engine::general_purpose::STANDARD, Engine};
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;

    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(data)?;
    let compressed = encoder.finish()?;

    Ok(STANDARD.encode(&compressed))
}

/// Decompress data that was compressed with `compress_data`.
fn decompress_data(encoded: &str) -> Result<String, std::io::Error> {
    use base64::{engine::general_purpose::STANDARD, Engine};
    use flate2::read::GzDecoder;
    use std::io::Read;

    let compressed = STANDARD.decode(encoded).map_err(|e| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!("Base64 decode error: {}", e),
        )
    })?;

    let mut decoder = GzDecoder::new(compressed.as_slice());
    let mut decompressed = Vec::new();
    decoder.read_to_end(&mut decompressed)?;

    String::from_utf8(decompressed).map_err(|e| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!("UTF-8 decode error: {}", e),
        )
    })
}

/// Execution history tracking.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct ExecutionHistory {
    /// All execution steps in order
    pub steps: Vec<ExecutionStep>,
    /// File snapshots for key files at checkpoint time
    pub file_snapshots: HashMap<String, FileSnapshot>,
}

impl ExecutionHistory {
    /// Create a new execution history.
    pub fn new() -> Self {
        Self::default()
    }

    /// Add an execution step.
    pub fn add_step(&mut self, step: ExecutionStep) {
        self.steps.push(step);
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_execution_step_new() {
        let outcome = StepOutcome::success(None, vec!["test.txt".to_string()]);
        let step = ExecutionStep::new("Development", 1, "dev_run", outcome);
        assert_eq!(step.phase, "Development");
        assert_eq!(step.iteration, 1);
        assert_eq!(step.step_type, "dev_run");
        assert!(step.agent.is_none());
        assert!(step.duration_secs.is_none());
        // Verify new fields are None by default
        assert!(step.git_commit_oid.is_none());
        assert!(step.modified_files_detail.is_none());
        assert!(step.prompt_used.is_none());
        assert!(step.issues_summary.is_none());
    }
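
    // Illustrative sketch (not in the original source): exercises the remaining
    // StepOutcome constructors defined above; the expected field values mirror
    // the defaults those constructors set.
    #[test]
    fn test_step_outcome_constructors() {
        let failure = StepOutcome::failure("cargo build failed".to_string(), true);
        assert!(matches!(
            failure,
            StepOutcome::Failure {
                recoverable: true,
                exit_code: None,
                ..
            }
        ));

        let partial = StepOutcome::partial("wrote tests".to_string(), "fix clippy".to_string());
        assert!(matches!(partial, StepOutcome::Partial { exit_code: None, .. }));

        let skipped = StepOutcome::skipped("already committed".to_string());
        assert!(matches!(skipped, StepOutcome::Skipped { .. }));
    }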

    #[test]
    fn test_execution_step_with_agent() {
        let outcome = StepOutcome::success(None, vec![]);
        let step = ExecutionStep::new("Development", 1, "dev_run", outcome)
            .with_agent("claude")
            .with_duration(120);
        assert_eq!(step.agent, Some("claude".to_string()));
        assert_eq!(step.duration_secs, Some(120));
    }
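
    // Illustrative sketch (not in the original source): shows the builder
    // methods chaining, including with_git_commit_oid, which the tests above
    // do not cover. The OID value is an arbitrary placeholder.
    #[test]
    fn test_execution_step_with_git_commit_oid() {
        let outcome = StepOutcome::success(None, vec![]);
        let step = ExecutionStep::new("Commit", 1, "commit", outcome)
            .with_agent("claude")
            .with_duration(5)
            .with_git_commit_oid("abc123def456");
        assert_eq!(step.git_commit_oid, Some("abc123def456".to_string()));
        assert_eq!(step.agent, Some("claude".to_string()));
    }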

    #[test]
    fn test_execution_step_new_fields_default() {
        let outcome = StepOutcome::success(None, vec![]);
        let step = ExecutionStep::new("Development", 1, "dev_run", outcome);
        // Verify new fields are None by default
        assert!(step.git_commit_oid.is_none());
        assert!(step.modified_files_detail.is_none());
        assert!(step.prompt_used.is_none());
        assert!(step.issues_summary.is_none());
    }

    #[test]
    fn test_modified_files_detail_default() {
        let detail = ModifiedFilesDetail::default();
        assert!(detail.added.is_empty());
        assert!(detail.modified.is_empty());
        assert!(detail.deleted.is_empty());
    }

    #[test]
    fn test_issues_summary_default() {
        let summary = IssuesSummary::default();
        assert_eq!(summary.found, 0);
        assert_eq!(summary.fixed, 0);
        assert!(summary.description.is_none());
    }

    #[test]
    fn test_file_snapshot() {
        let snapshot = FileSnapshot::new("test.txt", "abc123".to_string(), 100, true);
        assert_eq!(snapshot.path, "test.txt");
        assert_eq!(snapshot.checksum, "abc123");
        assert_eq!(snapshot.size, 100);
        assert!(snapshot.exists);
    }
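
    // Illustrative sketch (not in the original source): get_content should
    // return stored plain content when present and fall back to decompressing
    // compressed_content; the paths and values here are arbitrary.
    #[test]
    fn test_file_snapshot_get_content() {
        let empty = FileSnapshot::new("PLAN.md", "abc".to_string(), 4, true);
        assert!(empty.get_content().is_none());

        let plain = FileSnapshot {
            content: Some("plan".to_string()),
            ..FileSnapshot::new("PLAN.md", "abc".to_string(), 4, true)
        };
        assert_eq!(plain.get_content(), Some("plan".to_string()));

        let compressed = FileSnapshot {
            compressed_content: compress_data(b"notes body").ok(),
            ..FileSnapshot::new("NOTES.md", "def".to_string(), 10, true)
        };
        assert_eq!(compressed.get_content(), Some("notes body".to_string()));
    }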

    #[test]
    fn test_file_snapshot_not_found() {
        let snapshot = FileSnapshot::not_found("missing.txt");
        assert_eq!(snapshot.path, "missing.txt");
        assert!(!snapshot.exists);
        assert_eq!(snapshot.size, 0);
    }
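
    // Illustrative sketch (not in the original source): round-trips bytes
    // through the private gzip/base64 helpers to check that compress_data and
    // decompress_data are inverses for UTF-8 input.
    #[test]
    fn test_compress_decompress_roundtrip() {
        let original = "PLAN.md line\n".repeat(200);
        let encoded = compress_data(original.as_bytes()).expect("compression should succeed");
        let decoded = decompress_data(&encoded).expect("decompression should succeed");
        assert_eq!(decoded, original);
    }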

    #[test]
    fn test_execution_history_add_step() {
        let mut history = ExecutionHistory::new();
        let outcome = StepOutcome::success(None, vec![]);
        let step = ExecutionStep::new("Development", 1, "dev_run", outcome);
        history.add_step(step);
        assert_eq!(history.steps.len(), 1);
        assert_eq!(history.steps[0].phase, "Development");
        assert_eq!(history.steps[0].iteration, 1);
    }
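
    // Illustrative sketch (not in the original source): file snapshots are
    // stored in a HashMap keyed by path; the path used here is arbitrary.
    #[test]
    fn test_execution_history_file_snapshots() {
        let mut history = ExecutionHistory::new();
        history
            .file_snapshots
            .insert("PLAN.md".to_string(), FileSnapshot::not_found("PLAN.md"));
        assert_eq!(history.file_snapshots.len(), 1);
        assert!(!history.file_snapshots["PLAN.md"].exists);
    }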

    #[test]
    fn test_execution_step_serialization_with_new_fields() {
        // Deserialize a step whose JSON includes the newer optional fields
        let json_str = r#"{"phase":"Review","iteration":1,"step_type":"review","timestamp":"2025-01-20 12:00:00","outcome":{"Success":{"output":null,"files_modified":[],"exit_code":0}},"agent":null,"duration_secs":null,"checkpoint_saved_at":null,"git_commit_oid":"abc123","modified_files_detail":{"added":["a.rs"],"modified":[],"deleted":[]},"prompt_used":"Fix issues","issues_summary":{"found":2,"fixed":2,"description":"All fixed"}}"#;
        let deserialized: ExecutionStep = serde_json::from_str(json_str).unwrap();
        assert_eq!(deserialized.git_commit_oid, Some("abc123".to_string()));
        assert_eq!(
            deserialized.modified_files_detail.as_ref().unwrap().added,
            vec!["a.rs"]
        );
        assert_eq!(deserialized.prompt_used, Some("Fix issues".to_string()));
        assert_eq!(deserialized.issues_summary.as_ref().unwrap().found, 2);
    }
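
    // Illustrative sketch (not in the original source): JSON written before the
    // #[serde(default)] fields existed should still deserialize, with the
    // omitted optional fields falling back to None.
    #[test]
    fn test_execution_step_deserializes_legacy_json() {
        let json_str = r#"{"phase":"Development","iteration":2,"step_type":"dev_run","timestamp":"2025-01-20 12:00:00","outcome":{"Skipped":{"reason":"already done"}},"agent":null,"duration_secs":null}"#;
        let step: ExecutionStep = serde_json::from_str(json_str).unwrap();
        assert_eq!(step.iteration, 2);
        assert!(step.checkpoint_saved_at.is_none());
        assert!(step.git_commit_oid.is_none());
        assert!(step.modified_files_detail.is_none());
        assert!(step.issues_summary.is_none());
    }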
}