Skip to main content

ralph_workflow/checkpoint/
execution_history.rs

1//! Execution history tracking for checkpoint state.
2//!
3//! This module provides structures for tracking the execution history of a pipeline,
4//! enabling idempotent recovery and validation of state.
5
6pub mod compression;
7
8use crate::checkpoint::timestamp;
9use crate::workspace::Workspace;
10use serde::{Deserialize, Serialize};
11use std::collections::{HashMap, VecDeque};
12use std::path::Path;
13use std::sync::Arc;
14
15fn deserialize_option_boxed_string_slice_none_if_empty<'de, D>(
16    deserializer: D,
17) -> Result<Option<Box<[String]>>, D::Error>
18where
19    D: serde::Deserializer<'de>,
20{
21    let opt = Option::<Vec<String>>::deserialize(deserializer)?;
22    Ok(match opt {
23        None => None,
24        Some(v) if v.is_empty() => None,
25        Some(v) => Some(v.into_boxed_slice()),
26    })
27}
28
29fn serialize_option_boxed_string_slice_empty_if_none_field<S, V>(
30    value: V,
31    serializer: S,
32) -> Result<S::Ok, S::Error>
33where
34    S: serde::Serializer,
35    V: std::ops::Deref<Target = Option<Box<[String]>>>,
36{
37    let values = (*value).as_deref();
38    serialize_option_boxed_string_slice_empty_if_none(values, serializer)
39}
40
41fn serialize_option_boxed_string_slice_empty_if_none<S>(
42    value: Option<&[String]>,
43    serializer: S,
44) -> Result<S::Ok, S::Error>
45where
46    S: serde::Serializer,
47{
48    use serde::ser::SerializeSeq;
49
50    if let Some(values) = value {
51        values.serialize(serializer)
52    } else {
53        let seq = serializer.serialize_seq(Some(0))?;
54        seq.end()
55    }
56}
57
/// Outcome of an execution step.
///
/// # Memory Optimization
///
/// This enum uses `Box<str>` for string fields and `Option<Box<[String]>>` for
/// collections to reduce allocation overhead when fields are empty or small.
/// `Vec<T>` may carry spare capacity, while `Box<[T]>` uses exactly the needed space.
///
/// # Serialization
///
/// The list fields (`files_modified`, `signals`) are written as an empty
/// sequence when they are `None`, and a missing, `null`, or empty sequence is
/// read back as `None`. `exit_code` fields default to `None` when absent.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum StepOutcome {
    /// Step completed successfully
    Success {
        /// Output text recorded for the step, if any
        output: Option<Box<str>>,
        /// Files modified by the step (`None` when the list is empty)
        #[serde(
            default,
            deserialize_with = "deserialize_option_boxed_string_slice_none_if_empty",
            serialize_with = "serialize_option_boxed_string_slice_empty_if_none_field"
        )]
        files_modified: Option<Box<[String]>>,
        /// Exit code, if one was recorded
        #[serde(default)]
        exit_code: Option<i32>,
    },
    /// Step failed with error
    Failure {
        /// Error message describing the failure
        error: Box<str>,
        /// Whether the failure is flagged as recoverable
        recoverable: bool,
        /// Exit code, if one was recorded
        #[serde(default)]
        exit_code: Option<i32>,
        /// Signal strings attached to the failure (`None` when the list is empty).
        // NOTE(review): the exact meaning of "signals" is not visible in this file — confirm with callers.
        #[serde(
            default,
            deserialize_with = "deserialize_option_boxed_string_slice_none_if_empty",
            serialize_with = "serialize_option_boxed_string_slice_empty_if_none_field"
        )]
        signals: Option<Box<[String]>>,
    },
    /// Step partially completed (may need retry)
    Partial {
        /// Description of the part that was completed
        completed: Box<str>,
        /// Description of the part that remains
        remaining: Box<str>,
        /// Exit code, if one was recorded
        #[serde(default)]
        exit_code: Option<i32>,
    },
    /// Step was skipped (e.g., already done)
    Skipped {
        /// Why the step was skipped
        reason: Box<str>,
    },
}
102
103impl StepOutcome {
104    /// Create a Success outcome with default values.
105    pub fn success(output: Option<String>, files_modified: Vec<String>) -> Self {
106        Self::Success {
107            output: output.map(String::into_boxed_str),
108            files_modified: if files_modified.is_empty() {
109                None
110            } else {
111                Some(files_modified.into_boxed_slice())
112            },
113            exit_code: Some(0),
114        }
115    }
116
117    /// Create a Failure outcome with default values.
118    #[must_use]
119    pub fn failure(error: String, recoverable: bool) -> Self {
120        Self::Failure {
121            error: error.into_boxed_str(),
122            recoverable,
123            exit_code: None,
124            signals: None,
125        }
126    }
127
128    /// Create a Partial outcome with default values.
129    #[must_use]
130    pub fn partial(completed: String, remaining: String) -> Self {
131        Self::Partial {
132            completed: completed.into_boxed_str(),
133            remaining: remaining.into_boxed_str(),
134            exit_code: None,
135        }
136    }
137
138    /// Create a Skipped outcome.
139    #[must_use]
140    pub fn skipped(reason: String) -> Self {
141        Self::Skipped {
142            reason: reason.into_boxed_str(),
143        }
144    }
145}
146
/// Detailed information about files modified in a step.
///
/// # Memory Optimization
///
/// Uses `Option<Box<[String]>>` instead of `Vec<String>` to save memory
/// (sizes are for typical 64-bit targets):
/// - Each field is 16 bytes (a fat pointer whose null niche encodes `None`)
///   versus 24 bytes for a `Vec<String>`, whether empty or not (saves 8 bytes per field)
/// - Non-empty collections hold exactly their elements, with no spare capacity
/// - Total inline savings: 24 bytes per instance across the three fields
///
/// # Serialization
///
/// `None` fields are omitted when serializing; missing, `null`, or empty lists
/// deserialize to `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct ModifiedFilesDetail {
    /// Files added (`None` when none were recorded)
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        deserialize_with = "deserialize_option_boxed_string_slice_none_if_empty"
    )]
    pub added: Option<Box<[String]>>,
    /// Files modified (`None` when none were recorded)
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        deserialize_with = "deserialize_option_boxed_string_slice_none_if_empty"
    )]
    pub modified: Option<Box<[String]>>,
    /// Files deleted (`None` when none were recorded)
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        deserialize_with = "deserialize_option_boxed_string_slice_none_if_empty"
    )]
    pub deleted: Option<Box<[String]>>,
}
176
/// Summary of issues found and fixed during a step.
///
/// Counts that are missing from serialized data deserialize as `0`; the
/// description is omitted from serialized output when it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct IssuesSummary {
    /// Number of issues found
    #[serde(default)]
    pub found: u32,
    /// Number of issues fixed
    #[serde(default)]
    pub fixed: u32,
    /// Description of issues (e.g., "3 clippy warnings, 2 test failures")
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
}
190
/// A single execution step in the pipeline history.
///
/// # Memory Optimization
///
/// This struct uses `Arc<str>` for the `phase` and `agent` fields to reduce memory
/// usage through string interning. Phase names and agent names are repeated
/// frequently across execution history entries, so sharing allocations via
/// `Arc<str>` significantly reduces heap usage.
///
/// # Serialization
///
/// Serialization/deserialization is backward-compatible - `Arc<str>` is serialized
/// as a regular string and can be deserialized from both old (`String`) and new
/// (`Arc<str>`) checkpoint formats. Every `Option` field below is omitted from
/// the serialized output when it is `None` and defaults to `None` when absent.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ExecutionStep {
    /// Phase this step belongs to (interned via `Arc<str>`)
    pub phase: Arc<str>,
    /// Iteration number (for development/review iterations)
    pub iteration: u32,
    /// Type of step (e.g., "review", "fix", "commit")
    pub step_type: Box<str>,
    /// When this step was executed (ISO 8601 format string)
    pub timestamp: String,
    /// Outcome of the step
    pub outcome: StepOutcome,
    /// Agent that executed this step (interned via `Arc<str>`)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub agent: Option<Arc<str>>,
    /// Duration in seconds (if available)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub duration_secs: Option<u64>,
    /// When a checkpoint was saved during this step (ISO 8601 format string)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub checkpoint_saved_at: Option<String>,
    /// Git commit OID created during this step (if any)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_oid: Option<String>,
    /// Detailed information about files modified
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub modified_files_detail: Option<ModifiedFilesDetail>,
    /// The prompt text used for this step (for deterministic replay)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub prompt_used: Option<String>,
    /// Issues summary (found and fixed counts)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub issues_summary: Option<IssuesSummary>,
}
237
238impl ExecutionStep {
239    /// Create a new execution step.
240    ///
241    /// # Performance Note
242    ///
243    /// For optimal memory usage, use `new_with_pool` to intern repeated phase
244    /// and agent names via a `StringPool`. This constructor creates new Arc<str>
245    /// allocations for each call.
246    #[must_use]
247    pub fn new(phase: &str, iteration: u32, step_type: &str, outcome: StepOutcome) -> Self {
248        Self {
249            phase: Arc::from(phase),
250            iteration,
251            step_type: Box::from(step_type),
252            timestamp: timestamp(),
253            outcome,
254            agent: None,
255            duration_secs: None,
256            checkpoint_saved_at: None,
257            git_commit_oid: None,
258            modified_files_detail: None,
259            prompt_used: None,
260            issues_summary: None,
261        }
262    }
263
264    /// Create a new execution step using a `StringPool` for interning.
265    ///
266    /// This is the preferred constructor when creating many `ExecutionSteps`,
267    /// as it reduces memory usage by sharing allocations for repeated phase
268    /// and agent names.
269    pub fn new_with_pool(
270        phase: &str,
271        iteration: u32,
272        step_type: &str,
273        outcome: StepOutcome,
274        pool: crate::checkpoint::StringPool,
275    ) -> (Self, crate::checkpoint::StringPool) {
276        let (pool, phase_arc) = pool.intern_str(phase);
277        (
278            Self {
279                phase: phase_arc,
280                iteration,
281                step_type: Box::from(step_type),
282                timestamp: timestamp(),
283                outcome,
284                agent: None,
285                duration_secs: None,
286                checkpoint_saved_at: None,
287                git_commit_oid: None,
288                modified_files_detail: None,
289                prompt_used: None,
290                issues_summary: None,
291            },
292            pool,
293        )
294    }
295
296    /// Set the agent that executed this step.
297    #[must_use]
298    pub fn with_agent(mut self, agent: &str) -> Self {
299        self.agent = Some(Arc::from(agent));
300        self
301    }
302
303    /// Set the agent using a `StringPool` for interning.
304    #[must_use]
305    pub fn with_agent_pooled(
306        mut self,
307        agent: &str,
308        pool: crate::checkpoint::StringPool,
309    ) -> (Self, crate::checkpoint::StringPool) {
310        let (pool, agent_arc) = pool.intern_str(agent);
311        self.agent = Some(agent_arc);
312        (self, pool)
313    }
314
315    /// Set the duration of this step.
316    #[must_use]
317    pub const fn with_duration(mut self, duration_secs: u64) -> Self {
318        self.duration_secs = Some(duration_secs);
319        self
320    }
321
322    /// Set the git commit OID created during this step.
323    #[must_use]
324    pub fn with_git_commit_oid(mut self, oid: &str) -> Self {
325        self.git_commit_oid = Some(oid.to_string());
326        self
327    }
328}
329
330include!("execution_history/file_snapshot.rs");
331
/// Execution history tracking.
///
/// Holds the ordered list of recorded steps plus a map of file snapshots.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct ExecutionHistory {
    /// All execution steps in order (oldest at the front, newest at the back)
    pub steps: VecDeque<ExecutionStep>,
    /// File snapshots for key files at checkpoint time
    // NOTE(review): the map key is presumably the file path — confirm against `FileSnapshot` usage.
    pub file_snapshots: HashMap<String, FileSnapshot>,
}
340
341impl ExecutionHistory {
342    /// Execution history must be bounded.
343    ///
344    /// The historical unbounded `add_step` API is intentionally not available in
345    /// non-test builds to avoid reintroducing unbounded growth.
346    ///
347    /// ```compile_fail
348    /// use ralph_workflow::checkpoint::ExecutionHistory;
349    /// use ralph_workflow::checkpoint::execution_history::{ExecutionStep, StepOutcome};
350    ///
351    /// let mut history = ExecutionHistory::new();
352    /// let step = ExecutionStep::new("Development", 0, "dev_run", StepOutcome::success(None, vec![]));
353    ///
354    /// // Unbounded push is not part of the public API.
355    /// history.add_step(step);
356    /// ```
357    /// Create a new execution history.
358    #[must_use]
359    pub fn new() -> Self {
360        Self::default()
361    }
362
363    /// Add an execution step with explicit bounding (preferred method).
364    ///
365    /// This is the preferred method that enforces bounded memory growth.
366    /// Use this to prevent unbounded growth.
367    #[must_use]
368    pub fn add_step_bounded(&mut self, step: ExecutionStep, limit: usize) -> &mut Self {
369        let drop_count = self.steps.len().saturating_sub(limit.saturating_sub(1));
370        self.steps = self
371            .steps
372            .iter()
373            .skip(drop_count)
374            .chain(std::iter::once(&step))
375            .cloned()
376            .collect();
377        self
378    }
379
380    /// Clone this execution history while enforcing a hard step limit.
381    ///
382    /// This is intended for resume paths where a legacy checkpoint may contain an
383    /// oversized `steps` buffer. Cloning only the tail avoids allocating memory
384    /// proportional to the checkpoint's full history.
385    #[must_use]
386    pub fn clone_bounded(&self, limit: usize) -> Self {
387        if limit == 0 {
388            return Self {
389                steps: VecDeque::new(),
390                file_snapshots: self.file_snapshots.clone(),
391            };
392        }
393
394        let len = self.steps.len();
395        if len <= limit {
396            return self.clone();
397        }
398
399        let keep_from = len.saturating_sub(limit);
400        let steps: VecDeque<_> = self.steps.iter().skip(keep_from).cloned().collect();
401        Self {
402            steps,
403            file_snapshots: self.file_snapshots.clone(),
404        }
405    }
406}
407
408#[cfg(test)]
409mod tests;