Skip to main content

ralph_workflow/checkpoint/
builder.rs

1//! Checkpoint builder for convenient checkpoint creation.
2//!
3//! This module provides a builder pattern for creating checkpoints
4//! from various contexts in the pipeline.
5
6use crate::agents::AgentRegistry;
7use crate::checkpoint::execution_history::ExecutionHistory;
8use crate::checkpoint::file_state::FileSystemState;
9use crate::checkpoint::state::{
10    calculate_file_checksum_with_workspace, AgentConfigSnapshot, CheckpointParams, CliArgsSnapshot,
11    PipelineCheckpoint, PipelinePhase, RebaseState,
12};
13use crate::checkpoint::RunContext;
14use crate::config::{Config, ReviewDepth};
15use crate::executor::ProcessExecutor;
16use crate::logger::Logger;
17use crate::reducer::state::PromptInputsState;
18use crate::workspace::Workspace;
19use std::sync::Arc;
20
21/// Builder for creating pipeline checkpoints.
22///
23/// Provides a convenient interface for capturing all necessary state
24/// when creating checkpoints during pipeline execution.
25///
26/// # Example
27///
28/// ```ignore
29/// let checkpoint = CheckpointBuilder::new()
30///     .phase(PipelinePhase::Development, 3, 5)
31///     .reviewer_pass(1, 2)
32///     .capture_from_config(&ctx, &registry, "claude", "codex")
33///     .build();
34/// ```
35pub struct CheckpointBuilder {
36    phase: Option<PipelinePhase>,
37    iteration: u32,
38    total_iterations: u32,
39    reviewer_pass: u32,
40    total_reviewer_passes: u32,
41    developer_agent: Option<String>,
42    reviewer_agent: Option<String>,
43    cli_args: Option<CliArgsSnapshot>,
44    developer_agent_config: Option<AgentConfigSnapshot>,
45    reviewer_agent_config: Option<AgentConfigSnapshot>,
46    rebase_state: RebaseState,
47    config_path: Option<std::path::PathBuf>,
48    git_user_name: Option<String>,
49    git_user_email: Option<String>,
50    // Run context for tracking execution lineage and state
51    run_context: Option<RunContext>,
52    // Hardened resume fields
53    execution_history: Option<ExecutionHistory>,
54    prompt_history: Option<std::collections::HashMap<String, String>>,
55    prompt_inputs: Option<PromptInputsState>,
56    // Process executor for external process execution
57    executor: Option<Arc<dyn ProcessExecutor>>,
58    // Logging run_id (timestamp-based) for per-run log directory
59    log_run_id: Option<String>,
60}
61
62impl Default for CheckpointBuilder {
63    fn default() -> Self {
64        Self::new()
65    }
66}
67
68impl CheckpointBuilder {
69    /// Create a new checkpoint builder with default values.
70    pub fn new() -> Self {
71        Self {
72            phase: None,
73            iteration: 1,
74            total_iterations: 1,
75            reviewer_pass: 0,
76            total_reviewer_passes: 0,
77            developer_agent: None,
78            reviewer_agent: None,
79            cli_args: None,
80            developer_agent_config: None,
81            reviewer_agent_config: None,
82            rebase_state: RebaseState::default(),
83            config_path: None,
84            git_user_name: None,
85            git_user_email: None,
86            run_context: None,
87            execution_history: None,
88            prompt_history: None,
89            prompt_inputs: None,
90            executor: None,
91            log_run_id: None,
92        }
93    }
94
95    /// Set the phase and iteration information.
96    pub fn phase(mut self, phase: PipelinePhase, iteration: u32, total_iterations: u32) -> Self {
97        self.phase = Some(phase);
98        self.iteration = iteration;
99        self.total_iterations = total_iterations;
100        self
101    }
102
103    /// Set the reviewer pass information.
104    pub fn reviewer_pass(mut self, pass: u32, total: u32) -> Self {
105        self.reviewer_pass = pass;
106        self.total_reviewer_passes = total;
107        self
108    }
109
110    /// Set the agent names.
111    pub fn agents(mut self, developer: &str, reviewer: &str) -> Self {
112        self.developer_agent = Some(developer.to_string());
113        self.reviewer_agent = Some(reviewer.to_string());
114        self
115    }
116
117    /// Set the CLI arguments snapshot.
118    pub fn cli_args(mut self, args: CliArgsSnapshot) -> Self {
119        self.cli_args = Some(args);
120        self
121    }
122
123    /// Set the developer agent configuration snapshot.
124    pub fn developer_config(mut self, config: AgentConfigSnapshot) -> Self {
125        self.developer_agent_config = Some(config);
126        self
127    }
128
129    /// Set the reviewer agent configuration snapshot.
130    pub fn reviewer_config(mut self, config: AgentConfigSnapshot) -> Self {
131        self.reviewer_agent_config = Some(config);
132        self
133    }
134
135    /// Set the rebase state.
136    pub fn rebase_state(mut self, state: RebaseState) -> Self {
137        self.rebase_state = state;
138        self
139    }
140
141    /// Set the config path.
142    pub fn config_path(mut self, path: Option<std::path::PathBuf>) -> Self {
143        self.config_path = path;
144        self
145    }
146
147    /// Set the git user name and email.
148    pub fn git_identity(mut self, name: Option<&str>, email: Option<&str>) -> Self {
149        self.git_user_name = name.map(String::from);
150        self.git_user_email = email.map(String::from);
151        self
152    }
153
154    /// Set the process executor for external process execution.
155    pub fn with_executor(mut self, executor: Arc<dyn ProcessExecutor>) -> Self {
156        self.executor = Some(executor);
157        self
158    }
159
160    /// Capture CLI arguments from a Config.
161    pub fn capture_cli_args(mut self, config: &Config) -> Self {
162        let review_depth_str = review_depth_to_string(config.review_depth);
163        let snapshot = crate::checkpoint::state::CliArgsSnapshotBuilder::new(
164            config.developer_iters,
165            config.reviewer_reviews,
166            review_depth_str,
167            config.isolation_mode,
168        )
169        .verbosity(config.verbosity as u8)
170        .show_streaming_metrics(config.show_streaming_metrics)
171        .reviewer_json_parser(config.reviewer_json_parser.clone())
172        .build();
173        self.cli_args = Some(snapshot);
174        self
175    }
176
177    /// Capture all configuration from a PhaseContext and AgentRegistry.
178    ///
179    /// This is a convenience method that captures CLI args and both agent configs.
180    /// It takes a PhaseContext which provides access to config, registry, and agents.
181    pub fn capture_from_context(
182        mut self,
183        config: &Config,
184        registry: &AgentRegistry,
185        developer_name: &str,
186        reviewer_name: &str,
187        logger: &Logger,
188        run_context: &RunContext,
189    ) -> Self {
190        // Store run context (cloned for builder ownership)
191        self.run_context = Some(run_context.clone());
192
193        // Capture CLI args
194        self = self.capture_cli_args(config);
195
196        // Capture developer agent config
197        if let Some(agent_config) = registry.resolve_config(developer_name) {
198            let snapshot = AgentConfigSnapshot::new(
199                developer_name.to_string(),
200                agent_config.cmd.clone(),
201                agent_config.output_flag.clone(),
202                Some(agent_config.yolo_flag.clone()),
203                agent_config.can_commit,
204            )
205            .with_model_override(config.developer_model.clone())
206            .with_provider_override(config.developer_provider.clone())
207            .with_context_level(config.developer_context);
208            self.developer_agent_config = Some(snapshot);
209            self.developer_agent = Some(developer_name.to_string());
210        } else {
211            logger.warn(&format!(
212                "Developer agent '{}' not found in registry",
213                developer_name
214            ));
215        }
216
217        // Capture reviewer agent config
218        if let Some(agent_config) = registry.resolve_config(reviewer_name) {
219            let snapshot = AgentConfigSnapshot::new(
220                reviewer_name.to_string(),
221                agent_config.cmd.clone(),
222                agent_config.output_flag.clone(),
223                Some(agent_config.yolo_flag.clone()),
224                agent_config.can_commit,
225            )
226            .with_model_override(config.reviewer_model.clone())
227            .with_provider_override(config.reviewer_provider.clone())
228            .with_context_level(config.reviewer_context);
229            self.reviewer_agent_config = Some(snapshot);
230            self.reviewer_agent = Some(reviewer_name.to_string());
231        } else {
232            logger.warn(&format!(
233                "Reviewer agent '{}' not found in registry",
234                reviewer_name
235            ));
236        }
237
238        // Capture git identity
239        self.git_user_name = config.git_user_name.clone();
240        self.git_user_email = config.git_user_email.clone();
241
242        self
243    }
244
245    /// Set the executor from a PhaseContext.
246    ///
247    /// This is a convenience method that extracts the executor_arc from PhaseContext
248    /// and sets it for the checkpoint builder.
249    pub fn with_executor_from_context(mut self, executor_arc: Arc<dyn ProcessExecutor>) -> Self {
250        self.executor = Some(executor_arc);
251        self
252    }
253
254    /// Attach execution history from a PhaseContext.
255    ///
256    /// This method captures the execution history from the phase context
257    /// and attaches it to the checkpoint.
258    pub fn with_execution_history(mut self, history: ExecutionHistory) -> Self {
259        self.execution_history = Some(history);
260        self
261    }
262
263    /// Set the entire prompt history from a HashMap.
264    ///
265    /// This is useful when transferring prompts from a PhaseContext.
266    ///
267    /// # Arguments
268    ///
269    /// * `history` - HashMap of prompt keys to prompt text
270    pub fn with_prompt_history(
271        mut self,
272        history: std::collections::HashMap<String, String>,
273    ) -> Self {
274        self.prompt_history = if history.is_empty() {
275            None
276        } else {
277            Some(history)
278        };
279        self
280    }
281
282    /// Attach reducer-managed prompt input materialization state.
283    ///
284    /// This is used by reducer-driven checkpointing so resumes can avoid repeating
285    /// oversize handling that was already materialized for a given content id and
286    /// consumer signature.
287    pub fn with_prompt_inputs(mut self, prompt_inputs: PromptInputsState) -> Self {
288        let is_empty = prompt_inputs.planning.is_none()
289            && prompt_inputs.development.is_none()
290            && prompt_inputs.review.is_none()
291            && prompt_inputs.commit.is_none()
292            && prompt_inputs.xsd_retry_last_output.is_none();
293        self.prompt_inputs = if is_empty { None } else { Some(prompt_inputs) };
294        self
295    }
296
297    /// Set the logging run_id (timestamp-based) for per-run log directory.
298    ///
299    /// This should be set from the RunLogContext.run_id() to ensure resume
300    /// continuity - when resuming, logs will continue in the same directory.
301    pub fn with_log_run_id(mut self, log_run_id: String) -> Self {
302        self.log_run_id = Some(log_run_id);
303        self
304    }
305
306    /// Build the checkpoint without workspace.
307    ///
308    /// Returns None if required fields (phase, agent configs) are missing.
309    /// Generates a new RunContext if not set.
310    ///
311    /// This method uses CWD-relative file operations for file state capture.
312    /// For pipeline code where a workspace is available, prefer `build_with_workspace()`.
313    pub fn build(self) -> Option<PipelineCheckpoint> {
314        self.build_internal(None)
315    }
316
317    /// Build the checkpoint with workspace-aware file capture.
318    ///
319    /// Returns None if required fields (phase, agent configs) are missing.
320    /// Generates a new RunContext if not set.
321    ///
322    /// This method uses the workspace abstraction for file state capture, which is
323    /// the preferred approach for pipeline code. The workspace provides:
324    /// - Explicit path resolution relative to repo root
325    /// - Testability via `MemoryWorkspace` in tests
326    pub fn build_with_workspace(self, workspace: &dyn Workspace) -> Option<PipelineCheckpoint> {
327        self.build_internal(Some(workspace))
328    }
329
330    /// Internal build implementation that handles both workspace and non-workspace cases.
331    fn build_internal(self, workspace: Option<&dyn Workspace>) -> Option<PipelineCheckpoint> {
332        let phase = self.phase?;
333        let developer_agent = self.developer_agent?;
334        let reviewer_agent = self.reviewer_agent?;
335        let cli_args = self.cli_args?;
336        let developer_config = self.developer_agent_config?;
337        let reviewer_config = self.reviewer_agent_config?;
338
339        let git_user_name = self.git_user_name.as_deref();
340        let git_user_email = self.git_user_email.as_deref();
341
342        // Use provided run context or generate a new one
343        let run_context = self.run_context.unwrap_or_default();
344
345        let working_dir = workspace
346            .map(|ws| ws.root().to_string_lossy().to_string())
347            .or_else(|| {
348                std::env::current_dir()
349                    .ok()
350                    .map(|p| p.to_string_lossy().to_string())
351            })
352            .unwrap_or_default();
353
354        let prompt_md_checksum = workspace.and_then(|ws| {
355            calculate_file_checksum_with_workspace(ws, std::path::Path::new("PROMPT.md"))
356        });
357
358        let (config_path, config_checksum) = if let Some(path) = self.config_path {
359            let path_string = path.to_string_lossy().to_string();
360            let checksum = workspace.and_then(|ws| {
361                let relative = path.strip_prefix(ws.root()).ok().unwrap_or(&path);
362                calculate_file_checksum_with_workspace(ws, relative)
363            });
364            (Some(path_string), checksum)
365        } else {
366            (None, None)
367        };
368
369        let mut checkpoint = PipelineCheckpoint::from_params(CheckpointParams {
370            phase,
371            iteration: self.iteration,
372            total_iterations: self.total_iterations,
373            reviewer_pass: self.reviewer_pass,
374            total_reviewer_passes: self.total_reviewer_passes,
375            developer_agent: &developer_agent,
376            reviewer_agent: &reviewer_agent,
377            cli_args,
378            developer_agent_config: developer_config,
379            reviewer_agent_config: reviewer_config,
380            rebase_state: self.rebase_state,
381            git_user_name,
382            git_user_email,
383            run_id: &run_context.run_id,
384            parent_run_id: run_context.parent_run_id.as_deref(),
385            resume_count: run_context.resume_count,
386            actual_developer_runs: run_context.actual_developer_runs.max(self.iteration),
387            actual_reviewer_runs: run_context.actual_reviewer_runs.max(self.reviewer_pass),
388            working_dir,
389            prompt_md_checksum,
390            config_path,
391            config_checksum,
392        });
393
394        // Populate execution history
395        checkpoint.execution_history = self.execution_history;
396
397        // Populate prompt history
398        checkpoint.prompt_history = self.prompt_history;
399
400        // Populate reducer prompt input materialization state
401        checkpoint.prompt_inputs = self.prompt_inputs;
402
403        // Populate logging run_id
404        checkpoint.log_run_id = self.log_run_id;
405
406        // Capture and populate file system state
407        // Use workspace-based capture when workspace is available (pipeline code),
408        // fall back to CWD-based capture when not (CLI layer code).
409        let executor_ref = self.executor.as_ref().map(|e| e.as_ref());
410        checkpoint.file_system_state = if let Some(ws) = workspace {
411            executor_ref.map(|executor| FileSystemState::capture_with_workspace(ws, executor))
412        } else {
413            Some(FileSystemState::capture_with_optional_executor_impl(
414                executor_ref,
415            ))
416        };
417
418        // Capture and populate environment snapshot
419        checkpoint.env_snapshot =
420            Some(crate::checkpoint::state::EnvironmentSnapshot::capture_current());
421
422        Some(checkpoint)
423    }
424}
425
426/// Convert ReviewDepth to a string representation.
427fn review_depth_to_string(depth: ReviewDepth) -> Option<String> {
428    match depth {
429        ReviewDepth::Standard => Some("standard".to_string()),
430        ReviewDepth::Comprehensive => Some("comprehensive".to_string()),
431        ReviewDepth::Security => Some("security".to_string()),
432        ReviewDepth::Incremental => Some("incremental".to_string()),
433    }
434}
435
436#[cfg(test)]
437mod tests;