ricecoder_workflows/
error_handler.rs

1//! Error handling and recovery for workflow steps
2
3use crate::error::{WorkflowError, WorkflowResult};
4#[allow(unused_imports)]
5use crate::models::{ErrorAction, RiskFactors, StepStatus, Workflow, WorkflowState};
6use crate::state::StateManager;
7use std::time::Duration;
8
/// Stateless entry point for reacting to failed workflow steps.
///
/// All functionality is exposed as associated functions (no `self`):
/// - applying a step's configured [`ErrorAction`] (fail, retry, skip, rollback)
///   to the workflow state,
/// - querying which error action a step is configured with,
/// - writing and reading error details stored in step results.
///
/// Per-step retry bookkeeping (attempt counter, backoff delay, error history)
/// is kept separately in [`RetryState`].
pub struct ErrorHandler;
17
18/// Error history entry for tracking errors across retries
19#[derive(Debug, Clone)]
20pub struct ErrorHistoryEntry {
21    /// Error message
22    pub error: String,
23    /// Attempt number (1-indexed)
24    pub attempt: usize,
25    /// Timestamp of error
26    pub timestamp: chrono::DateTime<chrono::Utc>,
27}
28
29/// Retry state for tracking retry attempts
30#[derive(Debug, Clone)]
31pub struct RetryState {
32    /// Current attempt number (1-indexed)
33    pub attempt: usize,
34    /// Maximum attempts allowed
35    pub max_attempts: usize,
36    /// Delay between retries in milliseconds
37    pub delay_ms: u64,
38    /// Error history
39    pub history: Vec<ErrorHistoryEntry>,
40}
41
42impl RetryState {
43    /// Create a new retry state
44    pub fn new(max_attempts: usize, delay_ms: u64) -> Self {
45        Self {
46            attempt: 1,
47            max_attempts,
48            delay_ms,
49            history: Vec::new(),
50        }
51    }
52
53    /// Check if more retries are available
54    pub fn can_retry(&self) -> bool {
55        self.attempt < self.max_attempts
56    }
57
58    /// Get the delay for the next retry with exponential backoff
59    ///
60    /// Calculates: delay_ms * 2^(attempt - 1)
61    pub fn get_backoff_delay(&self) -> Duration {
62        let backoff_factor = 2_u64.pow((self.attempt - 1) as u32);
63        Duration::from_millis(self.delay_ms * backoff_factor)
64    }
65
66    /// Record an error and increment attempt counter
67    pub fn record_error(&mut self, error: String) {
68        self.history.push(ErrorHistoryEntry {
69            error,
70            attempt: self.attempt,
71            timestamp: chrono::Utc::now(),
72        });
73        self.attempt += 1;
74    }
75
76    /// Get the error history
77    pub fn get_history(&self) -> &[ErrorHistoryEntry] {
78        &self.history
79    }
80}
81
82impl ErrorHandler {
83    /// Handle a step execution error
84    ///
85    /// Applies the specified error action and updates the workflow state accordingly.
86    /// Returns true if the workflow should continue, false if it should stop.
87    pub fn handle_error(
88        workflow: &Workflow,
89        state: &mut WorkflowState,
90        step_id: &str,
91        error: String,
92    ) -> WorkflowResult<bool> {
93        // Find the step
94        let step = workflow
95            .steps
96            .iter()
97            .find(|s| s.id == step_id)
98            .ok_or_else(|| WorkflowError::NotFound(format!("Step not found: {}", step_id)))?;
99
100        // Apply the error action
101        match &step.on_error {
102            ErrorAction::Fail => {
103                // Mark step as failed and stop workflow
104                StateManager::fail_step(state, step_id.to_string(), error, 0);
105                Ok(false)
106            }
107            ErrorAction::Retry { .. } => {
108                // Retry logic is handled by the caller
109                // This just records the error for retry tracking
110                StateManager::fail_step(state, step_id.to_string(), error, 0);
111                Ok(true) // Signal that retry should be attempted
112            }
113            ErrorAction::Skip => {
114                // Skip the step and continue
115                StateManager::skip_step(state, step_id.to_string());
116                Ok(true)
117            }
118            ErrorAction::Rollback => {
119                // Rollback is handled separately
120                StateManager::fail_step(state, step_id.to_string(), error, 0);
121                Ok(false)
122            }
123        }
124    }
125
126    /// Check if a step should be retried
127    pub fn should_retry(workflow: &Workflow, step_id: &str) -> bool {
128        workflow
129            .steps
130            .iter()
131            .find(|s| s.id == step_id)
132            .map(|step| matches!(step.on_error, ErrorAction::Retry { .. }))
133            .unwrap_or(false)
134    }
135
136    /// Get retry configuration for a step
137    pub fn get_retry_config(workflow: &Workflow, step_id: &str) -> Option<(usize, u64)> {
138        workflow
139            .steps
140            .iter()
141            .find(|s| s.id == step_id)
142            .and_then(|step| match &step.on_error {
143                ErrorAction::Retry {
144                    max_attempts,
145                    delay_ms,
146                } => Some((*max_attempts, *delay_ms)),
147                _ => None,
148            })
149    }
150
151    /// Check if a step should be skipped on error
152    pub fn should_skip_on_error(workflow: &Workflow, step_id: &str) -> bool {
153        workflow
154            .steps
155            .iter()
156            .find(|s| s.id == step_id)
157            .map(|step| matches!(step.on_error, ErrorAction::Skip))
158            .unwrap_or(false)
159    }
160
161    /// Check if a step should rollback on error
162    pub fn should_rollback_on_error(workflow: &Workflow, step_id: &str) -> bool {
163        workflow
164            .steps
165            .iter()
166            .find(|s| s.id == step_id)
167            .map(|step| matches!(step.on_error, ErrorAction::Rollback))
168            .unwrap_or(false)
169    }
170
171    /// Get the error action for a step
172    pub fn get_error_action(workflow: &Workflow, step_id: &str) -> Option<ErrorAction> {
173        workflow
174            .steps
175            .iter()
176            .find(|s| s.id == step_id)
177            .map(|step| step.on_error.clone())
178    }
179
180    /// Capture error details in step result
181    ///
182    /// Stores error type, message, and stack trace in the step result.
183    pub fn capture_error(
184        state: &mut WorkflowState,
185        step_id: &str,
186        error_type: &str,
187        error_message: &str,
188        stack_trace: Option<&str>,
189    ) -> WorkflowResult<()> {
190        if let Some(result) = state.step_results.get_mut(step_id) {
191            result.error = Some(format!(
192                "Type: {}\nMessage: {}\n{}",
193                error_type,
194                error_message,
195                stack_trace.unwrap_or("")
196            ));
197            result.status = StepStatus::Failed;
198        }
199        Ok(())
200    }
201
202    /// Get error details from a step result
203    pub fn get_error_details(state: &WorkflowState, step_id: &str) -> Option<String> {
204        state
205            .step_results
206            .get(step_id)
207            .and_then(|result| result.error.clone())
208    }
209
210    /// Check if a step has an error
211    pub fn has_error(state: &WorkflowState, step_id: &str) -> bool {
212        state
213            .step_results
214            .get(step_id)
215            .map(|result| result.error.is_some())
216            .unwrap_or(false)
217    }
218
219    /// Get all errors in the workflow
220    pub fn get_all_errors(state: &WorkflowState) -> Vec<(String, String)> {
221        state
222            .step_results
223            .iter()
224            .filter_map(|(step_id, result)| {
225                result
226                    .error
227                    .as_ref()
228                    .map(|error| (step_id.clone(), error.clone()))
229            })
230            .collect()
231    }
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::{AgentStep, StepConfig, StepType, WorkflowConfig, WorkflowStep};

    /// Build an agent-backed step with no dependencies, no approval gate and
    /// default risk settings.
    fn agent_step(
        id: &str,
        name: &str,
        config: serde_json::Value,
        on_error: ErrorAction,
    ) -> WorkflowStep {
        WorkflowStep {
            id: id.to_string(),
            name: name.to_string(),
            step_type: StepType::Agent(AgentStep {
                agent_id: "test-agent".to_string(),
                task: "test-task".to_string(),
            }),
            config: StepConfig { config },
            dependencies: vec![],
            approval_required: false,
            on_error,
            risk_score: None,
            risk_factors: RiskFactors::default(),
        }
    }

    /// Wrap the given steps in the fixed test workflow shell.
    fn workflow_with_steps(steps: Vec<WorkflowStep>) -> Workflow {
        Workflow {
            id: "test-workflow".to_string(),
            name: "Test Workflow".to_string(),
            description: "A test workflow".to_string(),
            parameters: vec![],
            steps,
            config: WorkflowConfig {
                timeout_ms: None,
                max_parallel: None,
            },
        }
    }

    /// Single-step workflow (`step1`) whose only step uses `error_action`.
    fn create_workflow_with_error_action(error_action: ErrorAction) -> Workflow {
        workflow_with_steps(vec![agent_step(
            "step1",
            "Step 1",
            serde_json::json!({"param": "value"}),
            error_action,
        )])
    }

    #[test]
    fn test_retry_state_creation() {
        let retry_state = RetryState::new(3, 100);

        assert_eq!(retry_state.attempt, 1);
        assert_eq!(retry_state.max_attempts, 3);
        assert_eq!(retry_state.delay_ms, 100);
        assert!(retry_state.can_retry());
    }

    #[test]
    fn test_retry_state_exponential_backoff() {
        let mut retry_state = RetryState::new(3, 100);

        // Expected delays are 100ms * 2^0, 2^1, 2^2 for attempts 1, 2, 3.
        for (index, expected_ms) in [100_u128, 200, 400].iter().enumerate() {
            assert_eq!(retry_state.get_backoff_delay().as_millis(), *expected_ms);
            retry_state.record_error(format!("Error {}", index + 1));
        }

        // All attempts are used up.
        assert!(!retry_state.can_retry());
    }

    #[test]
    fn test_retry_state_error_history() {
        let mut retry_state = RetryState::new(3, 100);
        retry_state.record_error("Error 1".to_string());
        retry_state.record_error("Error 2".to_string());

        let history = retry_state.get_history();

        assert_eq!(history.len(), 2);
        assert_eq!(history[0].error, "Error 1");
        assert_eq!(history[0].attempt, 1);
        assert_eq!(history[1].error, "Error 2");
        assert_eq!(history[1].attempt, 2);
    }

    #[test]
    fn test_should_retry() {
        let workflow = create_workflow_with_error_action(ErrorAction::Retry {
            max_attempts: 3,
            delay_ms: 100,
        });

        assert!(ErrorHandler::should_retry(&workflow, "step1"));
    }

    #[test]
    fn test_should_not_retry_on_fail() {
        let workflow = create_workflow_with_error_action(ErrorAction::Fail);

        assert!(!ErrorHandler::should_retry(&workflow, "step1"));
    }

    #[test]
    fn test_should_skip_on_error() {
        let workflow = create_workflow_with_error_action(ErrorAction::Skip);

        assert!(ErrorHandler::should_skip_on_error(&workflow, "step1"));
    }

    #[test]
    fn test_should_rollback_on_error() {
        let workflow = create_workflow_with_error_action(ErrorAction::Rollback);

        assert!(ErrorHandler::should_rollback_on_error(&workflow, "step1"));
    }

    #[test]
    fn test_get_retry_config() {
        let workflow = create_workflow_with_error_action(ErrorAction::Retry {
            max_attempts: 3,
            delay_ms: 100,
        });

        assert_eq!(
            ErrorHandler::get_retry_config(&workflow, "step1"),
            Some((3, 100))
        );
    }

    #[test]
    fn test_get_error_action() {
        let workflow = create_workflow_with_error_action(ErrorAction::Skip);

        let action = ErrorHandler::get_error_action(&workflow, "step1");

        assert!(matches!(action, Some(ErrorAction::Skip)));
    }

    #[test]
    fn test_capture_error() {
        let workflow = create_workflow_with_error_action(ErrorAction::Fail);
        let mut state = StateManager::create_state(&workflow);
        StateManager::start_step(&mut state, "step1".to_string());

        let result = ErrorHandler::capture_error(
            &mut state,
            "step1",
            "RuntimeError",
            "Something went wrong",
            Some("at line 42"),
        );

        assert!(result.is_ok());
        assert!(ErrorHandler::has_error(&state, "step1"));

        let error = ErrorHandler::get_error_details(&state, "step1");
        assert!(error.is_some());
        let error_str = error.unwrap();
        for expected in ["RuntimeError", "Something went wrong", "at line 42"].iter() {
            assert!(error_str.contains(expected));
        }
    }

    #[test]
    fn test_get_all_errors() {
        let workflow = workflow_with_steps(vec![
            agent_step("step1", "Step 1", serde_json::json!({}), ErrorAction::Fail),
            agent_step("step2", "Step 2", serde_json::json!({}), ErrorAction::Fail),
        ]);
        let mut state = StateManager::create_state(&workflow);

        for step_id in ["step1", "step2"].iter() {
            StateManager::start_step(&mut state, step_id.to_string());
        }

        ErrorHandler::capture_error(&mut state, "step1", "Error1", "Message1", None).ok();
        ErrorHandler::capture_error(&mut state, "step2", "Error2", "Message2", None).ok();

        let errors = ErrorHandler::get_all_errors(&state);
        assert_eq!(errors.len(), 2);
    }
}