Skip to main content

enact_core/kernel/
error.rs

1//! Execution Error Taxonomy - Deterministic Failure Semantics
2//!
3//! This module defines the formal `ExecutionError` model that allows the kernel
4//! to make deterministic decisions about retries, billing, and failure reporting.
5//!
6//! ## Design Principles
7//!
8//! 1. **No Generic Errors**: Every error MUST be categorized
9//! 2. **Retry Policies Are Explicit**: Each error declares its retry behavior
10//! 3. **Idempotency Is Tracked**: Side-effect operations declare idempotency requirements
11//! 4. **Backoff Is Deterministic**: Retry timing is calculated, not random
12//!
13//! ## Error Categories
14//!
15//! - `LlmError`: LLM provider errors (usually retryable with backoff)
16//! - `ToolError`: Tool execution errors (may be retryable)
17//! - `PolicyViolation`: Policy/guardrail violation (NEVER retryable)
18//! - `Timeout`: Execution timeout (retryable with extended timeout)
19//! - `QuotaExceeded`: Resource quota exceeded (NEVER retryable)
20//! - `KernelInternal`: Internal kernel error (NEVER retryable)
21//! - `ValidationError`: Input validation error (NEVER retryable)
22//! - `NetworkError`: Network/connectivity error (usually retryable)
23//!
24//! @see docs/feat-02-error-taxonomy.md
25//! @see packages/enact-schemas/src/execution.schemas.ts
26
27use super::ids::StepId;
28use serde::{Deserialize, Serialize};
29use std::time::Duration;
30
31// =============================================================================
32// Error Categories
33// =============================================================================
34
35/// High-level error categories for deterministic recovery
36///
37/// Categories determine retry behavior and are the primary classification
38/// for all execution failures.
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
40#[serde(rename_all = "PascalCase")]
41pub enum ExecutionErrorCategory {
42    /// LLM provider error (rate limit, content filter, context overflow)
43    LlmError,
44    /// Tool execution error (tool crashed, invalid output)
45    ToolError,
46    /// Policy/guardrail violation (FATAL - never retry)
47    PolicyViolation,
48    /// Execution timeout (step or wall clock)
49    Timeout,
50    /// Resource quota exceeded (FATAL - never retry)
51    QuotaExceeded,
52    /// Internal kernel error (FATAL - never retry)
53    KernelInternal,
54    /// Input validation error (FATAL - never retry)
55    ValidationError,
56    /// Network/connectivity error (usually retryable)
57    NetworkError,
58}
59
60impl ExecutionErrorCategory {
61    /// Check if this category is fatal (never retryable)
62    pub fn is_fatal(&self) -> bool {
63        matches!(
64            self,
65            Self::PolicyViolation
66                | Self::QuotaExceeded
67                | Self::KernelInternal
68                | Self::ValidationError
69        )
70    }
71
72    /// Get the default retry policy for this category
73    pub fn default_retry_policy(&self) -> RetryPolicy {
74        match self {
75            Self::LlmError => RetryPolicy {
76                retryable: true,
77                max_retries: 3,
78                backoff_strategy: BackoffStrategy::Exponential,
79                base_delay: Duration::from_millis(1000),
80                max_delay: Duration::from_millis(30000),
81                requires_idempotency_key: false,
82            },
83            Self::ToolError => RetryPolicy {
84                retryable: true,
85                max_retries: 2,
86                backoff_strategy: BackoffStrategy::Constant,
87                base_delay: Duration::from_millis(500),
88                max_delay: Duration::from_millis(5000),
89                requires_idempotency_key: true, // Tools may have side effects
90            },
91            Self::PolicyViolation => RetryPolicy::fatal(),
92            Self::Timeout => RetryPolicy {
93                retryable: true,
94                max_retries: 1,
95                backoff_strategy: BackoffStrategy::Constant,
96                base_delay: Duration::ZERO,
97                max_delay: Duration::ZERO,
98                requires_idempotency_key: true,
99            },
100            Self::QuotaExceeded => RetryPolicy::fatal(),
101            Self::KernelInternal => RetryPolicy::fatal(),
102            Self::ValidationError => RetryPolicy::fatal(),
103            Self::NetworkError => RetryPolicy {
104                retryable: true,
105                max_retries: 3,
106                backoff_strategy: BackoffStrategy::Exponential,
107                base_delay: Duration::from_millis(500),
108                max_delay: Duration::from_millis(15000),
109                requires_idempotency_key: true,
110            },
111        }
112    }
113}
114
115impl std::fmt::Display for ExecutionErrorCategory {
116    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
117        match self {
118            Self::LlmError => write!(f, "LlmError"),
119            Self::ToolError => write!(f, "ToolError"),
120            Self::PolicyViolation => write!(f, "PolicyViolation"),
121            Self::Timeout => write!(f, "Timeout"),
122            Self::QuotaExceeded => write!(f, "QuotaExceeded"),
123            Self::KernelInternal => write!(f, "KernelInternal"),
124            Self::ValidationError => write!(f, "ValidationError"),
125            Self::NetworkError => write!(f, "NetworkError"),
126        }
127    }
128}
129
130// =============================================================================
131// Backoff Strategy
132// =============================================================================
133
134/// How to space out retry attempts
135#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
136#[serde(rename_all = "lowercase")]
137pub enum BackoffStrategy {
138    /// No delay between retries
139    #[default]
140    None,
141    /// Fixed delay between retries
142    Constant,
143    /// Linearly increasing delay (base * attempt)
144    Linear,
145    /// Exponentially increasing delay (base * 2^(attempt-1))
146    Exponential,
147}
148
149impl BackoffStrategy {
150    /// Calculate delay for a given attempt number (1-indexed)
151    pub fn calculate_delay(&self, base: Duration, attempt: u32, max: Duration) -> Duration {
152        let delay = match self {
153            Self::None => Duration::ZERO,
154            Self::Constant => base,
155            Self::Linear => base * attempt,
156            Self::Exponential => {
157                let multiplier = 2u64.saturating_pow(attempt.saturating_sub(1));
158                base.saturating_mul(multiplier as u32)
159            }
160        };
161        std::cmp::min(delay, max)
162    }
163}
164
165// =============================================================================
166// Retry Policy
167// =============================================================================
168
169/// Deterministic retry behavior for an error
170///
171/// The kernel uses this to decide:
172/// 1. Should we retry? (retryable)
173/// 2. How many times? (max_retries)
174/// 3. How long to wait? (backoff_strategy + base_delay)
175/// 4. Is the operation idempotent? (requires_idempotency_key)
176#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
177pub struct RetryPolicy {
178    /// Whether this error is retryable at all
179    pub retryable: bool,
180
181    /// Maximum number of retry attempts (0 = no retries)
182    pub max_retries: u32,
183
184    /// Backoff strategy for spacing retries
185    pub backoff_strategy: BackoffStrategy,
186
187    /// Base delay for backoff calculation
188    #[serde(with = "duration_millis")]
189    pub base_delay: Duration,
190
191    /// Maximum delay (caps exponential backoff)
192    #[serde(with = "duration_millis")]
193    pub max_delay: Duration,
194
195    /// Whether the operation requires an idempotency key for safe retry
196    /// If true, the kernel MUST generate an IdempotencyKey before retrying
197    pub requires_idempotency_key: bool,
198}
199
200impl RetryPolicy {
201    /// Create a fatal (non-retryable) policy
202    pub fn fatal() -> Self {
203        Self {
204            retryable: false,
205            max_retries: 0,
206            backoff_strategy: BackoffStrategy::None,
207            base_delay: Duration::ZERO,
208            max_delay: Duration::ZERO,
209            requires_idempotency_key: false,
210        }
211    }
212
213    /// Create a simple retryable policy
214    pub fn retryable(max_retries: u32) -> Self {
215        Self {
216            retryable: true,
217            max_retries,
218            backoff_strategy: BackoffStrategy::Exponential,
219            base_delay: Duration::from_millis(1000),
220            max_delay: Duration::from_millis(30000),
221            requires_idempotency_key: false,
222        }
223    }
224
225    /// Calculate delay for a given attempt number
226    pub fn delay_for_attempt(&self, attempt: u32) -> Duration {
227        self.backoff_strategy
228            .calculate_delay(self.base_delay, attempt, self.max_delay)
229    }
230
231    /// Check if a retry should be attempted for the given attempt number
232    pub fn should_retry(&self, attempt: u32) -> bool {
233        self.retryable && attempt <= self.max_retries
234    }
235}
236
237impl Default for RetryPolicy {
238    fn default() -> Self {
239        Self::fatal()
240    }
241}
242
243// =============================================================================
244// LLM Error Codes
245// =============================================================================
246
247/// Specific LLM provider error codes
248#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
249#[serde(rename_all = "snake_case")]
250pub enum LlmErrorCode {
251    /// 429 - Too many requests
252    RateLimit,
253    /// Context window exceeded
254    ContextOverflow,
255    /// Content blocked by safety filter
256    ContentFiltered,
257    /// Malformed request
258    InvalidRequest,
259    /// Authentication/authorization failed
260    AuthFailed,
261    /// Model not available
262    ModelUnavailable,
263    /// Generic provider error
264    ProviderError,
265}
266
267impl LlmErrorCode {
268    /// Check if this error code is typically retryable
269    pub fn is_retryable(&self) -> bool {
270        matches!(
271            self,
272            Self::RateLimit | Self::ModelUnavailable | Self::ProviderError
273        )
274    }
275}
276
277impl std::fmt::Display for LlmErrorCode {
278    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
279        match self {
280            Self::RateLimit => write!(f, "rate_limit"),
281            Self::ContextOverflow => write!(f, "context_overflow"),
282            Self::ContentFiltered => write!(f, "content_filtered"),
283            Self::InvalidRequest => write!(f, "invalid_request"),
284            Self::AuthFailed => write!(f, "auth_failed"),
285            Self::ModelUnavailable => write!(f, "model_unavailable"),
286            Self::ProviderError => write!(f, "provider_error"),
287        }
288    }
289}
290
291// =============================================================================
292// Tool Error Codes
293// =============================================================================
294
295/// Specific tool error codes
296#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
297#[serde(rename_all = "snake_case")]
298pub enum ToolErrorCode {
299    /// Tool not registered
300    NotFound,
301    /// Tool not allowed by policy
302    PermissionDenied,
303    /// Invalid tool arguments
304    InvalidInput,
305    /// Tool crashed or returned error
306    ExecutionFailed,
307    /// Tool execution timed out
308    Timeout,
309    /// Tool returned invalid output
310    OutputInvalid,
311}
312
313impl ToolErrorCode {
314    /// Check if this error code is typically retryable
315    pub fn is_retryable(&self) -> bool {
316        matches!(self, Self::Timeout | Self::ExecutionFailed)
317    }
318}
319
320impl std::fmt::Display for ToolErrorCode {
321    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
322        match self {
323            Self::NotFound => write!(f, "not_found"),
324            Self::PermissionDenied => write!(f, "permission_denied"),
325            Self::InvalidInput => write!(f, "invalid_input"),
326            Self::ExecutionFailed => write!(f, "execution_failed"),
327            Self::Timeout => write!(f, "timeout"),
328            Self::OutputInvalid => write!(f, "output_invalid"),
329        }
330    }
331}
332
333// =============================================================================
334// Execution Error
335// =============================================================================
336
337/// The primary error type for all execution failures
338///
339/// This structured error enables:
340/// 1. Deterministic retry decisions
341/// 2. Accurate billing (distinguishes user errors from system errors)
342/// 3. Compliance narratives (full audit trail)
343/// 4. HTTP/gRPC status code mapping
344#[derive(Debug, Clone, Serialize, Deserialize)]
345pub struct ExecutionError {
346    /// High-level error category
347    pub category: ExecutionErrorCategory,
348
349    /// Human-readable error message
350    pub message: String,
351
352    /// Retry policy for this error
353    pub retry_policy: RetryPolicy,
354
355    /// Specific error code within the category
356    pub code: Option<String>,
357
358    /// Attempt number (1-indexed, for tracking retries)
359    pub attempt: u32,
360
361    /// Step ID where the error occurred
362    pub step_id: Option<StepId>,
363
364    /// Provider name (for LLM/Tool errors)
365    pub provider: Option<String>,
366
367    /// HTTP status code (if applicable)
368    pub http_status: Option<u16>,
369
370    /// Additional structured details
371    pub details: Option<serde_json::Value>,
372
373    /// Timestamp when error occurred (milliseconds since epoch)
374    pub occurred_at: i64,
375}
376
377impl ExecutionError {
378    /// Create a new ExecutionError with default retry policy for the category
379    pub fn new(category: ExecutionErrorCategory, message: impl Into<String>) -> Self {
380        Self {
381            category,
382            message: message.into(),
383            retry_policy: category.default_retry_policy(),
384            code: None,
385            attempt: 1,
386            step_id: None,
387            provider: None,
388            http_status: None,
389            details: None,
390            occurred_at: chrono::Utc::now().timestamp_millis(),
391        }
392    }
393
394    // Builder methods
395
396    /// Set the error code
397    pub fn with_code(mut self, code: impl Into<String>) -> Self {
398        self.code = Some(code.into());
399        self
400    }
401
402    /// Set the attempt number
403    pub fn with_attempt(mut self, attempt: u32) -> Self {
404        self.attempt = attempt;
405        self
406    }
407
408    /// Set the step ID
409    pub fn with_step_id(mut self, step_id: StepId) -> Self {
410        self.step_id = Some(step_id);
411        self
412    }
413
414    /// Set the provider name
415    pub fn with_provider(mut self, provider: impl Into<String>) -> Self {
416        self.provider = Some(provider.into());
417        self
418    }
419
420    /// Set the HTTP status code
421    pub fn with_http_status(mut self, status: u16) -> Self {
422        self.http_status = Some(status);
423        self
424    }
425
426    /// Set additional details
427    pub fn with_details(mut self, details: serde_json::Value) -> Self {
428        self.details = Some(details);
429        self
430    }
431
432    /// Override the retry policy
433    pub fn with_retry_policy(mut self, policy: RetryPolicy) -> Self {
434        self.retry_policy = policy;
435        self
436    }
437
438    // Convenience constructors
439
440    /// Create an LLM error
441    pub fn llm(code: LlmErrorCode, message: impl Into<String>) -> Self {
442        Self::new(ExecutionErrorCategory::LlmError, message).with_code(code.to_string())
443    }
444
445    /// Create a tool error
446    pub fn tool(code: ToolErrorCode, message: impl Into<String>) -> Self {
447        Self::new(ExecutionErrorCategory::ToolError, message).with_code(code.to_string())
448    }
449
450    /// Create a policy violation error
451    pub fn policy_violation(message: impl Into<String>) -> Self {
452        Self::new(ExecutionErrorCategory::PolicyViolation, message)
453    }
454
455    /// Create a timeout error
456    pub fn timeout(message: impl Into<String>) -> Self {
457        Self::new(ExecutionErrorCategory::Timeout, message)
458    }
459
460    /// Create a quota exceeded error
461    pub fn quota_exceeded(message: impl Into<String>) -> Self {
462        Self::new(ExecutionErrorCategory::QuotaExceeded, message)
463    }
464
465    /// Create a kernel internal error
466    pub fn kernel_internal(message: impl Into<String>) -> Self {
467        Self::new(ExecutionErrorCategory::KernelInternal, message)
468    }
469
470    /// Create a validation error
471    pub fn validation(message: impl Into<String>) -> Self {
472        Self::new(ExecutionErrorCategory::ValidationError, message)
473    }
474
475    /// Create a network error
476    pub fn network(message: impl Into<String>) -> Self {
477        Self::new(ExecutionErrorCategory::NetworkError, message)
478    }
479
480    // Query methods
481
482    /// Check if this error is retryable
483    pub fn is_retryable(&self) -> bool {
484        self.retry_policy.retryable
485    }
486
487    /// Check if this error is fatal (will never be retried)
488    pub fn is_fatal(&self) -> bool {
489        self.category.is_fatal()
490    }
491
492    /// Check if a retry should be attempted
493    pub fn should_retry(&self) -> bool {
494        self.retry_policy.should_retry(self.attempt)
495    }
496
497    /// Get the delay before the next retry attempt
498    pub fn retry_delay(&self) -> Duration {
499        self.retry_policy.delay_for_attempt(self.attempt)
500    }
501
502    /// Create a new error for the next retry attempt
503    pub fn next_attempt(mut self) -> Self {
504        self.attempt += 1;
505        self.occurred_at = chrono::Utc::now().timestamp_millis();
506        self
507    }
508
509    /// Map to HTTP status code
510    pub fn to_http_status(&self) -> u16 {
511        if let Some(status) = self.http_status {
512            return status;
513        }
514
515        match self.category {
516            ExecutionErrorCategory::LlmError => 502,  // Bad Gateway
517            ExecutionErrorCategory::ToolError => 500, // Internal Server Error
518            ExecutionErrorCategory::PolicyViolation => 403, // Forbidden
519            ExecutionErrorCategory::Timeout => 504,   // Gateway Timeout
520            ExecutionErrorCategory::QuotaExceeded => 429, // Too Many Requests
521            ExecutionErrorCategory::KernelInternal => 500, // Internal Server Error
522            ExecutionErrorCategory::ValidationError => 400, // Bad Request
523            ExecutionErrorCategory::NetworkError => 503, // Service Unavailable
524        }
525    }
526}
527
528impl std::fmt::Display for ExecutionError {
529    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
530        write!(f, "[{}] {}", self.category, self.message)?;
531        if let Some(code) = &self.code {
532            write!(f, " ({})", code)?;
533        }
534        if self.attempt > 1 {
535            write!(f, " [attempt {}]", self.attempt)?;
536        }
537        Ok(())
538    }
539}
540
541impl std::error::Error for ExecutionError {}
542
543// Implement From for common error types
544
545impl From<reqwest::Error> for ExecutionError {
546    fn from(err: reqwest::Error) -> Self {
547        if err.is_timeout() {
548            Self::timeout(format!("HTTP request timed out: {}", err))
549        } else if err.is_connect() {
550            Self::network(format!("Connection failed: {}", err))
551        } else if err.is_status() {
552            let status = err.status().map(|s| s.as_u16()).unwrap_or(500);
553            Self::network(format!("HTTP error: {}", err)).with_http_status(status)
554        } else {
555            Self::network(format!("HTTP error: {}", err))
556        }
557    }
558}
559
560impl From<serde_json::Error> for ExecutionError {
561    fn from(err: serde_json::Error) -> Self {
562        Self::validation(format!("JSON error: {}", err))
563    }
564}
565
566impl From<std::io::Error> for ExecutionError {
567    fn from(err: std::io::Error) -> Self {
568        match err.kind() {
569            std::io::ErrorKind::TimedOut => Self::timeout(format!("IO timeout: {}", err)),
570            std::io::ErrorKind::ConnectionRefused
571            | std::io::ErrorKind::ConnectionReset
572            | std::io::ErrorKind::ConnectionAborted => {
573                Self::network(format!("Connection error: {}", err))
574            }
575            _ => Self::kernel_internal(format!("IO error: {}", err)),
576        }
577    }
578}
579
580// =============================================================================
581// Serde helpers for Duration
582// =============================================================================
583
584mod duration_millis {
585    use serde::{Deserialize, Deserializer, Serializer};
586    use std::time::Duration;
587
588    pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
589    where
590        S: Serializer,
591    {
592        serializer.serialize_u64(duration.as_millis() as u64)
593    }
594
595    pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
596    where
597        D: Deserializer<'de>,
598    {
599        let millis = u64::deserialize(deserializer)?;
600        Ok(Duration::from_millis(millis))
601    }
602}
603
604// =============================================================================
605// Tests
606// =============================================================================
607
608#[cfg(test)]
609mod tests {
610    use super::*;
611
612    #[test]
613    fn test_error_categories_fatality() {
614        assert!(ExecutionErrorCategory::PolicyViolation.is_fatal());
615        assert!(ExecutionErrorCategory::QuotaExceeded.is_fatal());
616        assert!(ExecutionErrorCategory::KernelInternal.is_fatal());
617        assert!(ExecutionErrorCategory::ValidationError.is_fatal());
618
619        assert!(!ExecutionErrorCategory::LlmError.is_fatal());
620        assert!(!ExecutionErrorCategory::ToolError.is_fatal());
621        assert!(!ExecutionErrorCategory::Timeout.is_fatal());
622        assert!(!ExecutionErrorCategory::NetworkError.is_fatal());
623    }
624
625    #[test]
626    fn test_default_retry_policies() {
627        let llm_policy = ExecutionErrorCategory::LlmError.default_retry_policy();
628        assert!(llm_policy.retryable);
629        assert_eq!(llm_policy.max_retries, 3);
630        assert_eq!(llm_policy.backoff_strategy, BackoffStrategy::Exponential);
631
632        let fatal_policy = ExecutionErrorCategory::PolicyViolation.default_retry_policy();
633        assert!(!fatal_policy.retryable);
634        assert_eq!(fatal_policy.max_retries, 0);
635    }
636
637    #[test]
638    fn test_exponential_backoff() {
639        let strategy = BackoffStrategy::Exponential;
640        let base = Duration::from_millis(1000);
641        let max = Duration::from_millis(30000);
642
643        assert_eq!(
644            strategy.calculate_delay(base, 1, max),
645            Duration::from_millis(1000)
646        );
647        assert_eq!(
648            strategy.calculate_delay(base, 2, max),
649            Duration::from_millis(2000)
650        );
651        assert_eq!(
652            strategy.calculate_delay(base, 3, max),
653            Duration::from_millis(4000)
654        );
655        assert_eq!(
656            strategy.calculate_delay(base, 4, max),
657            Duration::from_millis(8000)
658        );
659        assert_eq!(
660            strategy.calculate_delay(base, 5, max),
661            Duration::from_millis(16000)
662        );
663        // Should cap at max
664        assert_eq!(
665            strategy.calculate_delay(base, 6, max),
666            Duration::from_millis(30000)
667        );
668    }
669
670    #[test]
671    fn test_execution_error_creation() {
672        let error = ExecutionError::llm(LlmErrorCode::RateLimit, "Too many requests")
673            .with_provider("azure")
674            .with_http_status(429);
675
676        assert_eq!(error.category, ExecutionErrorCategory::LlmError);
677        assert_eq!(error.code, Some("rate_limit".to_string()));
678        assert_eq!(error.provider, Some("azure".to_string()));
679        assert_eq!(error.http_status, Some(429));
680        assert!(error.is_retryable());
681        assert!(!error.is_fatal());
682    }
683
684    #[test]
685    fn test_should_retry() {
686        let mut error = ExecutionError::llm(LlmErrorCode::RateLimit, "Rate limited");
687
688        // Default max_retries is 3
689        assert!(error.should_retry()); // attempt 1
690        error = error.next_attempt();
691        assert!(error.should_retry()); // attempt 2
692        error = error.next_attempt();
693        assert!(error.should_retry()); // attempt 3
694        error = error.next_attempt();
695        assert!(!error.should_retry()); // attempt 4 - exceeds max
696    }
697
698    #[test]
699    fn test_fatal_error_never_retries() {
700        let error = ExecutionError::policy_violation("Content blocked");
701
702        assert!(!error.is_retryable());
703        assert!(error.is_fatal());
704        assert!(!error.should_retry());
705    }
706
707    #[test]
708    fn test_http_status_mapping() {
709        assert_eq!(
710            ExecutionError::policy_violation("test").to_http_status(),
711            403
712        );
713        assert_eq!(ExecutionError::quota_exceeded("test").to_http_status(), 429);
714        assert_eq!(ExecutionError::timeout("test").to_http_status(), 504);
715        assert_eq!(ExecutionError::validation("test").to_http_status(), 400);
716    }
717
718    #[test]
719    fn test_error_serialization() {
720        let error = ExecutionError::llm(LlmErrorCode::RateLimit, "Too many requests")
721            .with_provider("azure");
722
723        let json = serde_json::to_string(&error).unwrap();
724        let parsed: ExecutionError = serde_json::from_str(&json).unwrap();
725
726        assert_eq!(parsed.category, error.category);
727        assert_eq!(parsed.message, error.message);
728        assert_eq!(parsed.code, error.code);
729        assert_eq!(parsed.provider, error.provider);
730    }
731
732    // =========================================================================
733    // ExecutionErrorCategory Tests
734    // =========================================================================
735
736    #[test]
737    fn test_error_category_display() {
738        assert_eq!(format!("{}", ExecutionErrorCategory::LlmError), "LlmError");
739        assert_eq!(
740            format!("{}", ExecutionErrorCategory::ToolError),
741            "ToolError"
742        );
743        assert_eq!(
744            format!("{}", ExecutionErrorCategory::PolicyViolation),
745            "PolicyViolation"
746        );
747        assert_eq!(format!("{}", ExecutionErrorCategory::Timeout), "Timeout");
748        assert_eq!(
749            format!("{}", ExecutionErrorCategory::QuotaExceeded),
750            "QuotaExceeded"
751        );
752        assert_eq!(
753            format!("{}", ExecutionErrorCategory::KernelInternal),
754            "KernelInternal"
755        );
756        assert_eq!(
757            format!("{}", ExecutionErrorCategory::ValidationError),
758            "ValidationError"
759        );
760        assert_eq!(
761            format!("{}", ExecutionErrorCategory::NetworkError),
762            "NetworkError"
763        );
764    }
765
766    #[test]
767    fn test_error_category_serde() {
768        let categories = vec![
769            ExecutionErrorCategory::LlmError,
770            ExecutionErrorCategory::ToolError,
771            ExecutionErrorCategory::PolicyViolation,
772            ExecutionErrorCategory::Timeout,
773            ExecutionErrorCategory::QuotaExceeded,
774            ExecutionErrorCategory::KernelInternal,
775            ExecutionErrorCategory::ValidationError,
776            ExecutionErrorCategory::NetworkError,
777        ];
778
779        for cat in categories {
780            let json = serde_json::to_string(&cat).unwrap();
781            let parsed: ExecutionErrorCategory = serde_json::from_str(&json).unwrap();
782            assert_eq!(cat, parsed);
783        }
784    }
785
786    // =========================================================================
787    // BackoffStrategy Tests
788    // =========================================================================
789
790    #[test]
791    fn test_backoff_none() {
792        let strategy = BackoffStrategy::None;
793        let base = Duration::from_millis(1000);
794        let max = Duration::from_millis(30000);
795
796        assert_eq!(strategy.calculate_delay(base, 1, max), Duration::ZERO);
797        assert_eq!(strategy.calculate_delay(base, 5, max), Duration::ZERO);
798    }
799
800    #[test]
801    fn test_backoff_constant() {
802        let strategy = BackoffStrategy::Constant;
803        let base = Duration::from_millis(500);
804        let max = Duration::from_millis(10000);
805
806        assert_eq!(
807            strategy.calculate_delay(base, 1, max),
808            Duration::from_millis(500)
809        );
810        assert_eq!(
811            strategy.calculate_delay(base, 5, max),
812            Duration::from_millis(500)
813        );
814    }
815
816    #[test]
817    fn test_backoff_linear() {
818        let strategy = BackoffStrategy::Linear;
819        let base = Duration::from_millis(1000);
820        let max = Duration::from_millis(30000);
821
822        assert_eq!(
823            strategy.calculate_delay(base, 1, max),
824            Duration::from_millis(1000)
825        );
826        assert_eq!(
827            strategy.calculate_delay(base, 2, max),
828            Duration::from_millis(2000)
829        );
830        assert_eq!(
831            strategy.calculate_delay(base, 3, max),
832            Duration::from_millis(3000)
833        );
834        // Should cap at max
835        assert_eq!(
836            strategy.calculate_delay(base, 100, max),
837            Duration::from_millis(30000)
838        );
839    }
840
841    #[test]
842    fn test_backoff_default() {
843        assert_eq!(BackoffStrategy::default(), BackoffStrategy::None);
844    }
845
846    #[test]
847    fn test_backoff_serde() {
848        let strategies = vec![
849            BackoffStrategy::None,
850            BackoffStrategy::Constant,
851            BackoffStrategy::Linear,
852            BackoffStrategy::Exponential,
853        ];
854
855        for strat in strategies {
856            let json = serde_json::to_string(&strat).unwrap();
857            let parsed: BackoffStrategy = serde_json::from_str(&json).unwrap();
858            assert_eq!(strat, parsed);
859        }
860    }
861
862    // =========================================================================
863    // RetryPolicy Tests
864    // =========================================================================
865
866    #[test]
867    fn test_retry_policy_fatal() {
868        let policy = RetryPolicy::fatal();
869        assert!(!policy.retryable);
870        assert_eq!(policy.max_retries, 0);
871        assert!(!policy.should_retry(1));
872    }
873
874    #[test]
875    fn test_retry_policy_retryable() {
876        let policy = RetryPolicy::retryable(5);
877        assert!(policy.retryable);
878        assert_eq!(policy.max_retries, 5);
879        assert_eq!(policy.backoff_strategy, BackoffStrategy::Exponential);
880    }
881
882    #[test]
883    fn test_retry_policy_delay_for_attempt() {
884        let policy = RetryPolicy {
885            retryable: true,
886            max_retries: 3,
887            backoff_strategy: BackoffStrategy::Constant,
888            base_delay: Duration::from_millis(500),
889            max_delay: Duration::from_millis(5000),
890            requires_idempotency_key: false,
891        };
892
893        assert_eq!(policy.delay_for_attempt(1), Duration::from_millis(500));
894        assert_eq!(policy.delay_for_attempt(2), Duration::from_millis(500));
895    }
896
897    #[test]
898    fn test_retry_policy_should_retry() {
899        let policy = RetryPolicy::retryable(3);
900        assert!(policy.should_retry(1));
901        assert!(policy.should_retry(2));
902        assert!(policy.should_retry(3));
903        assert!(!policy.should_retry(4));
904    }
905
906    #[test]
907    fn test_retry_policy_default() {
908        let policy = RetryPolicy::default();
909        assert!(!policy.retryable);
910        assert_eq!(policy.max_retries, 0);
911    }
912
913    #[test]
914    fn test_retry_policy_serde() {
915        let policy = RetryPolicy::retryable(3);
916        let json = serde_json::to_string(&policy).unwrap();
917        let parsed: RetryPolicy = serde_json::from_str(&json).unwrap();
918        assert_eq!(policy.retryable, parsed.retryable);
919        assert_eq!(policy.max_retries, parsed.max_retries);
920    }
921
922    // =========================================================================
923    // LlmErrorCode Tests
924    // =========================================================================
925
926    #[test]
927    fn test_llm_error_code_retryable() {
928        assert!(LlmErrorCode::RateLimit.is_retryable());
929        assert!(LlmErrorCode::ModelUnavailable.is_retryable());
930        assert!(LlmErrorCode::ProviderError.is_retryable());
931
932        assert!(!LlmErrorCode::ContextOverflow.is_retryable());
933        assert!(!LlmErrorCode::ContentFiltered.is_retryable());
934        assert!(!LlmErrorCode::InvalidRequest.is_retryable());
935        assert!(!LlmErrorCode::AuthFailed.is_retryable());
936    }
937
938    #[test]
939    fn test_llm_error_code_display() {
940        assert_eq!(format!("{}", LlmErrorCode::RateLimit), "rate_limit");
941        assert_eq!(
942            format!("{}", LlmErrorCode::ContextOverflow),
943            "context_overflow"
944        );
945        assert_eq!(
946            format!("{}", LlmErrorCode::ContentFiltered),
947            "content_filtered"
948        );
949        assert_eq!(
950            format!("{}", LlmErrorCode::InvalidRequest),
951            "invalid_request"
952        );
953        assert_eq!(format!("{}", LlmErrorCode::AuthFailed), "auth_failed");
954        assert_eq!(
955            format!("{}", LlmErrorCode::ModelUnavailable),
956            "model_unavailable"
957        );
958        assert_eq!(format!("{}", LlmErrorCode::ProviderError), "provider_error");
959    }
960
961    #[test]
962    fn test_llm_error_code_serde() {
963        let codes = vec![
964            LlmErrorCode::RateLimit,
965            LlmErrorCode::ContextOverflow,
966            LlmErrorCode::ContentFiltered,
967            LlmErrorCode::InvalidRequest,
968            LlmErrorCode::AuthFailed,
969            LlmErrorCode::ModelUnavailable,
970            LlmErrorCode::ProviderError,
971        ];
972
973        for code in codes {
974            let json = serde_json::to_string(&code).unwrap();
975            let parsed: LlmErrorCode = serde_json::from_str(&json).unwrap();
976            assert_eq!(code, parsed);
977        }
978    }
979
980    // =========================================================================
981    // ToolErrorCode Tests
982    // =========================================================================
983
984    #[test]
985    fn test_tool_error_code_retryable() {
986        assert!(ToolErrorCode::Timeout.is_retryable());
987        assert!(ToolErrorCode::ExecutionFailed.is_retryable());
988
989        assert!(!ToolErrorCode::NotFound.is_retryable());
990        assert!(!ToolErrorCode::PermissionDenied.is_retryable());
991        assert!(!ToolErrorCode::InvalidInput.is_retryable());
992        assert!(!ToolErrorCode::OutputInvalid.is_retryable());
993    }
994
995    #[test]
996    fn test_tool_error_code_display() {
997        assert_eq!(format!("{}", ToolErrorCode::NotFound), "not_found");
998        assert_eq!(
999            format!("{}", ToolErrorCode::PermissionDenied),
1000            "permission_denied"
1001        );
1002        assert_eq!(format!("{}", ToolErrorCode::InvalidInput), "invalid_input");
1003        assert_eq!(
1004            format!("{}", ToolErrorCode::ExecutionFailed),
1005            "execution_failed"
1006        );
1007        assert_eq!(format!("{}", ToolErrorCode::Timeout), "timeout");
1008        assert_eq!(
1009            format!("{}", ToolErrorCode::OutputInvalid),
1010            "output_invalid"
1011        );
1012    }
1013
1014    #[test]
1015    fn test_tool_error_code_serde() {
1016        let codes = vec![
1017            ToolErrorCode::NotFound,
1018            ToolErrorCode::PermissionDenied,
1019            ToolErrorCode::InvalidInput,
1020            ToolErrorCode::ExecutionFailed,
1021            ToolErrorCode::Timeout,
1022            ToolErrorCode::OutputInvalid,
1023        ];
1024
1025        for code in codes {
1026            let json = serde_json::to_string(&code).unwrap();
1027            let parsed: ToolErrorCode = serde_json::from_str(&json).unwrap();
1028            assert_eq!(code, parsed);
1029        }
1030    }
1031
1032    // =========================================================================
1033    // ExecutionError Tests
1034    // =========================================================================
1035
1036    #[test]
1037    fn test_execution_error_new() {
1038        let error = ExecutionError::new(ExecutionErrorCategory::LlmError, "Test message");
1039        assert_eq!(error.category, ExecutionErrorCategory::LlmError);
1040        assert_eq!(error.message, "Test message");
1041        assert_eq!(error.attempt, 1);
1042        assert!(error.retry_policy.retryable);
1043    }
1044
1045    #[test]
1046    fn test_execution_error_with_code() {
1047        let error =
1048            ExecutionError::new(ExecutionErrorCategory::ToolError, "Test").with_code("custom_code");
1049        assert_eq!(error.code, Some("custom_code".to_string()));
1050    }
1051
1052    #[test]
1053    fn test_execution_error_with_attempt() {
1054        let error = ExecutionError::new(ExecutionErrorCategory::LlmError, "Test").with_attempt(3);
1055        assert_eq!(error.attempt, 3);
1056    }
1057
1058    #[test]
1059    fn test_execution_error_with_step_id() {
1060        let step_id = StepId::from_string("step_test");
1061        let error = ExecutionError::new(ExecutionErrorCategory::ToolError, "Test")
1062            .with_step_id(step_id.clone());
1063        assert_eq!(error.step_id.unwrap().as_str(), "step_test");
1064    }
1065
1066    #[test]
1067    fn test_execution_error_with_provider() {
1068        let error = ExecutionError::llm(LlmErrorCode::RateLimit, "Test").with_provider("openai");
1069        assert_eq!(error.provider, Some("openai".to_string()));
1070    }
1071
1072    #[test]
1073    fn test_execution_error_with_details() {
1074        let details = serde_json::json!({"key": "value"});
1075        let error = ExecutionError::new(ExecutionErrorCategory::ToolError, "Test")
1076            .with_details(details.clone());
1077        assert_eq!(error.details, Some(details));
1078    }
1079
1080    #[test]
1081    fn test_execution_error_with_retry_policy() {
1082        let policy = RetryPolicy::retryable(10);
1083        let error = ExecutionError::new(ExecutionErrorCategory::ToolError, "Test")
1084            .with_retry_policy(policy.clone());
1085        assert_eq!(error.retry_policy.max_retries, 10);
1086    }
1087
1088    #[test]
1089    fn test_execution_error_convenience_constructors() {
1090        let llm = ExecutionError::llm(LlmErrorCode::RateLimit, "LLM error");
1091        assert_eq!(llm.category, ExecutionErrorCategory::LlmError);
1092
1093        let tool = ExecutionError::tool(ToolErrorCode::NotFound, "Tool error");
1094        assert_eq!(tool.category, ExecutionErrorCategory::ToolError);
1095
1096        let policy = ExecutionError::policy_violation("Policy error");
1097        assert_eq!(policy.category, ExecutionErrorCategory::PolicyViolation);
1098
1099        let timeout = ExecutionError::timeout("Timeout error");
1100        assert_eq!(timeout.category, ExecutionErrorCategory::Timeout);
1101
1102        let quota = ExecutionError::quota_exceeded("Quota error");
1103        assert_eq!(quota.category, ExecutionErrorCategory::QuotaExceeded);
1104
1105        let kernel = ExecutionError::kernel_internal("Kernel error");
1106        assert_eq!(kernel.category, ExecutionErrorCategory::KernelInternal);
1107
1108        let validation = ExecutionError::validation("Validation error");
1109        assert_eq!(validation.category, ExecutionErrorCategory::ValidationError);
1110
1111        let network = ExecutionError::network("Network error");
1112        assert_eq!(network.category, ExecutionErrorCategory::NetworkError);
1113    }
1114
1115    #[test]
1116    fn test_execution_error_is_retryable() {
1117        assert!(ExecutionError::llm(LlmErrorCode::RateLimit, "").is_retryable());
1118        assert!(ExecutionError::tool(ToolErrorCode::Timeout, "").is_retryable());
1119        assert!(ExecutionError::timeout("").is_retryable());
1120        assert!(ExecutionError::network("").is_retryable());
1121
1122        assert!(!ExecutionError::policy_violation("").is_retryable());
1123        assert!(!ExecutionError::quota_exceeded("").is_retryable());
1124        assert!(!ExecutionError::kernel_internal("").is_retryable());
1125        assert!(!ExecutionError::validation("").is_retryable());
1126    }
1127
1128    #[test]
1129    fn test_execution_error_is_fatal() {
1130        assert!(!ExecutionError::llm(LlmErrorCode::RateLimit, "").is_fatal());
1131        assert!(!ExecutionError::timeout("").is_fatal());
1132
1133        assert!(ExecutionError::policy_violation("").is_fatal());
1134        assert!(ExecutionError::quota_exceeded("").is_fatal());
1135        assert!(ExecutionError::kernel_internal("").is_fatal());
1136        assert!(ExecutionError::validation("").is_fatal());
1137    }
1138
1139    #[test]
1140    fn test_execution_error_retry_delay() {
1141        let error = ExecutionError::new(ExecutionErrorCategory::LlmError, "Test");
1142        let delay = error.retry_delay();
1143        assert!(delay > Duration::ZERO);
1144    }
1145
1146    #[test]
1147    fn test_execution_error_next_attempt() {
1148        let error = ExecutionError::llm(LlmErrorCode::RateLimit, "Test");
1149        assert_eq!(error.attempt, 1);
1150
1151        let error2 = error.next_attempt();
1152        assert_eq!(error2.attempt, 2);
1153
1154        let error3 = error2.next_attempt();
1155        assert_eq!(error3.attempt, 3);
1156    }
1157
1158    #[test]
1159    fn test_execution_error_to_http_status_all_categories() {
1160        assert_eq!(
1161            ExecutionError::llm(LlmErrorCode::RateLimit, "").to_http_status(),
1162            502
1163        );
1164        assert_eq!(
1165            ExecutionError::tool(ToolErrorCode::NotFound, "").to_http_status(),
1166            500
1167        );
1168        assert_eq!(ExecutionError::policy_violation("").to_http_status(), 403);
1169        assert_eq!(ExecutionError::timeout("").to_http_status(), 504);
1170        assert_eq!(ExecutionError::quota_exceeded("").to_http_status(), 429);
1171        assert_eq!(ExecutionError::kernel_internal("").to_http_status(), 500);
1172        assert_eq!(ExecutionError::validation("").to_http_status(), 400);
1173        assert_eq!(ExecutionError::network("").to_http_status(), 503);
1174    }
1175
1176    #[test]
1177    fn test_execution_error_to_http_status_override() {
1178        let error = ExecutionError::network("Test").with_http_status(418);
1179        assert_eq!(error.to_http_status(), 418);
1180    }
1181
1182    #[test]
1183    fn test_execution_error_display() {
1184        let error = ExecutionError::llm(LlmErrorCode::RateLimit, "Too many requests");
1185        let display = format!("{}", error);
1186        assert!(display.contains("LlmError"));
1187        assert!(display.contains("Too many requests"));
1188        assert!(display.contains("rate_limit"));
1189    }
1190
1191    #[test]
1192    fn test_execution_error_display_with_attempt() {
1193        let error = ExecutionError::llm(LlmErrorCode::RateLimit, "Test").with_attempt(3);
1194        let display = format!("{}", error);
1195        assert!(display.contains("[attempt 3]"));
1196    }
1197
1198    #[test]
1199    fn test_execution_error_display_no_attempt_shown_for_first() {
1200        let error = ExecutionError::llm(LlmErrorCode::RateLimit, "Test");
1201        let display = format!("{}", error);
1202        assert!(!display.contains("attempt"));
1203    }
1204
1205    // =========================================================================
1206    // From Implementation Tests
1207    // =========================================================================
1208
1209    #[test]
1210    fn test_from_serde_json_error() {
1211        let json_err = serde_json::from_str::<String>("invalid json").unwrap_err();
1212        let error: ExecutionError = json_err.into();
1213        assert_eq!(error.category, ExecutionErrorCategory::ValidationError);
1214        assert!(error.message.contains("JSON error"));
1215    }
1216
1217    #[test]
1218    fn test_from_io_error_timeout() {
1219        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timed out");
1220        let error: ExecutionError = io_err.into();
1221        assert_eq!(error.category, ExecutionErrorCategory::Timeout);
1222    }
1223
1224    #[test]
1225    fn test_from_io_error_connection_refused() {
1226        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionRefused, "refused");
1227        let error: ExecutionError = io_err.into();
1228        assert_eq!(error.category, ExecutionErrorCategory::NetworkError);
1229    }
1230
1231    #[test]
1232    fn test_from_io_error_connection_reset() {
1233        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionReset, "reset");
1234        let error: ExecutionError = io_err.into();
1235        assert_eq!(error.category, ExecutionErrorCategory::NetworkError);
1236    }
1237
1238    #[test]
1239    fn test_from_io_error_connection_aborted() {
1240        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionAborted, "aborted");
1241        let error: ExecutionError = io_err.into();
1242        assert_eq!(error.category, ExecutionErrorCategory::NetworkError);
1243    }
1244
1245    #[test]
1246    fn test_from_io_error_other() {
1247        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "not found");
1248        let error: ExecutionError = io_err.into();
1249        assert_eq!(error.category, ExecutionErrorCategory::KernelInternal);
1250    }
1251
1252    // =========================================================================
1253    // Default Retry Policy Tests
1254    // =========================================================================
1255
1256    #[test]
1257    fn test_default_retry_policy_tool_error() {
1258        let policy = ExecutionErrorCategory::ToolError.default_retry_policy();
1259        assert!(policy.retryable);
1260        assert_eq!(policy.max_retries, 2);
1261        assert_eq!(policy.backoff_strategy, BackoffStrategy::Constant);
1262        assert!(policy.requires_idempotency_key);
1263    }
1264
1265    #[test]
1266    fn test_default_retry_policy_timeout() {
1267        let policy = ExecutionErrorCategory::Timeout.default_retry_policy();
1268        assert!(policy.retryable);
1269        assert_eq!(policy.max_retries, 1);
1270        assert!(policy.requires_idempotency_key);
1271    }
1272
1273    #[test]
1274    fn test_default_retry_policy_network() {
1275        let policy = ExecutionErrorCategory::NetworkError.default_retry_policy();
1276        assert!(policy.retryable);
1277        assert_eq!(policy.max_retries, 3);
1278        assert_eq!(policy.backoff_strategy, BackoffStrategy::Exponential);
1279        assert!(policy.requires_idempotency_key);
1280    }
1281}