rustkernel_core/resilience/
mod.rs

1//! Resilience Patterns
2//!
3//! This module provides production-grade resilience patterns for RustKernels:
4//!
5//! - **Circuit Breaker**: Prevent cascade failures by detecting unhealthy kernels
6//! - **Timeout**: Deadline propagation and timeout enforcement
7//! - **Recovery**: Automatic recovery from transient failures
8//! - **Health**: Health checking for liveness/readiness probes
9//!
10//! # Example
11//!
12//! ```rust,ignore
//! use rustkernel_core::resilience::{CircuitBreaker, CircuitBreakerConfig};
//! use std::time::Duration;
14//!
15//! let config = CircuitBreakerConfig::default()
16//!     .failure_threshold(5)
17//!     .reset_timeout(Duration::from_secs(30));
18//!
19//! let cb = CircuitBreaker::new("graph/pagerank", config);
20//!
21//! // Execute with circuit breaker protection
22//! cb.execute(|| async {
23//!     kernel.execute(input).await
24//! }).await?;
25//! ```
26
27pub mod circuit_breaker;
28pub mod health;
29pub mod recovery;
30pub mod timeout;
31
32pub use circuit_breaker::{CircuitBreaker, CircuitBreakerConfig, CircuitState};
33pub use health::{HealthCheck, HealthCheckResult, HealthProbe};
34pub use recovery::{RecoveryPolicy, RecoveryStrategy, RetryConfig};
35pub use timeout::{DeadlineContext, TimeoutConfig, TimeoutError};
36
37use serde::{Deserialize, Serialize};
38use std::time::Duration;
39
40/// Unified resilience configuration
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct ResilienceConfig {
43    /// Circuit breaker configuration
44    pub circuit_breaker: Option<CircuitBreakerConfig>,
45    /// Timeout configuration
46    pub timeout: Option<TimeoutConfig>,
47    /// Recovery policy
48    pub recovery: Option<RecoveryPolicy>,
49    /// Health check configuration
50    pub health_check_interval: Duration,
51}
52
53impl Default for ResilienceConfig {
54    fn default() -> Self {
55        Self {
56            circuit_breaker: Some(CircuitBreakerConfig::default()),
57            timeout: Some(TimeoutConfig::default()),
58            recovery: Some(RecoveryPolicy::default()),
59            health_check_interval: Duration::from_secs(10),
60        }
61    }
62}
63
64impl ResilienceConfig {
65    /// Create a new resilience config
66    pub fn new() -> Self {
67        Self::default()
68    }
69
70    /// Disable all resilience features
71    pub fn disabled() -> Self {
72        Self {
73            circuit_breaker: None,
74            timeout: None,
75            recovery: None,
76            health_check_interval: Duration::from_secs(60),
77        }
78    }
79
80    /// Production configuration with conservative settings
81    pub fn production() -> Self {
82        Self {
83            circuit_breaker: Some(CircuitBreakerConfig::production()),
84            timeout: Some(TimeoutConfig::production()),
85            recovery: Some(RecoveryPolicy::production()),
86            health_check_interval: Duration::from_secs(10),
87        }
88    }
89
90    /// Development configuration with relaxed settings
91    pub fn development() -> Self {
92        Self {
93            circuit_breaker: Some(CircuitBreakerConfig::default()),
94            timeout: Some(TimeoutConfig::development()),
95            recovery: Some(RecoveryPolicy::development()),
96            health_check_interval: Duration::from_secs(30),
97        }
98    }
99
100    /// Set circuit breaker config
101    pub fn with_circuit_breaker(mut self, config: CircuitBreakerConfig) -> Self {
102        self.circuit_breaker = Some(config);
103        self
104    }
105
106    /// Set timeout config
107    pub fn with_timeout(mut self, config: TimeoutConfig) -> Self {
108        self.timeout = Some(config);
109        self
110    }
111
112    /// Set recovery policy
113    pub fn with_recovery(mut self, policy: RecoveryPolicy) -> Self {
114        self.recovery = Some(policy);
115        self
116    }
117
118    /// Set health check interval
119    pub fn with_health_check_interval(mut self, interval: Duration) -> Self {
120        self.health_check_interval = interval;
121        self
122    }
123}
124
125/// Result type for resilience operations
126pub type ResilienceResult<T> = std::result::Result<T, ResilienceError>;
127
128/// Errors from resilience patterns
129#[derive(Debug, thiserror::Error)]
130pub enum ResilienceError {
131    /// Circuit breaker is open
132    #[error("Circuit breaker is open for {kernel_id}")]
133    CircuitOpen {
134        /// The kernel ID whose circuit breaker is open
135        kernel_id: String,
136    },
137
138    /// Request timed out
139    #[error("Request timed out after {timeout:?}")]
140    Timeout {
141        /// The timeout duration that was exceeded
142        timeout: Duration,
143    },
144
145    /// Deadline exceeded
146    #[error("Deadline exceeded")]
147    DeadlineExceeded,
148
149    /// Max retries exceeded
150    #[error("Max retries ({retries}) exceeded")]
151    MaxRetriesExceeded {
152        /// The number of retries that were attempted
153        retries: u32,
154    },
155
156    /// Health check failed
157    #[error("Health check failed: {reason}")]
158    HealthCheckFailed {
159        /// The reason for the health check failure
160        reason: String,
161    },
162
163    /// Kernel error during execution
164    #[error("Kernel error: {0}")]
165    KernelError(#[from] crate::error::KernelError),
166}
167
#[cfg(test)]
mod tests {
    use super::*;

    /// The default config enables every section and checks health every 10 s.
    #[test]
    fn test_default_config() {
        let config = ResilienceConfig::default();
        assert!(config.circuit_breaker.is_some());
        assert!(config.timeout.is_some());
        assert!(config.recovery.is_some());
        assert_eq!(config.health_check_interval, Duration::from_secs(10));
    }

    /// The disabled config clears every section and checks health every 60 s.
    #[test]
    fn test_disabled_config() {
        let config = ResilienceConfig::disabled();
        assert!(config.circuit_breaker.is_none());
        assert!(config.timeout.is_none());
        assert!(config.recovery.is_none());
        assert_eq!(config.health_check_interval, Duration::from_secs(60));
    }

    /// The production preset enables every section, including recovery.
    #[test]
    fn test_production_config() {
        let config = ResilienceConfig::production();
        assert!(config.circuit_breaker.is_some());
        assert!(config.timeout.is_some());
        assert!(config.recovery.is_some());
        assert_eq!(config.health_check_interval, Duration::from_secs(10));
    }

    /// The development preset enables every section and checks health every 30 s.
    #[test]
    fn test_development_config() {
        let config = ResilienceConfig::development();
        assert!(config.circuit_breaker.is_some());
        assert!(config.timeout.is_some());
        assert!(config.recovery.is_some());
        assert_eq!(config.health_check_interval, Duration::from_secs(30));
    }

    /// Each `with_*` builder sets its own section and leaves the others alone.
    #[test]
    fn test_builder_methods() {
        let config = ResilienceConfig::disabled()
            .with_circuit_breaker(CircuitBreakerConfig::default());
        assert!(config.circuit_breaker.is_some());
        assert!(config.timeout.is_none());
        assert!(config.recovery.is_none());

        let config = config
            .with_timeout(TimeoutConfig::default())
            .with_recovery(RecoveryPolicy::default())
            .with_health_check_interval(Duration::from_secs(5));
        assert!(config.circuit_breaker.is_some());
        assert!(config.timeout.is_some());
        assert!(config.recovery.is_some());
        assert_eq!(config.health_check_interval, Duration::from_secs(5));
    }

    /// Error messages render the variant fields as declared in `#[error]`.
    #[test]
    fn test_error_display() {
        let open = ResilienceError::CircuitOpen {
            kernel_id: "graph/pagerank".to_string(),
        };
        assert_eq!(
            open.to_string(),
            "Circuit breaker is open for graph/pagerank"
        );

        let timed_out = ResilienceError::Timeout {
            timeout: Duration::from_secs(5),
        };
        assert_eq!(timed_out.to_string(), "Request timed out after 5s");

        assert_eq!(
            ResilienceError::DeadlineExceeded.to_string(),
            "Deadline exceeded"
        );

        let retries = ResilienceError::MaxRetriesExceeded { retries: 3 };
        assert_eq!(retries.to_string(), "Max retries (3) exceeded");

        let unhealthy = ResilienceError::HealthCheckFailed {
            reason: "no heartbeat".to_string(),
        };
        assert_eq!(
            unhealthy.to_string(),
            "Health check failed: no heartbeat"
        );
    }
}