Skip to main content

allframe_core/domain/
resilience.rs

1//! Domain Layer Resilience Contracts
2//!
3//! This module defines resilience contracts that the domain layer can declare
4//! without depending on infrastructure implementations. These contracts allow
5//! domain logic to specify resilience requirements while maintaining Clean
6//! Architecture principles.
7//!
8//! The domain layer defines WHAT resilience is needed, not HOW it's
9//! implemented.
10
11use std::time::Duration;
12
13/// Resilience policies that domain entities can declare.
14/// These represent business requirements for reliability, not implementation
15/// details.
16#[derive(Clone, Debug, PartialEq)]
17pub enum ResiliencePolicy {
18    /// No resilience - execute once
19    None,
20
21    /// Retry on failure with backoff strategy
22    Retry {
23        max_attempts: u32,
24        backoff: BackoffStrategy,
25    },
26
27    /// Circuit breaker pattern for fault tolerance
28    CircuitBreaker {
29        failure_threshold: u32,
30        recovery_timeout: Duration,
31        success_threshold: u32,
32    },
33
34    /// Rate limiting to prevent resource exhaustion
35    RateLimit {
36        requests_per_second: u32,
37        burst_capacity: u32,
38    },
39
40    /// Timeout protection
41    Timeout { duration: Duration },
42
43    /// Combination of multiple policies
44    Combined { policies: Vec<ResiliencePolicy> },
45}
46
/// Delay schedules that a retry policy can request between attempts.
///
/// The variants only hold parameters; the actual delay computation is not part
/// of this module. Because `Exponential` carries an `f64`, the type is
/// `PartialEq` but not `Eq`.
#[derive(Debug, Clone, PartialEq)]
pub enum BackoffStrategy {
    /// Fixed delay between attempts.
    Fixed {
        /// The delay to use between attempts.
        delay: Duration,
    },

    /// Exponential backoff with optional jitter.
    Exponential {
        /// Starting delay.
        initial_delay: Duration,
        /// Growth factor (presumably applied per attempt - the exact formula
        /// is up to the executor).
        multiplier: f64,
        /// Optional upper bound on the delay; `None` declares no bound.
        max_delay: Option<Duration>,
        /// Whether jitter is requested.
        jitter: bool,
    },

    /// Linear backoff.
    Linear {
        /// Starting delay.
        initial_delay: Duration,
        /// Step size (presumably added per attempt - the exact formula is up
        /// to the executor).
        increment: Duration,
        /// Optional upper bound on the delay; `None` declares no bound.
        max_delay: Option<Duration>,
    },
}
68
69impl Default for BackoffStrategy {
70    fn default() -> Self {
71        Self::Exponential {
72            initial_delay: Duration::from_millis(100),
73            multiplier: 2.0,
74            max_delay: Some(Duration::from_secs(30)),
75            jitter: true,
76        }
77    }
78}
79
80/// Domain-level resilience errors.
81/// These represent business-level error conditions that can be mapped
82/// to infrastructure-level errors by the application layer.
83#[derive(thiserror::Error, Debug, Clone, PartialEq)]
84pub enum ResilienceDomainError {
85    #[error("Operation timed out after {duration:?}")]
86    Timeout { duration: Duration },
87
88    #[error("Operation failed after {attempts} attempts")]
89    RetryExhausted { attempts: u32, last_error: String },
90
91    #[error("Circuit breaker is open - service unavailable")]
92    CircuitOpen,
93
94    #[error("Rate limit exceeded - too many requests")]
95    RateLimited { retry_after: Option<Duration> },
96
97    #[error("Operation cancelled")]
98    Cancelled,
99
100    #[error("Infrastructure error: {message}")]
101    Infrastructure { message: String },
102}
103
104impl ResilienceDomainError {
105    /// Check if this error represents a temporary failure that might be retried
106    pub fn is_retryable(&self) -> bool {
107        match self {
108            Self::Timeout { .. } => true,
109            Self::RetryExhausted { .. } => false, // Already exhausted retries
110            Self::CircuitOpen => false,           // Circuit breaker protects from further calls
111            Self::RateLimited { .. } => true,     // Can retry after backoff
112            Self::Cancelled => false,             // Operation was intentionally cancelled
113            Self::Infrastructure { .. } => true,  // Infrastructure issues might be transient
114        }
115    }
116
117    /// Check if this error indicates the service is unavailable
118    pub fn is_service_unavailable(&self) -> bool {
119        matches!(self, Self::CircuitOpen)
120    }
121
122    /// Get suggested retry delay if applicable
123    pub fn retry_after(&self) -> Option<Duration> {
124        match self {
125            Self::RateLimited { retry_after } => *retry_after,
126            _ => None,
127        }
128    }
129}
130
131/// Trait for domain operations that declare resilience requirements.
132/// Domain entities implement this to specify how they should be executed
133/// reliably.
134#[async_trait::async_trait]
135pub trait ResilientOperation<T, E> {
136    /// Declare the resilience policy required for this operation
137    fn resilience_policy(&self) -> ResiliencePolicy;
138
139    /// Execute the core business logic
140    async fn execute(&self) -> Result<T, E>;
141
142    /// Get a unique identifier for this operation (for circuit breakers,
143    /// metrics, etc.)
144    fn operation_id(&self) -> &str {
145        std::any::type_name::<Self>()
146    }
147
148    /// Check if this operation is critical (affects circuit breaker behavior)
149    fn is_critical(&self) -> bool {
150        true
151    }
152}
153
154/// Trait for domain services that need resilience.
155/// Services implement this to declare their resilience requirements at the
156/// service level.
157#[async_trait::async_trait]
158pub trait ResilientService {
159    /// Get the default resilience policy for this service
160    fn default_resilience_policy(&self) -> ResiliencePolicy {
161        ResiliencePolicy::None
162    }
163
164    /// Get service-specific resilience policies for different operations
165    fn operation_policies(&self) -> std::collections::HashMap<String, ResiliencePolicy> {
166        std::collections::HashMap::new()
167    }
168
169    /// Get the service identifier for monitoring and circuit breakers
170    fn service_id(&self) -> &str {
171        std::any::type_name::<Self>()
172    }
173}
174
175/// Configuration for resilience behavior.
176/// Domain layer can provide hints about expected behavior without knowing
177/// implementation.
178#[derive(Clone, Debug)]
179pub struct ResilienceConfig {
180    /// Whether to enable resilience globally
181    pub enabled: bool,
182
183    /// Default policies for different operation types
184    pub default_policies: std::collections::HashMap<String, ResiliencePolicy>,
185
186    /// Service-specific overrides
187    pub service_overrides: std::collections::HashMap<String, ResiliencePolicy>,
188}
189
190impl Default for ResilienceConfig {
191    fn default() -> Self {
192        Self {
193            enabled: true,
194            default_policies: std::collections::HashMap::new(),
195            service_overrides: std::collections::HashMap::new(),
196        }
197    }
198}
199
200/// Helper for creating common resilience policies
201pub mod policies {
202    use std::time::Duration;
203
204    use super::*;
205
206    /// Create a simple retry policy
207    pub fn retry(max_attempts: u32) -> ResiliencePolicy {
208        ResiliencePolicy::Retry {
209            max_attempts,
210            backoff: BackoffStrategy::default(),
211        }
212    }
213
214    /// Create a circuit breaker policy
215    pub fn circuit_breaker(failure_threshold: u32, recovery_timeout_secs: u64) -> ResiliencePolicy {
216        ResiliencePolicy::CircuitBreaker {
217            failure_threshold,
218            recovery_timeout: Duration::from_secs(recovery_timeout_secs),
219            success_threshold: 3,
220        }
221    }
222
223    /// Create a rate limiting policy
224    pub fn rate_limit(requests_per_second: u32) -> ResiliencePolicy {
225        ResiliencePolicy::RateLimit {
226            requests_per_second,
227            burst_capacity: requests_per_second / 4, // Allow burst of 25%
228        }
229    }
230
231    /// Create a timeout policy
232    pub fn timeout(seconds: u64) -> ResiliencePolicy {
233        ResiliencePolicy::Timeout {
234            duration: Duration::from_secs(seconds),
235        }
236    }
237
238    /// Combine multiple policies
239    pub fn combine(policies: Vec<ResiliencePolicy>) -> ResiliencePolicy {
240        ResiliencePolicy::Combined { policies }
241    }
242}
243
#[cfg(test)]
mod tests {
    use super::*;

    /// The `policies` helpers fill in the fields they are documented to set.
    #[test]
    fn test_resilience_policy_creation() {
        let expected_retry = ResiliencePolicy::Retry {
            max_attempts: 3,
            backoff: BackoffStrategy::default(),
        };
        assert_eq!(policies::retry(3), expected_retry);

        // `success_threshold` is intentionally not checked here.
        if let ResiliencePolicy::CircuitBreaker {
            failure_threshold,
            recovery_timeout,
            ..
        } = policies::circuit_breaker(5, 30)
        {
            assert_eq!(failure_threshold, 5);
            assert_eq!(recovery_timeout, Duration::from_secs(30));
        } else {
            panic!("Expected CircuitBreaker policy");
        }
    }

    /// Timeouts and rate limiting are retryable; an open circuit and a
    /// cancellation are not.
    #[test]
    fn test_domain_error_retryable() {
        let timed_out = ResilienceDomainError::Timeout {
            duration: Duration::from_secs(1),
        };
        let rate_limited = ResilienceDomainError::RateLimited { retry_after: None };

        assert!(timed_out.is_retryable());
        assert!(rate_limited.is_retryable());
        assert!(!ResilienceDomainError::CircuitOpen.is_retryable());
        assert!(!ResilienceDomainError::Cancelled.is_retryable());
    }

    /// The default backoff is exponential: 100 ms start, x2, 30 s cap, jitter.
    #[test]
    fn test_backoff_strategy_default() {
        if let BackoffStrategy::Exponential {
            initial_delay,
            multiplier,
            max_delay,
            jitter,
        } = BackoffStrategy::default()
        {
            assert_eq!(initial_delay, Duration::from_millis(100));
            assert_eq!(multiplier, 2.0);
            assert_eq!(max_delay, Some(Duration::from_secs(30)));
            assert!(jitter);
        } else {
            panic!("Expected Exponential backoff");
        }
    }

    /// `policies::combine` keeps its members in the order given.
    #[test]
    fn test_combined_policies() {
        let retry = policies::retry(3);
        let members = vec![retry.clone(), policies::timeout(10)];

        if let ResiliencePolicy::Combined { policies: combined } = policies::combine(members) {
            assert_eq!(combined.len(), 2);
            assert_eq!(combined[0], retry);
        } else {
            panic!("Expected Combined policy");
        }
    }
}