1use std::sync::{
6 Arc,
7 atomic::{AtomicU64, Ordering},
8};
9
10use chrono::{DateTime, Duration, Utc};
11
/// How a caller should react to a [`RecoveryError`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RecoveryStrategy {
    /// Fall back to locally cached key material.
    UseCache,
    /// Retry the operation, typically with backoff (see `RetryConfig`).
    Retry,
    /// Give up immediately; the error is not expected to clear on retry.
    FailFast,
    /// Degraded read-only operation; treated like `UseCache` by
    /// `RecoveryError::should_use_cache`.
    ReadOnly,
}
24
25impl std::fmt::Display for RecoveryStrategy {
26 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27 match self {
28 Self::UseCache => write!(f, "use_cache"),
29 Self::Retry => write!(f, "retry"),
30 Self::FailFast => write!(f, "fail_fast"),
31 Self::ReadOnly => write!(f, "read_only"),
32 }
33 }
34}
35
/// Classification of key-management / encryption failures. Each category maps
/// to a fixed strategy, suggestion, and retryability in [`RecoveryError::new`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorCategory {
    /// Network-level failure reaching the key backend (retryable).
    NetworkError,
    /// Vault is unreachable or unhealthy; cached keys are preferred.
    VaultUnavailable,
    /// Referenced encryption key does not exist (fail fast).
    KeyNotFound,
    /// Key has expired; it is refreshed automatically, so retry.
    KeyExpired,
    /// Caller lacks permission for the key (fail fast).
    PermissionDenied,
    /// The encrypt operation itself failed (fail fast).
    EncryptionFailed,
    /// The decrypt operation failed; data may be corrupted (fail fast).
    DecryptionFailed,
    /// Key absent from the local cache; a Vault fetch follows (retryable).
    CacheMiss,
    /// Unclassified failure (fail fast).
    Unknown,
}
58
59impl std::fmt::Display for ErrorCategory {
60 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61 match self {
62 Self::NetworkError => write!(f, "network_error"),
63 Self::VaultUnavailable => write!(f, "vault_unavailable"),
64 Self::KeyNotFound => write!(f, "key_not_found"),
65 Self::KeyExpired => write!(f, "key_expired"),
66 Self::PermissionDenied => write!(f, "permission_denied"),
67 Self::EncryptionFailed => write!(f, "encryption_failed"),
68 Self::DecryptionFailed => write!(f, "decryption_failed"),
69 Self::CacheMiss => write!(f, "cache_miss"),
70 Self::Unknown => write!(f, "unknown"),
71 }
72 }
73}
74
/// An error enriched with a recovery plan: how to react (`strategy`),
/// operator guidance (`suggestion`), and retry bookkeeping.
#[derive(Debug, Clone)]
pub struct RecoveryError {
    /// Classification that determined `strategy`, `suggestion`, and `retryable`.
    pub category: ErrorCategory,
    /// Human-readable description of the underlying failure.
    pub message: String,
    /// Recommended recovery action, derived from `category`.
    pub strategy: RecoveryStrategy,
    /// Operator-facing remediation hint, derived from `category`.
    pub suggestion: String,
    /// When this error was constructed (UTC).
    pub timestamp: DateTime<Utc>,
    /// Number of retries already attempted (0 at construction).
    pub retry_count: u32,
    /// Whether retrying may succeed, derived from `category`.
    pub retryable: bool,
}
93
94impl RecoveryError {
95 pub fn new(category: ErrorCategory, message: impl Into<String>) -> Self {
97 let suggestion = match category {
98 ErrorCategory::NetworkError => "Check network connectivity and retry".to_string(),
99 ErrorCategory::VaultUnavailable => "Vault is unavailable. Check Vault status and retry after 30s".to_string(),
100 ErrorCategory::KeyNotFound => "Encryption key not found. Check key reference in configuration".to_string(),
101 ErrorCategory::KeyExpired => "Encryption key has expired. Key will be refreshed automatically. Retry the operation".to_string(),
102 ErrorCategory::PermissionDenied => "Permission denied accessing encryption key. Check authentication credentials".to_string(),
103 ErrorCategory::EncryptionFailed => "Encryption operation failed. Check input data and retry".to_string(),
104 ErrorCategory::DecryptionFailed => "Decryption operation failed. Data may be corrupted".to_string(),
105 ErrorCategory::CacheMiss => "Key not in cache. Vault fetch will be attempted".to_string(),
106 ErrorCategory::Unknown => "An unknown error occurred. Check logs for details".to_string(),
107 };
108
109 let (strategy, retryable) = match category {
110 ErrorCategory::NetworkError => (RecoveryStrategy::Retry, true),
111 ErrorCategory::VaultUnavailable => (RecoveryStrategy::UseCache, true),
112 ErrorCategory::KeyExpired => (RecoveryStrategy::Retry, true),
113 ErrorCategory::KeyNotFound => (RecoveryStrategy::FailFast, false),
114 ErrorCategory::PermissionDenied => (RecoveryStrategy::FailFast, false),
115 ErrorCategory::EncryptionFailed => (RecoveryStrategy::FailFast, false),
116 ErrorCategory::DecryptionFailed => (RecoveryStrategy::FailFast, false),
117 ErrorCategory::CacheMiss => (RecoveryStrategy::Retry, true),
118 ErrorCategory::Unknown => (RecoveryStrategy::FailFast, false),
119 };
120
121 Self {
122 category,
123 message: message.into(),
124 strategy,
125 suggestion,
126 timestamp: Utc::now(),
127 retry_count: 0,
128 retryable,
129 }
130 }
131
132 pub fn with_retry_count(mut self, count: u32) -> Self {
134 self.retry_count = count;
135 self
136 }
137
138 pub fn age(&self) -> Duration {
140 Utc::now() - self.timestamp
141 }
142
143 pub fn is_fresh(&self) -> bool {
145 self.age() < Duration::minutes(1)
146 }
147
148 pub fn is_transient(&self) -> bool {
150 self.retryable
151 }
152
153 pub fn should_use_cache(&self) -> bool {
155 matches!(self.strategy, RecoveryStrategy::UseCache | RecoveryStrategy::ReadOnly)
156 }
157
158 pub fn retry_delay_ms(&self, config: &RetryConfig) -> u64 {
160 config.backoff_delay_ms(self.retry_count)
161 }
162}
163
/// Exponential-backoff retry policy.
#[derive(Debug, Clone)]
pub struct RetryConfig {
    /// Maximum number of retry attempts before giving up.
    pub max_retries: u32,
    /// Backoff for attempt 0, in milliseconds.
    pub initial_backoff_ms: u64,
    /// Upper bound on any single backoff delay, in milliseconds.
    pub max_backoff_ms: u64,
    /// Growth factor: delay = initial * multiplier^attempt.
    pub backoff_multiplier: f64,
}
176
177impl RetryConfig {
178 pub fn new() -> Self {
180 Self {
181 max_retries: 3,
182 initial_backoff_ms: 100,
183 max_backoff_ms: 5000,
184 backoff_multiplier: 2.0,
185 }
186 }
187
188 pub fn with_max_retries(mut self, max: u32) -> Self {
190 self.max_retries = max;
191 self
192 }
193
194 pub fn backoff_delay_ms(&self, attempt: u32) -> u64 {
196 let delay = self.initial_backoff_ms as f64 * self.backoff_multiplier.powi(attempt as i32);
197 (delay as u64).min(self.max_backoff_ms)
198 }
199
200 pub fn should_retry(&self, attempt: u32) -> bool {
202 attempt < self.max_retries
203 }
204
205 pub fn backoff_delay_with_jitter_ms(&self, attempt: u32) -> u64 {
207 use rand::Rng;
208 let base_delay = self.backoff_delay_ms(attempt);
209 let jitter_percent = base_delay / 10;
211 let mut rng = rand::thread_rng();
212 let jitter = rng.gen_range(0..=jitter_percent);
213 let use_add = rng.gen_bool(0.5);
214 if use_add {
215 base_delay + jitter
216 } else {
217 base_delay.saturating_sub(jitter)
218 }
219 }
220}
221
222impl Default for RetryConfig {
223 fn default() -> Self {
224 Self::new()
225 }
226}
227
/// Three-state (Closed / Open / HalfOpen) circuit breaker with atomically
/// tracked failure and success counters.
#[derive(Debug)]
pub struct CircuitBreaker {
    /// Consecutive failures (while Closed) that trip the breaker Open.
    failure_threshold: u32,
    /// Successes (while HalfOpen) required to close the breaker again.
    success_threshold: u32,
    /// Failure streak; cleared on success while Closed and on close/reset.
    failure_count: Arc<AtomicU64>,
    /// Success streak; only meaningful while HalfOpen.
    success_count: Arc<AtomicU64>,
    /// Current state, stored via the typed wrapper in `mod atomic` below.
    state: Arc<atomic::AtomicUsize>,
    /// Timestamp of the most recent recorded state transition.
    last_change: Arc<std::sync::Mutex<DateTime<Utc>>>,
}
244
/// Typed atomic storage for the circuit-breaker state, so the rest of the
/// file never handles raw `usize` discriminants.
mod atomic {
    use std::sync::atomic::{AtomicUsize as Inner, Ordering};

    /// Circuit-breaker state, encoded as a `usize` inside [`AtomicUsize`].
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    #[repr(usize)]
    pub enum CircuitState {
        Closed = 0,
        Open = 1,
        HalfOpen = 2,
    }

    impl CircuitState {
        /// Decodes a raw discriminant; any out-of-range value falls back
        /// to `Closed`.
        pub fn from_usize(val: usize) -> Self {
            match val {
                1 => CircuitState::Open,
                2 => CircuitState::HalfOpen,
                // 0 and any unexpected value both decode as Closed.
                _ => CircuitState::Closed,
            }
        }

        /// Encodes the state as its `#[repr(usize)]` discriminant.
        pub fn to_usize(self) -> usize {
            self as usize
        }
    }

    /// Lock-free cell holding a [`CircuitState`], backed by a std atomic.
    /// Loads and stores use `Ordering::Relaxed`: only the value itself is
    /// synchronized, not surrounding memory.
    pub struct AtomicUsize(Inner);

    impl AtomicUsize {
        /// Creates the cell initialized to `val`.
        pub fn new(val: CircuitState) -> Self {
            Self(Inner::new(val.to_usize()))
        }

        /// Reads the current state.
        pub fn load(&self) -> CircuitState {
            CircuitState::from_usize(self.0.load(Ordering::Relaxed))
        }

        /// Overwrites the current state.
        pub fn store(&self, val: CircuitState) {
            self.0.store(val.to_usize(), Ordering::Relaxed)
        }
    }

    impl std::fmt::Debug for AtomicUsize {
        /// Debug-prints the decoded state rather than the raw integer.
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            write!(f, "{:?}", self.load())
        }
    }
}
293
294use atomic::{AtomicUsize, CircuitState};
295
impl CircuitBreaker {
    /// Creates a breaker in the Closed state with zeroed counters.
    ///
    /// `failure_threshold`: consecutive failures (while Closed) that open the
    /// circuit. `success_threshold`: successes (while HalfOpen) needed to
    /// close it again.
    pub fn new(failure_threshold: u32, success_threshold: u32) -> Self {
        Self {
            failure_threshold,
            success_threshold,
            failure_count: Arc::new(AtomicU64::new(0)),
            success_count: Arc::new(AtomicU64::new(0)),
            state: Arc::new(AtomicUsize::new(CircuitState::Closed)),
            last_change: Arc::new(std::sync::Mutex::new(Utc::now())),
        }
    }

    /// Records a successful call.
    ///
    /// Closed: clears the failure streak. HalfOpen: counts toward
    /// `success_threshold`; on reaching it, transitions to Closed, zeroes
    /// both counters, and stamps `last_change`. Open: ignored.
    ///
    /// NOTE(review): state is read and later written without compare-and-swap,
    /// so concurrent callers can interleave between `load` and `store`;
    /// confirm that occasional duplicated transitions are acceptable.
    pub fn record_success(&self) {
        let state = self.state.load();
        match state {
            CircuitState::Closed => {
                self.failure_count.store(0, Ordering::Relaxed);
            },
            CircuitState::HalfOpen => {
                let success = self.success_count.fetch_add(1, Ordering::Relaxed) + 1;
                if success >= self.success_threshold as u64 {
                    self.state.store(CircuitState::Closed);
                    self.failure_count.store(0, Ordering::Relaxed);
                    self.success_count.store(0, Ordering::Relaxed);
                    // A poisoned mutex is silently ignored (best-effort stamp).
                    if let Ok(mut last) = self.last_change.lock() {
                        *last = Utc::now();
                    }
                }
            },
            CircuitState::Open => {},
        }
    }

    /// Records a failed call.
    ///
    /// Closed: counts toward `failure_threshold`; on reaching it, opens the
    /// circuit and stamps `last_change`. HalfOpen: a single failure reopens
    /// the circuit and clears the success streak. Open: ignored.
    pub fn record_failure(&self) {
        let state = self.state.load();
        match state {
            CircuitState::Closed => {
                let failures = self.failure_count.fetch_add(1, Ordering::Relaxed) + 1;
                if failures >= self.failure_threshold as u64 {
                    self.state.store(CircuitState::Open);
                    if let Ok(mut last) = self.last_change.lock() {
                        *last = Utc::now();
                    }
                }
            },
            CircuitState::Open => {
                // Already open: further failures are not counted.
            },
            CircuitState::HalfOpen => {
                self.state.store(CircuitState::Open);
                self.success_count.store(0, Ordering::Relaxed);
                if let Ok(mut last) = self.last_change.lock() {
                    *last = Utc::now();
                }
            },
        }
    }

    /// True while calls may proceed (Closed or HalfOpen).
    pub fn is_allowed(&self) -> bool {
        matches!(self.state.load(), CircuitState::Closed | CircuitState::HalfOpen)
    }

    /// Snapshot of the current state.
    pub fn state(&self) -> CircuitState {
        self.state.load()
    }

    /// Forces the breaker back to Closed and zeroes both counters.
    ///
    /// NOTE(review): unlike the other transitions, this does not update
    /// `last_change` — confirm whether that is intentional.
    pub fn reset(&self) {
        self.state.store(CircuitState::Closed);
        self.failure_count.store(0, Ordering::Relaxed);
        self.success_count.store(0, Ordering::Relaxed);
    }

    /// Time elapsed since the last recorded state transition; returns zero
    /// if the `last_change` mutex is poisoned.
    pub fn time_since_last_change(&self) -> Duration {
        if let Ok(last) = self.last_change.lock() {
            Utc::now() - *last
        } else {
            Duration::zero()
        }
    }

    /// True when the breaker is Open and has been so for at least
    /// `recovery_timeout_ms`.
    ///
    /// NOTE(review): `num_milliseconds() as u64` wraps to a huge value if the
    /// duration is negative (clock moved backwards), which would report ready
    /// immediately — confirm that is acceptable.
    pub fn should_attempt_recovery(&self, recovery_timeout_ms: u64) -> bool {
        matches!(self.state.load(), CircuitState::Open)
        && self.time_since_last_change().num_milliseconds() as u64 >= recovery_timeout_ms
    }

    /// Transitions Open -> HalfOpen once `recovery_timeout_ms` has elapsed,
    /// clearing the success counter and stamping `last_change`. No-op
    /// otherwise.
    pub fn attempt_recovery(&self, recovery_timeout_ms: u64) {
        if self.should_attempt_recovery(recovery_timeout_ms) {
            self.state.store(CircuitState::HalfOpen);
            self.success_count.store(0, Ordering::Relaxed);
            if let Ok(mut last) = self.last_change.lock() {
                *last = Utc::now();
            }
        }
    }

    /// Snapshot of `(failure_count, success_count)`.
    pub fn get_counts(&self) -> (u64, u64) {
        let failures = self.failure_count.load(Ordering::Relaxed);
        let successes = self.success_count.load(Ordering::Relaxed);
        (failures, successes)
    }

    /// True when the breaker is Open (calls blocked).
    pub fn is_open(&self) -> bool {
        matches!(self.state.load(), CircuitState::Open)
    }

    /// True when the breaker is HalfOpen (probing for recovery).
    pub fn is_half_open(&self) -> bool {
        matches!(self.state.load(), CircuitState::HalfOpen)
    }
}
417
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds `n` consecutive failures into `cb`.
    fn fail_times(cb: &CircuitBreaker, n: u32) {
        for _ in 0..n {
            cb.record_failure();
        }
    }

    #[test]
    fn test_recovery_strategy_display() {
        for (strategy, expected) in [
            (RecoveryStrategy::UseCache, "use_cache"),
            (RecoveryStrategy::Retry, "retry"),
            (RecoveryStrategy::FailFast, "fail_fast"),
            (RecoveryStrategy::ReadOnly, "read_only"),
        ] {
            assert_eq!(strategy.to_string(), expected);
        }
    }

    #[test]
    fn test_error_category_display() {
        for (category, expected) in [
            (ErrorCategory::NetworkError, "network_error"),
            (ErrorCategory::VaultUnavailable, "vault_unavailable"),
            (ErrorCategory::KeyNotFound, "key_not_found"),
        ] {
            assert_eq!(category.to_string(), expected);
        }
    }

    #[test]
    fn test_recovery_error_network() {
        let err = RecoveryError::new(ErrorCategory::NetworkError, "Connection timeout");
        assert_eq!(err.category, ErrorCategory::NetworkError);
        assert_eq!(err.strategy, RecoveryStrategy::Retry);
        assert!(err.retryable);
    }

    #[test]
    fn test_recovery_error_vault_unavailable() {
        let err = RecoveryError::new(ErrorCategory::VaultUnavailable, "Vault unreachable");
        assert_eq!(err.category, ErrorCategory::VaultUnavailable);
        assert_eq!(err.strategy, RecoveryStrategy::UseCache);
        assert!(err.retryable);
    }

    #[test]
    fn test_recovery_error_key_not_found() {
        let err = RecoveryError::new(ErrorCategory::KeyNotFound, "Key missing");
        assert!(!err.retryable);
        assert_eq!(err.strategy, RecoveryStrategy::FailFast);
    }

    #[test]
    fn test_recovery_error_with_retry_count() {
        let err = RecoveryError::new(ErrorCategory::NetworkError, "Timeout").with_retry_count(2);
        assert_eq!(err.retry_count, 2);
    }

    #[test]
    fn test_recovery_error_is_fresh() {
        // Just-constructed errors are always within the 1-minute window.
        assert!(RecoveryError::new(ErrorCategory::NetworkError, "Timeout").is_fresh());
    }

    #[test]
    fn test_retry_config_default() {
        let cfg = RetryConfig::default();
        assert_eq!(cfg.max_retries, 3);
        assert_eq!(cfg.initial_backoff_ms, 100);
        assert_eq!(cfg.max_backoff_ms, 5000);
    }

    #[test]
    fn test_retry_config_backoff_calculation() {
        let cfg = RetryConfig::new();
        // 100ms doubling per attempt.
        for (attempt, expected) in [(0, 100), (1, 200), (2, 400), (3, 800)] {
            assert_eq!(cfg.backoff_delay_ms(attempt), expected);
        }
    }

    #[test]
    fn test_retry_config_max_backoff() {
        let cfg = RetryConfig::new().with_max_retries(10);
        // 100 * 2^10 = 102400ms, clamped to the cap.
        assert_eq!(cfg.backoff_delay_ms(10), cfg.max_backoff_ms);
    }

    #[test]
    fn test_circuit_breaker_closed() {
        let cb = CircuitBreaker::new(3, 2);
        assert!(cb.is_allowed());
        assert_eq!(cb.state(), CircuitState::Closed);
    }

    #[test]
    fn test_circuit_breaker_opens_on_failure() {
        let cb = CircuitBreaker::new(3, 2);
        fail_times(&cb, 3);
        assert!(!cb.is_allowed());
        assert_eq!(cb.state(), CircuitState::Open);
    }

    #[test]
    fn test_circuit_breaker_half_open_on_success() {
        let cb = CircuitBreaker::new(3, 2);
        fail_times(&cb, 3);

        // Force the probing state, then report enough successes to close.
        cb.state.store(CircuitState::HalfOpen);
        cb.record_success();
        cb.record_success();

        assert!(cb.is_allowed());
        assert_eq!(cb.state(), CircuitState::Closed);
    }

    #[test]
    fn test_circuit_breaker_reset() {
        let cb = CircuitBreaker::new(3, 2);
        fail_times(&cb, 3);
        assert!(!cb.is_allowed());

        cb.reset();
        assert!(cb.is_allowed());
        assert_eq!(cb.state(), CircuitState::Closed);
    }

    #[test]
    fn test_recovery_error_is_transient() {
        assert!(RecoveryError::new(ErrorCategory::NetworkError, "Network issue").is_transient());
        assert!(!RecoveryError::new(ErrorCategory::KeyNotFound, "Key missing").is_transient());
    }

    #[test]
    fn test_recovery_error_should_use_cache() {
        assert!(RecoveryError::new(ErrorCategory::VaultUnavailable, "Vault down").should_use_cache());
        assert!(!RecoveryError::new(ErrorCategory::KeyExpired, "Key expired").should_use_cache());
    }

    #[test]
    fn test_retry_config_should_retry() {
        let cfg = RetryConfig::new().with_max_retries(3);
        assert!(cfg.should_retry(0));
        assert!(cfg.should_retry(2));
        assert!(!cfg.should_retry(3));
        assert!(!cfg.should_retry(5));
    }

    #[test]
    fn test_circuit_breaker_get_counts() {
        let cb = CircuitBreaker::new(5, 3);
        assert_eq!(cb.get_counts(), (0, 0));

        fail_times(&cb, 2);
        let (failures, _) = cb.get_counts();
        assert_eq!(failures, 2);
    }

    #[test]
    fn test_circuit_breaker_is_open() {
        let cb = CircuitBreaker::new(2, 2);
        assert!(!cb.is_open());

        fail_times(&cb, 2);
        assert!(cb.is_open());
    }

    #[test]
    fn test_circuit_breaker_is_half_open() {
        let cb = CircuitBreaker::new(2, 2);
        assert!(!cb.is_half_open());

        fail_times(&cb, 2);
        cb.state.store(CircuitState::HalfOpen);
        assert!(cb.is_half_open());
    }

    #[test]
    fn test_circuit_breaker_attempt_recovery() {
        let cb = CircuitBreaker::new(1, 2);
        cb.record_failure();
        assert!(cb.is_open());

        // Timeout not yet elapsed: must remain open.
        cb.attempt_recovery(1000);
        assert!(cb.is_open());

        // Zero timeout: transitions to half-open immediately.
        cb.attempt_recovery(0);
        assert!(cb.is_half_open());
    }
}