Skip to main content

alec/
recovery.rs

// ALEC - Adaptive Lazy Evolving Compression
// Copyright (c) 2025 David Martin Venti
//
// Dual-licensed under AGPL-3.0 and Commercial License.
// See LICENSE file for details.

//! Automatic recovery mechanisms
//!
//! Provides circuit breaker, retry logic, and recovery strategies.
10
11use std::time::{Duration, Instant};
12
/// The three states a circuit breaker can be in.
///
/// The default state is [`CircuitState::Closed`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CircuitState {
    /// Normal operation - requests are allowed
    #[default]
    Closed,
    /// Failing - rejecting requests
    Open,
    /// Testing if recovery is possible
    HalfOpen,
}

impl CircuitState {
    /// Report whether a request may proceed while in this state.
    ///
    /// Only [`CircuitState::Open`] rejects requests.
    pub fn allows_requests(&self) -> bool {
        match self {
            Self::Closed | Self::HalfOpen => true,
            Self::Open => false,
        }
    }
}
31
/// Tunable thresholds and timing for a circuit breaker.
#[derive(Debug, Clone)]
pub struct CircuitConfig {
    /// Number of failures before opening circuit
    pub failure_threshold: u32,
    /// Number of successes in half-open state to close circuit
    pub success_threshold: u32,
    /// Time before attempting recovery
    pub recovery_timeout: Duration,
}

impl Default for CircuitConfig {
    /// Defaults: open after 5 failures, close after 3 half-open successes,
    /// and wait 30 seconds before attempting recovery.
    fn default() -> Self {
        let failure_threshold = 5;
        let success_threshold = 3;
        let recovery_timeout = Duration::from_secs(30);
        Self {
            failure_threshold,
            success_threshold,
            recovery_timeout,
        }
    }
}
52
53/// Circuit breaker for fault tolerance
54///
55/// Implements the circuit breaker pattern to prevent cascade failures.
56/// When failures exceed the threshold, the circuit opens and rejects
57/// requests until a recovery timeout allows a half-open state.
58#[derive(Debug)]
59pub struct CircuitBreaker {
60    state: CircuitState,
61    failure_count: u32,
62    success_count: u32,
63    last_failure: Option<Instant>,
64    config: CircuitConfig,
65}
66
67impl CircuitBreaker {
68    /// Create a new circuit breaker with default configuration
69    pub fn new() -> Self {
70        Self {
71            state: CircuitState::Closed,
72            failure_count: 0,
73            success_count: 0,
74            last_failure: None,
75            config: CircuitConfig::default(),
76        }
77    }
78
79    /// Create a circuit breaker with custom configuration
80    pub fn with_config(config: CircuitConfig) -> Self {
81        Self {
82            config,
83            ..Self::new()
84        }
85    }
86
87    /// Check if request should be allowed
88    ///
89    /// Returns true if the request can proceed, false if it should be rejected.
90    /// Also handles state transitions from Open to HalfOpen when recovery timeout expires.
91    pub fn should_allow(&mut self) -> bool {
92        match self.state {
93            CircuitState::Closed => true,
94            CircuitState::Open => {
95                // Check if recovery timeout has passed
96                if let Some(last) = self.last_failure {
97                    if last.elapsed() >= self.config.recovery_timeout {
98                        self.state = CircuitState::HalfOpen;
99                        self.success_count = 0;
100                        return true;
101                    }
102                }
103                false
104            }
105            CircuitState::HalfOpen => true,
106        }
107    }
108
109    /// Record a successful operation
110    ///
111    /// In closed state: resets failure count
112    /// In half-open state: increments success count, closes circuit if threshold reached
113    pub fn record_success(&mut self) {
114        match self.state {
115            CircuitState::Closed => {
116                self.failure_count = 0;
117            }
118            CircuitState::HalfOpen => {
119                self.success_count += 1;
120                if self.success_count >= self.config.success_threshold {
121                    self.state = CircuitState::Closed;
122                    self.failure_count = 0;
123                }
124            }
125            CircuitState::Open => {}
126        }
127    }
128
129    /// Record a failed operation
130    ///
131    /// In closed state: increments failure count, opens circuit if threshold reached
132    /// In half-open state: immediately opens circuit
133    pub fn record_failure(&mut self) {
134        self.last_failure = Some(Instant::now());
135
136        match self.state {
137            CircuitState::Closed => {
138                self.failure_count += 1;
139                if self.failure_count >= self.config.failure_threshold {
140                    self.state = CircuitState::Open;
141                }
142            }
143            CircuitState::HalfOpen => {
144                self.state = CircuitState::Open;
145            }
146            CircuitState::Open => {}
147        }
148    }
149
150    /// Get current circuit state
151    pub fn state(&self) -> CircuitState {
152        self.state
153    }
154
155    /// Get current failure count
156    pub fn failure_count(&self) -> u32 {
157        self.failure_count
158    }
159
160    /// Get current success count (in half-open state)
161    pub fn success_count(&self) -> u32 {
162        self.success_count
163    }
164
165    /// Get time since last failure
166    pub fn time_since_last_failure(&self) -> Option<Duration> {
167        self.last_failure.map(|t| t.elapsed())
168    }
169
170    /// Reset the circuit breaker to initial state
171    pub fn reset(&mut self) {
172        self.state = CircuitState::Closed;
173        self.failure_count = 0;
174        self.success_count = 0;
175        self.last_failure = None;
176    }
177
178    /// Force the circuit open
179    pub fn force_open(&mut self) {
180        self.state = CircuitState::Open;
181        self.last_failure = Some(Instant::now());
182    }
183
184    /// Force the circuit closed
185    pub fn force_closed(&mut self) {
186        self.state = CircuitState::Closed;
187        self.failure_count = 0;
188        self.success_count = 0;
189    }
190}
191
192impl Default for CircuitBreaker {
193    fn default() -> Self {
194        Self::new()
195    }
196}
197
/// Retry strategy for operations
///
/// Each variant describes how many retries are permitted and how long to
/// wait before each one. Delays are fully deterministic.
#[derive(Debug, Clone, Default)]
pub enum RetryStrategy {
    /// No retries
    #[default]
    None,
    /// Fixed number of retries with constant delay
    Fixed {
        /// Maximum number of retry attempts
        max_retries: u32,
        /// Delay between retries
        delay: Duration,
    },
    /// Exponential backoff (no jitter is applied by `delay_for_attempt`)
    ExponentialBackoff {
        /// Maximum number of retry attempts
        max_retries: u32,
        /// Initial delay
        initial_delay: Duration,
        /// Maximum delay
        max_delay: Duration,
        /// Multiplier for each attempt
        multiplier: f64,
    },
    /// Linear backoff
    LinearBackoff {
        /// Maximum number of retry attempts
        max_retries: u32,
        /// Initial delay
        initial_delay: Duration,
        /// Increment per attempt
        increment: Duration,
        /// Maximum delay
        max_delay: Duration,
    },
}

impl RetryStrategy {
    /// Calculate delay for a given attempt number (0-indexed)
    ///
    /// Returns `None` if no more retries should be attempted, i.e. when
    /// `attempt` is not below the strategy's `max_retries`.
    ///
    /// Backoff delays are capped at the variant's `max_delay`. This method
    /// never panics: arithmetic that would overflow `Duration` is treated as
    /// exceeding the cap and yields `max_delay`.
    pub fn delay_for_attempt(&self, attempt: u32) -> Option<Duration> {
        if attempt >= self.max_retries() {
            return None;
        }

        let delay = match self {
            // Unreachable in practice (`max_retries()` is 0), kept for exhaustiveness.
            Self::None => return None,
            Self::Fixed { delay, .. } => *delay,
            Self::ExponentialBackoff {
                initial_delay,
                max_delay,
                multiplier,
                ..
            } => {
                // A plain `as i32` cast would wrap to a negative exponent for
                // very large attempt numbers and shrink the delay; clamp instead.
                let exponent = i32::try_from(attempt).unwrap_or(i32::MAX);
                let delay_ms = initial_delay.as_millis() as f64 * multiplier.powi(exponent);
                // Float-to-integer `as` casts saturate (NaN becomes 0), so this
                // cannot overflow even when `delay_ms` is infinite.
                Duration::from_millis(delay_ms as u64).min(*max_delay)
            }
            Self::LinearBackoff {
                initial_delay,
                increment,
                max_delay,
                ..
            } => increment
                .checked_mul(attempt)
                .and_then(|growth| initial_delay.checked_add(growth))
                // Overflow means the true delay is beyond any cap.
                .map_or(*max_delay, |uncapped| uncapped.min(*max_delay)),
        };
        Some(delay)
    }

    /// Get maximum number of retries
    pub fn max_retries(&self) -> u32 {
        match self {
            Self::None => 0,
            Self::Fixed { max_retries, .. }
            | Self::ExponentialBackoff { max_retries, .. }
            | Self::LinearBackoff { max_retries, .. } => *max_retries,
        }
    }

    /// Create a fixed retry strategy
    pub fn fixed(max_retries: u32, delay: Duration) -> Self {
        Self::Fixed { max_retries, delay }
    }

    /// Create an exponential backoff strategy
    ///
    /// Uses a multiplier of 2.0 and caps delays at 30 seconds.
    pub fn exponential(max_retries: u32, initial_delay: Duration) -> Self {
        Self::ExponentialBackoff {
            max_retries,
            initial_delay,
            max_delay: Duration::from_secs(30),
            multiplier: 2.0,
        }
    }

    /// Create a linear backoff strategy
    ///
    /// Caps delays at 30 seconds.
    pub fn linear(max_retries: u32, initial_delay: Duration, increment: Duration) -> Self {
        Self::LinearBackoff {
            max_retries,
            initial_delay,
            increment,
            max_delay: Duration::from_secs(30),
        }
    }
}
315
316/// Execute an operation with retry logic
317///
318/// Retries the operation according to the strategy, sleeping between attempts.
319///
320/// # Example
321///
322/// ```ignore
323/// use alec::recovery::{RetryStrategy, with_retry};
324/// use std::time::Duration;
325///
326/// let strategy = RetryStrategy::exponential(3, Duration::from_millis(100));
327/// let result = with_retry(&strategy, || {
328///     // Your fallible operation here
329///     Ok::<_, &str>(42)
330/// });
331/// ```
332pub fn with_retry<T, E, F>(strategy: &RetryStrategy, mut operation: F) -> Result<T, E>
333where
334    F: FnMut() -> Result<T, E>,
335{
336    let mut attempt = 0;
337    loop {
338        match operation() {
339            Ok(result) => return Ok(result),
340            Err(e) => {
341                if let Some(delay) = strategy.delay_for_attempt(attempt) {
342                    std::thread::sleep(delay);
343                    attempt += 1;
344                } else {
345                    return Err(e);
346                }
347            }
348        }
349    }
350}
351
/// Successful outcome of a retried operation together with retry metrics.
#[derive(Debug, Clone)]
pub struct RetryResult<T> {
    /// Value produced by the successful attempt
    pub value: T,
    /// Total number of attempts made, including the successful one
    pub attempts: u32,
    /// Wall-clock time from the first attempt until success
    pub total_duration: Duration,
}
362
363/// Execute an operation with retry logic and return metrics
364pub fn with_retry_metrics<T, E, F>(
365    strategy: &RetryStrategy,
366    mut operation: F,
367) -> Result<RetryResult<T>, E>
368where
369    F: FnMut() -> Result<T, E>,
370{
371    let start = Instant::now();
372    let mut attempt = 0;
373    loop {
374        match operation() {
375            Ok(result) => {
376                return Ok(RetryResult {
377                    value: result,
378                    attempts: attempt + 1,
379                    total_duration: start.elapsed(),
380                })
381            }
382            Err(e) => {
383                if let Some(delay) = strategy.delay_for_attempt(attempt) {
384                    std::thread::sleep(delay);
385                    attempt += 1;
386                } else {
387                    return Err(e);
388                }
389            }
390        }
391    }
392}
393
/// Graceful degradation level
///
/// Variants are declared from least to most severe, and the derived
/// ordering follows that declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum DegradationLevel {
    /// Normal operation
    #[default]
    Normal,
    /// Light degradation - non-essential features disabled
    Light,
    /// Moderate degradation - some features disabled
    Moderate,
    /// Heavy degradation - only essential features
    Heavy,
    /// Emergency - minimal operation
    Emergency,
}

impl DegradationLevel {
    /// Report whether a feature tagged with `feature_level` should be
    /// disabled at the current level, i.e. whether the current level is at
    /// least as severe as `feature_level`.
    pub fn should_disable(&self, feature_level: DegradationLevel) -> bool {
        feature_level <= *self
    }

    /// One step more severe; `Emergency` stays at `Emergency`.
    pub fn escalate(&self) -> Self {
        match *self {
            Self::Normal => Self::Light,
            Self::Light => Self::Moderate,
            Self::Moderate => Self::Heavy,
            Self::Heavy | Self::Emergency => Self::Emergency,
        }
    }

    /// One step less severe; `Normal` stays at `Normal`.
    pub fn de_escalate(&self) -> Self {
        match *self {
            Self::Normal | Self::Light => Self::Normal,
            Self::Moderate => Self::Light,
            Self::Heavy => Self::Moderate,
            Self::Emergency => Self::Heavy,
        }
    }
}
438
#[cfg(test)]
mod tests {
    use super::*;

    /// Breaker with the given failure threshold and otherwise default settings.
    fn breaker_with_failure_threshold(failure_threshold: u32) -> CircuitBreaker {
        CircuitBreaker::with_config(CircuitConfig {
            failure_threshold,
            ..Default::default()
        })
    }

    /// Breaker that opens on the first failure and may recover after 10 ms.
    fn fast_recovering_breaker() -> CircuitBreaker {
        CircuitBreaker::with_config(CircuitConfig {
            failure_threshold: 1,
            success_threshold: 2,
            recovery_timeout: Duration::from_millis(10),
        })
    }

    /// Check that attempts `0..expected_ms.len()` yield the listed delays.
    fn assert_delays_ms(strategy: &RetryStrategy, expected_ms: &[u64]) {
        for (attempt, &millis) in (0u32..).zip(expected_ms) {
            assert_eq!(
                strategy.delay_for_attempt(attempt),
                Some(Duration::from_millis(millis))
            );
        }
    }

    #[test]
    fn test_circuit_state_default() {
        assert_eq!(CircuitState::default(), CircuitState::Closed);
    }

    #[test]
    fn test_circuit_state_allows_requests() {
        assert!(CircuitState::Closed.allows_requests());
        assert!(!CircuitState::Open.allows_requests());
        assert!(CircuitState::HalfOpen.allows_requests());
    }

    #[test]
    fn test_circuit_breaker_opens() {
        let mut breaker = breaker_with_failure_threshold(3);
        assert_eq!(breaker.state(), CircuitState::Closed);

        for _ in 0..2 {
            breaker.record_failure();
        }
        assert_eq!(breaker.state(), CircuitState::Closed);
        assert_eq!(breaker.failure_count(), 2);

        breaker.record_failure();
        assert_eq!(breaker.state(), CircuitState::Open);
    }

    #[test]
    fn test_circuit_breaker_success_resets_count() {
        let mut breaker = breaker_with_failure_threshold(3);

        for _ in 0..2 {
            breaker.record_failure();
        }
        assert_eq!(breaker.failure_count(), 2);

        breaker.record_success();
        assert_eq!(breaker.failure_count(), 0);
    }

    #[test]
    fn test_circuit_breaker_recovery() {
        let mut breaker = fast_recovering_breaker();

        breaker.record_failure();
        assert_eq!(breaker.state(), CircuitState::Open);
        assert!(!breaker.should_allow());

        // Wait for recovery
        std::thread::sleep(Duration::from_millis(15));

        assert!(breaker.should_allow());
        assert_eq!(breaker.state(), CircuitState::HalfOpen);

        // First success is not enough, the second closes the circuit.
        for expected in [CircuitState::HalfOpen, CircuitState::Closed] {
            breaker.record_success();
            assert_eq!(breaker.state(), expected);
        }
    }

    #[test]
    fn test_circuit_breaker_half_open_failure() {
        let mut breaker = fast_recovering_breaker();

        breaker.record_failure();
        std::thread::sleep(Duration::from_millis(15));
        breaker.should_allow(); // Transition to half-open

        assert_eq!(breaker.state(), CircuitState::HalfOpen);

        breaker.record_failure();
        assert_eq!(breaker.state(), CircuitState::Open);
    }

    #[test]
    fn test_circuit_breaker_reset() {
        let mut breaker = CircuitBreaker::new();
        for _ in 0..5 {
            breaker.record_failure();
        }
        assert_eq!(breaker.state(), CircuitState::Open);

        breaker.reset();
        assert_eq!(breaker.state(), CircuitState::Closed);
        assert_eq!(breaker.failure_count(), 0);
    }

    #[test]
    fn test_circuit_breaker_force_open() {
        let mut breaker = CircuitBreaker::new();
        assert_eq!(breaker.state(), CircuitState::Closed);

        breaker.force_open();
        assert_eq!(breaker.state(), CircuitState::Open);
    }

    #[test]
    fn test_circuit_breaker_force_closed() {
        let mut breaker = CircuitBreaker::new();
        breaker.force_open();
        assert_eq!(breaker.state(), CircuitState::Open);

        breaker.force_closed();
        assert_eq!(breaker.state(), CircuitState::Closed);
    }

    #[test]
    fn test_retry_strategy_none() {
        let strategy = RetryStrategy::None;
        assert_eq!(strategy.delay_for_attempt(0), None);
        assert_eq!(strategy.max_retries(), 0);
    }

    #[test]
    fn test_retry_strategy_fixed() {
        let strategy = RetryStrategy::Fixed {
            max_retries: 3,
            delay: Duration::from_millis(100),
        };

        assert_delays_ms(&strategy, &[100, 100, 100]);
        assert_eq!(strategy.delay_for_attempt(3), None);
    }

    #[test]
    fn test_exponential_backoff() {
        let strategy = RetryStrategy::ExponentialBackoff {
            max_retries: 5,
            initial_delay: Duration::from_millis(100),
            max_delay: Duration::from_secs(10),
            multiplier: 2.0,
        };

        assert_delays_ms(&strategy, &[100, 200, 400, 800, 1600]);
        assert_eq!(strategy.delay_for_attempt(5), None);
    }

    #[test]
    fn test_exponential_backoff_max_delay() {
        let strategy = RetryStrategy::ExponentialBackoff {
            max_retries: 10,
            initial_delay: Duration::from_millis(100),
            max_delay: Duration::from_millis(500),
            multiplier: 2.0,
        };

        // Attempts 3 and 4 are capped at max_delay
        assert_delays_ms(&strategy, &[100, 200, 400, 500, 500]);
    }

    #[test]
    fn test_linear_backoff() {
        let strategy = RetryStrategy::LinearBackoff {
            max_retries: 4,
            initial_delay: Duration::from_millis(100),
            increment: Duration::from_millis(50),
            max_delay: Duration::from_secs(1),
        };

        assert_delays_ms(&strategy, &[100, 150, 200, 250]);
        assert_eq!(strategy.delay_for_attempt(4), None);
    }

    #[test]
    fn test_retry_strategy_helpers() {
        let fixed = RetryStrategy::fixed(3, Duration::from_millis(100));
        assert_eq!(fixed.max_retries(), 3);

        let exp = RetryStrategy::exponential(5, Duration::from_millis(50));
        assert_eq!(exp.max_retries(), 5);

        let linear =
            RetryStrategy::linear(4, Duration::from_millis(100), Duration::from_millis(25));
        assert_eq!(linear.max_retries(), 4);
    }

    #[test]
    fn test_with_retry_success() {
        let strategy = RetryStrategy::fixed(3, Duration::from_millis(1));
        let outcome = with_retry(&strategy, || Ok::<_, &str>(42));
        assert_eq!(outcome.unwrap(), 42);
    }

    #[test]
    fn test_with_retry_eventual_success() {
        let strategy = RetryStrategy::fixed(3, Duration::from_millis(1));
        let mut calls = 0;
        let outcome = with_retry(&strategy, || {
            calls += 1;
            if calls >= 3 {
                Ok(42)
            } else {
                Err("not yet")
            }
        });
        assert_eq!(outcome.unwrap(), 42);
        assert_eq!(calls, 3);
    }

    #[test]
    fn test_with_retry_all_failures() {
        let strategy = RetryStrategy::fixed(2, Duration::from_millis(1));
        let mut calls = 0;
        let outcome = with_retry(&strategy, || {
            calls += 1;
            Err::<i32, _>("always fails")
        });
        assert!(outcome.is_err());
        assert_eq!(calls, 3); // Initial + 2 retries
    }

    #[test]
    fn test_with_retry_metrics() {
        let strategy = RetryStrategy::fixed(2, Duration::from_millis(1));
        let metrics = with_retry_metrics(&strategy, || Ok::<_, &str>(42)).unwrap();
        assert_eq!(metrics.value, 42);
        assert_eq!(metrics.attempts, 1);
    }

    #[test]
    fn test_degradation_level_ordering() {
        assert!(DegradationLevel::Normal < DegradationLevel::Light);
        assert!(DegradationLevel::Light < DegradationLevel::Moderate);
        assert!(DegradationLevel::Moderate < DegradationLevel::Heavy);
        assert!(DegradationLevel::Heavy < DegradationLevel::Emergency);
    }

    #[test]
    fn test_degradation_level_should_disable() {
        let level = DegradationLevel::Moderate;

        for kept in [DegradationLevel::Heavy, DegradationLevel::Emergency] {
            assert!(!level.should_disable(kept));
        }
        for disabled in [
            DegradationLevel::Moderate,
            DegradationLevel::Light,
            DegradationLevel::Normal,
        ] {
            assert!(level.should_disable(disabled));
        }
    }

    #[test]
    fn test_degradation_level_escalate() {
        let steps = [
            (DegradationLevel::Normal, DegradationLevel::Light),
            (DegradationLevel::Light, DegradationLevel::Moderate),
            (DegradationLevel::Moderate, DegradationLevel::Heavy),
            (DegradationLevel::Heavy, DegradationLevel::Emergency),
            (DegradationLevel::Emergency, DegradationLevel::Emergency),
        ];
        for (from, to) in steps {
            assert_eq!(from.escalate(), to);
        }
    }

    #[test]
    fn test_degradation_level_de_escalate() {
        let steps = [
            (DegradationLevel::Emergency, DegradationLevel::Heavy),
            (DegradationLevel::Heavy, DegradationLevel::Moderate),
            (DegradationLevel::Moderate, DegradationLevel::Light),
            (DegradationLevel::Light, DegradationLevel::Normal),
            (DegradationLevel::Normal, DegradationLevel::Normal),
        ];
        for (from, to) in steps {
            assert_eq!(from.de_escalate(), to);
        }
    }
}