chie_shared/utils/
circuit_breaker.rs

1//! Circuit breaker state machine for fault tolerance
2//!
3//! Implements the circuit breaker pattern to prevent cascading failures
4//! in distributed systems by temporarily blocking requests to failing services.
5
6use std::time::{Duration, Instant};
7
/// Circuit breaker states
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CircuitState {
    /// Circuit is closed, requests flow normally
    Closed,
    /// Circuit is open, requests are blocked
    Open,
    /// Circuit is half-open, testing if service recovered
    HalfOpen,
}

/// Circuit breaker for fault tolerance
///
/// The circuit breaker monitors failures and automatically transitions between states:
/// - Closed: Normal operation, requests pass through
/// - Open: Too many failures, requests are blocked
/// - HalfOpen: Testing recovery, limited requests allowed
///
/// The time-based Open -> HalfOpen transition is evaluated lazily: read-only
/// accessors (`state`, `is_open`, `is_half_open`, `allow_request`) report the
/// circuit as half-open as soon as the open timeout has elapsed, and the next
/// `record_success` / `record_failure` call stores that transition before
/// processing the outcome.
///
/// # Examples
///
/// ```
/// use chie_shared::CircuitBreaker;
///
/// let mut breaker = CircuitBreaker::new(5, 60_000, 30_000);
///
/// // Record successful requests
/// breaker.record_success();
/// assert!(breaker.is_closed());
///
/// // Record failures
/// for _ in 0..5 {
///     breaker.record_failure();
/// }
/// assert!(breaker.is_open());
/// ```
#[derive(Debug, Clone)]
pub struct CircuitBreaker {
    /// Last stored state of the circuit (see `state()` for the effective one)
    state: CircuitState,
    /// Number of consecutive failures recorded while closed
    failure_count: u32,
    /// Failure threshold to open circuit
    failure_threshold: u32,
    /// Duration to keep circuit open (milliseconds)
    timeout_ms: u64,
    /// Duration for half-open state (milliseconds)
    // Accepted and stored for API compatibility; no code path reads it yet.
    #[allow(dead_code)]
    half_open_timeout_ms: u64,
    /// Time when circuit was opened (or last re-armed by a failure while open)
    opened_at: Option<Instant>,
    /// Number of successful requests in half-open state
    half_open_successes: u32,
    /// Number of successes in half-open state required to close the circuit
    half_open_max_requests: u32,
}

impl CircuitBreaker {
    /// Create a new circuit breaker
    ///
    /// The breaker starts in the closed state and closes again after three
    /// successful requests in the half-open state.
    ///
    /// # Arguments
    ///
    /// * `failure_threshold` - Number of failures before opening circuit
    /// * `timeout_ms` - Milliseconds to keep circuit open
    /// * `half_open_timeout_ms` - Milliseconds for half-open state
    #[must_use]
    pub fn new(failure_threshold: u32, timeout_ms: u64, half_open_timeout_ms: u64) -> Self {
        Self {
            state: CircuitState::Closed,
            failure_count: 0,
            failure_threshold,
            timeout_ms,
            half_open_timeout_ms,
            opened_at: None,
            half_open_successes: 0,
            half_open_max_requests: 3,
        }
    }

    /// Create a circuit breaker with default settings
    ///
    /// Defaults: 5 failures, 60s timeout, 30s half-open timeout
    #[must_use]
    pub fn with_defaults() -> Self {
        Self::new(5, 60_000, 30_000)
    }

    /// Get the current state
    ///
    /// An open circuit whose timeout has elapsed is reported as
    /// [`CircuitState::HalfOpen`], matching what `allow_request` permits.
    #[must_use]
    pub fn state(&self) -> CircuitState {
        self.effective_state()
    }

    /// Check if circuit is closed (requests allowed)
    #[must_use]
    pub fn is_closed(&self) -> bool {
        self.effective_state() == CircuitState::Closed
    }

    /// Check if circuit is open (requests blocked)
    ///
    /// Returns `false` once the open timeout has elapsed, because probe
    /// requests are allowed from that point on.
    #[must_use]
    pub fn is_open(&self) -> bool {
        self.effective_state() == CircuitState::Open
    }

    /// Check if circuit is half-open (testing recovery)
    ///
    /// Also `true` for an open circuit whose timeout has elapsed.
    #[must_use]
    pub fn is_half_open(&self) -> bool {
        self.effective_state() == CircuitState::HalfOpen
    }

    /// Check if a request is allowed through the circuit
    ///
    /// Closed circuits always allow requests, open circuits never do, and
    /// half-open circuits allow them until enough successes were recorded.
    #[must_use]
    pub fn allow_request(&self) -> bool {
        match self.effective_state() {
            CircuitState::Closed => true,
            CircuitState::Open => false,
            CircuitState::HalfOpen => {
                // Allow limited requests in half-open state
                self.half_open_successes < self.half_open_max_requests
            }
        }
    }

    /// Record a successful request
    pub fn record_success(&mut self) {
        // Store a due Open -> HalfOpen transition first so that a successful
        // probe is counted towards closing the circuit.
        self.apply_pending_transition();

        match self.state {
            CircuitState::Closed => {
                // Reset failure count on success
                self.failure_count = 0;
            }
            CircuitState::HalfOpen => {
                self.half_open_successes += 1;
                // If enough successes, close the circuit
                if self.half_open_successes >= self.half_open_max_requests {
                    self.transition_to_closed();
                }
            }
            CircuitState::Open => {
                // Success before the timeout elapsed shouldn't happen normally
                // (e.g. a late response); if it does, start testing recovery.
                self.transition_to_half_open();
            }
        }
    }

    /// Record a failed request
    pub fn record_failure(&mut self) {
        // Store a due Open -> HalfOpen transition first so that a failed
        // probe is handled as a half-open failure.
        self.apply_pending_transition();

        match self.state {
            CircuitState::Closed => {
                self.failure_count += 1;
                if self.failure_count >= self.failure_threshold {
                    self.transition_to_open();
                }
            }
            CircuitState::HalfOpen => {
                // Failure in half-open means service still not recovered
                self.transition_to_open();
            }
            CircuitState::Open => {
                // Already open, just update timestamp (restarts the timeout)
                self.opened_at = Some(Instant::now());
            }
        }
    }

    /// Get failure count
    #[must_use]
    pub fn failure_count(&self) -> u32 {
        self.failure_count
    }

    /// Get time remaining until circuit can transition from open to half-open
    ///
    /// Returns `None` when the circuit is not open or when the timeout has
    /// already elapsed.
    #[must_use]
    pub fn time_until_half_open(&self) -> Option<Duration> {
        if self.state != CircuitState::Open {
            return None;
        }

        self.opened_at.and_then(|opened_at| {
            let elapsed = opened_at.elapsed();
            let timeout = Duration::from_millis(self.timeout_ms);
            timeout.checked_sub(elapsed)
        })
    }

    /// Reset the circuit breaker to closed state
    pub fn reset(&mut self) {
        self.transition_to_closed();
    }

    /// Force circuit to open state
    pub fn force_open(&mut self) {
        self.transition_to_open();
    }

    // Internal state transitions

    /// Whether the circuit has an open timestamp and `timeout_ms` has passed since.
    fn open_timeout_elapsed(&self) -> bool {
        match self.opened_at {
            Some(opened_at) => opened_at.elapsed() >= Duration::from_millis(self.timeout_ms),
            None => false,
        }
    }

    /// State as seen by callers: the stored state, except that an open circuit
    /// past its timeout already counts as half-open.
    fn effective_state(&self) -> CircuitState {
        if self.state == CircuitState::Open && self.open_timeout_elapsed() {
            CircuitState::HalfOpen
        } else {
            self.state
        }
    }

    /// Store the Open -> HalfOpen transition if its timeout has elapsed.
    fn apply_pending_transition(&mut self) {
        if self.state == CircuitState::Open && self.open_timeout_elapsed() {
            self.transition_to_half_open();
        }
    }

    fn transition_to_closed(&mut self) {
        self.state = CircuitState::Closed;
        self.failure_count = 0;
        self.opened_at = None;
        self.half_open_successes = 0;
    }

    fn transition_to_open(&mut self) {
        self.state = CircuitState::Open;
        self.opened_at = Some(Instant::now());
        self.half_open_successes = 0;
    }

    fn transition_to_half_open(&mut self) {
        self.state = CircuitState::HalfOpen;
        self.half_open_successes = 0;
    }
}
237
238impl Default for CircuitBreaker {
239    fn default() -> Self {
240        Self::with_defaults()
241    }
242}
243
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread::sleep;

    /// Builds a breaker with threshold 3 and feeds it exactly three failures.
    fn tripped(timeout_ms: u64, half_open_timeout_ms: u64) -> CircuitBreaker {
        let mut cb = CircuitBreaker::new(3, timeout_ms, half_open_timeout_ms);
        (0..3).for_each(|_| cb.record_failure());
        cb
    }

    #[test]
    fn test_circuit_starts_closed() {
        let cb = CircuitBreaker::new(3, 1000, 500);
        assert_eq!(cb.state(), CircuitState::Closed);
        assert!(cb.is_closed());
        assert!(!cb.is_open());
    }

    #[test]
    fn test_circuit_opens_after_threshold() {
        let mut cb = CircuitBreaker::new(3, 1000, 500);

        // The first two failures stay below the threshold.
        for _ in 0..2 {
            cb.record_failure();
            assert!(cb.is_closed());
        }

        // The third failure reaches the threshold.
        cb.record_failure();
        assert!(cb.is_open());
    }

    #[test]
    fn test_success_resets_failure_count() {
        let mut cb = CircuitBreaker::new(3, 1000, 500);

        cb.record_failure();
        cb.record_failure();
        assert_eq!(cb.failure_count(), 2);

        cb.record_success();
        assert_eq!(cb.failure_count(), 0);
        assert!(cb.is_closed());
    }

    #[test]
    fn test_allow_request_closed() {
        let cb = CircuitBreaker::new(3, 1000, 500);
        assert!(cb.allow_request());
    }

    #[test]
    fn test_allow_request_open() {
        // Short timeout keeps the test quick.
        let cb = tripped(100, 50);
        assert!(cb.is_open());
        assert!(!cb.allow_request());

        // Once the open timeout has passed, requests are let through again.
        sleep(Duration::from_millis(150));
        assert!(cb.allow_request());
    }

    #[test]
    fn test_half_open_recovery() {
        let mut cb = tripped(100, 50);
        assert!(cb.is_open());

        // Let the open timeout pass; requests become allowed.
        sleep(Duration::from_millis(150));
        assert!(cb.allow_request());

        // Put the breaker into half-open explicitly via the internal transition.
        cb.transition_to_half_open();
        assert!(cb.is_half_open());

        // Three successful probes are enough to close the circuit.
        for _ in 0..3 {
            cb.record_success();
        }
        assert!(cb.is_closed());
    }

    #[test]
    fn test_half_open_failure_reopens() {
        let mut cb = tripped(100, 50);

        // Move to half-open explicitly.
        cb.transition_to_half_open();
        assert!(cb.is_half_open());

        // A single failure while half-open opens the circuit again.
        cb.record_failure();
        assert!(cb.is_open());
    }

    #[test]
    fn test_reset() {
        let mut cb = tripped(1000, 500);
        assert!(cb.is_open());

        // reset() closes the circuit and clears the failure counter.
        cb.reset();
        assert!(cb.is_closed());
        assert_eq!(cb.failure_count(), 0);
    }

    #[test]
    fn test_force_open() {
        let mut cb = CircuitBreaker::new(3, 1000, 500);
        assert!(cb.is_closed());

        cb.force_open();
        assert!(cb.is_open());
    }

    #[test]
    fn test_time_until_half_open() {
        let mut cb = CircuitBreaker::new(3, 1000, 500);

        // A closed circuit has nothing to wait for.
        assert!(cb.time_until_half_open().is_none());

        for _ in 0..3 {
            cb.record_failure();
        }

        // An open circuit reports a remaining wait no longer than the timeout.
        let remaining = cb
            .time_until_half_open()
            .expect("open circuit should report remaining time");
        assert!(remaining.as_millis() <= 1000);
    }

    #[test]
    fn test_default_constructor() {
        let cb = CircuitBreaker::default();
        assert_eq!(cb.state(), CircuitState::Closed);
        assert_eq!(cb.failure_count(), 0);
    }
}