// vcl_protocol/reconnect.rs

//! # VCL Reconnect
//!
//! Automatic reconnection with exponential backoff for VCL connections.
//!
//! ## How it works
//!
//! ```text
//! Connection drops
//!     ↓
//! ReconnectManager::on_disconnect()
//!     ↓
//! Wait: backoff_interval (1s → 2s → 4s → 8s → ... → max_interval)
//!     ↓
//! ReconnectManager::should_reconnect() == true
//!     ↓
//! Attempt reconnect
//!     ↓
//! Success → ReconnectManager::on_connect() → reset backoff
//! Failure → ReconnectManager::on_failure() → increase backoff
//! ```
//!
//! The intervals shown are the nominal values for the default configuration;
//! the actual wait also includes the configured jitter.
//!
//! ## Example
//!
//! ```rust
//! use vcl_protocol::reconnect::{ReconnectManager, ReconnectConfig};
//!
//! let mut manager = ReconnectManager::new(ReconnectConfig::default());
//!
//! // Connection dropped
//! manager.on_disconnect();
//!
//! loop {
//!     if manager.should_reconnect() {
//!         // attempt reconnect here...
//!         let success = true; // result of reconnect attempt
//!         if success {
//!             manager.on_connect();
//!             break;
//!         } else {
//!             manager.on_failure();
//!             if manager.is_giving_up() {
//!                 println!("Giving up after {} attempts", manager.attempts());
//!                 break;
//!             }
//!         }
//!     } else {
//!         // Sleep until the backoff elapses instead of spinning.
//!         std::thread::sleep(manager.time_until_reconnect());
//!     }
//! }
//! ```
49
50use std::time::{Duration, Instant};
51use tracing::{debug, info, warn};
52
/// Configuration for automatic reconnection.
///
/// Describes how the backoff interval grows after failed attempts and when
/// the manager stops retrying. Individual fields can be overridden with
/// struct-update syntax on top of [`Default`] or one of the presets.
#[derive(Debug, Clone, PartialEq)]
pub struct ReconnectConfig {
    /// Backoff interval used before the first reconnect attempt; the manager
    /// returns to this value after a successful connect.
    pub initial_interval: Duration,
    /// Maximum backoff interval — exponential growth is capped here.
    pub max_interval: Duration,
    /// Factor the current interval is multiplied by after each failed attempt
    /// (e.g. `2.0` doubles it on every failure).
    pub multiplier: f64,
    /// Jitter factor 0.0–1.0 applied to each new backoff interval to avoid
    /// thundering herd, e.g. 0.2 varies the interval by up to ±20%.
    ///
    /// The offset is derived from the attempt count, so it is reproducible
    /// rather than random.
    pub jitter: f64,
    /// Maximum number of failed reconnect attempts before giving up.
    /// `None` means retry forever.
    pub max_attempts: Option<u32>,
    /// How long a connection must stay up before
    /// [`ReconnectManager::check_stability`] considers it stable and resets
    /// the backoff counter.
    pub stable_threshold: Duration,
}
72
73impl Default for ReconnectConfig {
74    fn default() -> Self {
75        ReconnectConfig {
76            initial_interval: Duration::from_secs(1),
77            max_interval: Duration::from_secs(60),
78            multiplier: 2.0,
79            jitter: 0.2,
80            max_attempts: None,
81            stable_threshold: Duration::from_secs(30),
82        }
83    }
84}
85
86impl ReconnectConfig {
87    /// Aggressive reconnect — for mobile networks.
88    /// Fast first retry, shorter max backoff.
89    pub fn mobile() -> Self {
90        ReconnectConfig {
91            initial_interval: Duration::from_millis(500),
92            max_interval: Duration::from_secs(30),
93            multiplier: 1.5,
94            jitter: 0.3,
95            max_attempts: None,
96            stable_threshold: Duration::from_secs(10),
97        }
98    }
99
100    /// Conservative reconnect — for stable networks.
101    pub fn stable() -> Self {
102        ReconnectConfig {
103            initial_interval: Duration::from_secs(2),
104            max_interval: Duration::from_secs(120),
105            multiplier: 2.0,
106            jitter: 0.1,
107            max_attempts: Some(10),
108            stable_threshold: Duration::from_secs(60),
109        }
110    }
111
112    /// Instant reconnect — for testing or LAN connections.
113    pub fn instant() -> Self {
114        ReconnectConfig {
115            initial_interval: Duration::from_millis(10),
116            max_interval: Duration::from_millis(100),
117            multiplier: 1.5,
118            jitter: 0.0,
119            max_attempts: Some(5),
120            stable_threshold: Duration::from_millis(100),
121        }
122    }
123}
124
/// State of the reconnect manager.
///
/// The enum carries no data, so it is `Copy` and fully comparable/hashable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ReconnectState {
    /// Connection is up and healthy.
    Connected,
    /// Waiting for backoff interval before next attempt.
    WaitingBackoff,
    /// Ready to attempt reconnect right now.
    ReadyToReconnect,
    /// Reconnect attempt in progress.
    Reconnecting,
    /// Gave up — max attempts reached.
    GaveUp,
}
139
140/// Manages automatic reconnection with exponential backoff and jitter.
141pub struct ReconnectManager {
142    config: ReconnectConfig,
143    state: ReconnectState,
144    /// Total reconnect attempts since last successful connect.
145    attempts: u32,
146    /// Total successful reconnects.
147    total_reconnects: u64,
148    /// Current backoff interval.
149    current_interval: Duration,
150    /// When we disconnected.
151    disconnected_at: Option<Instant>,
152    /// When we started waiting for this backoff.
153    backoff_started: Option<Instant>,
154    /// When we last successfully connected.
155    connected_at: Option<Instant>,
156    /// Cumulative downtime.
157    total_downtime: Duration,
158}
159
160impl ReconnectManager {
161    /// Create a new reconnect manager.
162    pub fn new(config: ReconnectConfig) -> Self {
163        let initial = config.initial_interval;
164        ReconnectManager {
165            config,
166            state: ReconnectState::Connected,
167            attempts: 0,
168            total_reconnects: 0,
169            current_interval: initial,
170            disconnected_at: None,
171            backoff_started: None,
172            connected_at: Some(Instant::now()),
173            total_downtime: Duration::ZERO,
174        }
175    }
176
177    /// Create with mobile preset.
178    pub fn mobile() -> Self {
179        Self::new(ReconnectConfig::mobile())
180    }
181
182    /// Create with stable preset.
183    pub fn stable() -> Self {
184        Self::new(ReconnectConfig::stable())
185    }
186
187    // ─── State transitions ────────────────────────────────────────────────────
188
189    /// Call when the connection drops.
190    ///
191    /// Starts the backoff timer and transitions to `WaitingBackoff`.
192    pub fn on_disconnect(&mut self) {
193        let now = Instant::now();
194        self.disconnected_at = Some(now);
195        self.backoff_started = Some(now);
196        self.state = ReconnectState::WaitingBackoff;
197
198        warn!(
199            attempts = self.attempts,
200            backoff_ms = self.current_interval.as_millis(),
201            "Connection lost — starting reconnect backoff"
202        );
203    }
204
205    /// Call when a reconnect attempt succeeds.
206    ///
207    /// Resets backoff if connection was stable, or keeps reduced backoff
208    /// if we reconnected quickly.
209    pub fn on_connect(&mut self) {
210        let now = Instant::now();
211
212        // Accumulate downtime
213        if let Some(disc) = self.disconnected_at.take() {
214            self.total_downtime += now.duration_since(disc);
215        }
216
217        self.total_reconnects += 1;
218        self.connected_at = Some(now);
219        self.state = ReconnectState::Connected;
220        self.backoff_started = None;
221
222        info!(
223            attempts = self.attempts,
224            total_reconnects = self.total_reconnects,
225            "Reconnect successful — resetting backoff"
226        );
227
228        // Reset backoff fully
229        self.attempts = 0;
230        self.current_interval = self.config.initial_interval;
231    }
232
233    /// Call when a reconnect attempt fails.
234    ///
235    /// Increases backoff interval exponentially with jitter.
236    pub fn on_failure(&mut self) {
237        self.attempts += 1;
238        self.state = ReconnectState::WaitingBackoff;
239
240        // Exponential backoff
241        let new_interval_secs = self.current_interval.as_secs_f64()
242            * self.config.multiplier;
243
244        // Add jitter
245        let jitter_range = new_interval_secs * self.config.jitter;
246        let jitter = if jitter_range > 0.0 {
247            // Deterministic pseudo-jitter based on attempt count
248            let j = (self.attempts as f64 * 0.618) % 1.0; // golden ratio
249            (j * 2.0 - 1.0) * jitter_range
250        } else {
251            0.0
252        };
253
254        let final_secs = (new_interval_secs + jitter)
255            .max(0.1)
256            .min(self.config.max_interval.as_secs_f64());
257
258        self.current_interval = Duration::from_secs_f64(final_secs);
259        self.backoff_started = Some(Instant::now());
260
261        warn!(
262            attempt = self.attempts,
263            next_backoff_ms = self.current_interval.as_millis(),
264            max_attempts = ?self.config.max_attempts,
265            "Reconnect failed — backing off"
266        );
267
268        // Check if we should give up
269        if let Some(max) = self.config.max_attempts {
270            if self.attempts >= max {
271                warn!(attempts = self.attempts, "Max reconnect attempts reached — giving up");
272                self.state = ReconnectState::GaveUp;
273            }
274        }
275    }
276
277    /// Call when a reconnect attempt is starting.
278    pub fn on_attempt_start(&mut self) {
279        self.state = ReconnectState::Reconnecting;
280        debug!(attempt = self.attempts + 1, "Reconnect attempt starting");
281    }
282
283    // ─── Polling interface ────────────────────────────────────────────────────
284
285    /// Returns `true` if it's time to attempt a reconnect right now.
286    ///
287    /// Call this periodically in your main loop.
288    pub fn should_reconnect(&mut self) -> bool {
289        if self.state == ReconnectState::GaveUp
290            || self.state == ReconnectState::Connected
291            || self.state == ReconnectState::Reconnecting
292        {
293            return false;
294        }
295
296        if let Some(started) = self.backoff_started {
297            if started.elapsed() >= self.current_interval {
298                self.state = ReconnectState::ReadyToReconnect;
299                return true;
300            }
301        }
302
303        false
304    }
305
306    /// Returns how long until the next reconnect attempt.
307    /// Returns `Duration::ZERO` if ready now.
308    pub fn time_until_reconnect(&self) -> Duration {
309        if let Some(started) = self.backoff_started {
310            let elapsed = started.elapsed();
311            if elapsed >= self.current_interval {
312                return Duration::ZERO;
313            }
314            return self.current_interval - elapsed;
315        }
316        Duration::ZERO
317    }
318
319    // ─── Stability check ──────────────────────────────────────────────────────
320
321    /// Check if the current connection has been stable long enough
322    /// to fully reset the backoff counter.
323    ///
324    /// Call periodically while connected.
325    pub fn check_stability(&mut self) {
326        if self.state != ReconnectState::Connected {
327            return;
328        }
329        if let Some(connected_at) = self.connected_at {
330            if connected_at.elapsed() >= self.config.stable_threshold && self.attempts > 0 {
331                info!("Connection stable — resetting backoff counter");
332                self.attempts = 0;
333                self.current_interval = self.config.initial_interval;
334            }
335        }
336    }
337
338    // ─── Stats ────────────────────────────────────────────────────────────────
339
340    /// Returns the current reconnect state.
341    pub fn state(&self) -> &ReconnectState {
342        &self.state
343    }
344
345    /// Returns `true` if the connection is currently up.
346    pub fn is_connected(&self) -> bool {
347        self.state == ReconnectState::Connected
348    }
349
350    /// Returns `true` if we have given up reconnecting.
351    pub fn is_giving_up(&self) -> bool {
352        self.state == ReconnectState::GaveUp
353    }
354
355    /// Returns the number of failed attempts since last successful connect.
356    pub fn attempts(&self) -> u32 {
357        self.attempts
358    }
359
360    /// Returns total successful reconnects.
361    pub fn total_reconnects(&self) -> u64 {
362        self.total_reconnects
363    }
364
365    /// Returns the current backoff interval.
366    pub fn current_interval(&self) -> Duration {
367        self.current_interval
368    }
369
370    /// Returns total accumulated downtime.
371    pub fn total_downtime(&self) -> Duration {
372        self.total_downtime
373    }
374
375    /// Returns a reference to the config.
376    pub fn config(&self) -> &ReconnectConfig {
377        &self.config
378    }
379}
380
/// Unit tests for the reconnect state machine.
///
/// Timing-sensitive tests use the `instant` preset (10 ms initial interval)
/// and sleep for 20 ms so the backoff has elapsed when it is checked.
#[cfg(test)]
mod tests {
    use super::*;

    /// Manager built from the fast `instant` preset.
    fn instant_manager() -> ReconnectManager {
        ReconnectManager::new(ReconnectConfig::instant())
    }

    #[test]
    fn test_new() {
        let m = ReconnectManager::new(ReconnectConfig::default());
        assert_eq!(m.state(), &ReconnectState::Connected);
        assert!(m.is_connected());
        assert!(!m.is_giving_up());
        assert_eq!(m.attempts(), 0);
        assert_eq!(m.total_reconnects(), 0);
    }

    #[test]
    fn test_on_disconnect() {
        let mut m = instant_manager();
        m.on_disconnect();
        assert_eq!(m.state(), &ReconnectState::WaitingBackoff);
        assert!(!m.is_connected());
    }

    #[test]
    fn test_should_reconnect_after_backoff() {
        let mut m = instant_manager();
        m.on_disconnect();
        std::thread::sleep(Duration::from_millis(20));
        assert!(m.should_reconnect());
        assert_eq!(m.state(), &ReconnectState::ReadyToReconnect);
    }

    #[test]
    fn test_should_not_reconnect_before_backoff() {
        let mut m = ReconnectManager::new(ReconnectConfig {
            initial_interval: Duration::from_secs(60),
            ..ReconnectConfig::default()
        });
        m.on_disconnect();
        assert!(!m.should_reconnect());
    }

    #[test]
    fn test_on_connect_resets_backoff() {
        let mut m = instant_manager();
        m.on_disconnect();
        m.on_failure();
        m.on_failure();
        assert!(m.attempts() > 0);
        m.on_connect();
        assert_eq!(m.attempts(), 0);
        assert_eq!(m.current_interval(), ReconnectConfig::instant().initial_interval);
        assert!(m.is_connected());
        assert_eq!(m.total_reconnects(), 1);
    }

    #[test]
    fn test_on_failure_increases_backoff() {
        let mut m = instant_manager();
        m.on_disconnect();
        let before = m.current_interval();
        m.on_failure();
        // Strictly greater: `>=` would also accept a backoff that never grows.
        assert!(m.current_interval() > before);
        assert_eq!(m.attempts(), 1);
    }

    #[test]
    fn test_max_attempts_gives_up() {
        let mut m = ReconnectManager::new(ReconnectConfig {
            max_attempts: Some(3),
            ..ReconnectConfig::instant()
        });
        m.on_disconnect();
        m.on_failure();
        m.on_failure();
        assert!(!m.is_giving_up());
        m.on_failure();
        assert!(m.is_giving_up());
        assert_eq!(m.state(), &ReconnectState::GaveUp);
    }

    #[test]
    fn test_no_reconnect_when_gave_up() {
        let mut m = ReconnectManager::new(ReconnectConfig {
            max_attempts: Some(1),
            ..ReconnectConfig::instant()
        });
        m.on_disconnect();
        m.on_failure();
        assert!(m.is_giving_up());
        assert!(!m.should_reconnect());
    }

    #[test]
    fn test_no_reconnect_when_connected() {
        let mut m = instant_manager();
        assert!(!m.should_reconnect());
    }

    #[test]
    fn test_no_reconnect_when_reconnecting() {
        let mut m = instant_manager();
        m.on_disconnect();
        m.on_attempt_start();
        assert_eq!(m.state(), &ReconnectState::Reconnecting);
        assert!(!m.should_reconnect());
    }

    #[test]
    fn test_total_downtime_accumulated() {
        let mut m = instant_manager();
        m.on_disconnect();
        std::thread::sleep(Duration::from_millis(20));
        m.on_connect();
        assert!(m.total_downtime() >= Duration::from_millis(10));
    }

    #[test]
    fn test_time_until_reconnect() {
        let mut m = ReconnectManager::new(ReconnectConfig {
            initial_interval: Duration::from_secs(60),
            ..ReconnectConfig::default()
        });
        m.on_disconnect();
        let remaining = m.time_until_reconnect();
        assert!(remaining > Duration::from_secs(50));
    }

    #[test]
    fn test_time_until_reconnect_zero_when_ready() {
        let mut m = instant_manager();
        m.on_disconnect();
        std::thread::sleep(Duration::from_millis(20));
        assert_eq!(m.time_until_reconnect(), Duration::ZERO);
    }

    #[test]
    fn test_backoff_capped_at_max() {
        let mut m = ReconnectManager::new(ReconnectConfig {
            initial_interval: Duration::from_millis(10),
            max_interval: Duration::from_millis(100),
            multiplier: 10.0,
            jitter: 0.0,
            max_attempts: None,
            stable_threshold: Duration::from_secs(30),
        });
        m.on_disconnect();
        for _ in 0..10 {
            m.on_failure();
        }
        assert!(m.current_interval() <= Duration::from_millis(100));
    }

    #[test]
    fn test_mobile_preset() {
        let m = ReconnectManager::mobile();
        assert_eq!(m.config().initial_interval, Duration::from_millis(500));
        assert!(m.config().max_attempts.is_none());
    }

    #[test]
    fn test_stable_preset() {
        let m = ReconnectManager::stable();
        assert_eq!(m.config().max_attempts, Some(10));
    }

    #[test]
    fn test_check_stability_resets_counter() {
        let mut m = ReconnectManager::new(ReconnectConfig {
            stable_threshold: Duration::from_millis(10),
            ..ReconnectConfig::instant()
        });
        m.on_disconnect();
        m.on_failure();
        m.on_connect();
        assert_eq!(m.attempts(), 0); // on_connect resets too

        // Exercise check_stability itself once the threshold has passed.
        std::thread::sleep(Duration::from_millis(20));
        m.check_stability();
        assert!(m.is_connected());
        assert_eq!(m.attempts(), 0);
        assert_eq!(m.current_interval(), ReconnectConfig::instant().initial_interval);
    }

    #[test]
    fn test_check_stability_ignored_when_disconnected() {
        let mut m = instant_manager();
        m.on_disconnect();
        m.on_failure();
        let interval = m.current_interval();
        m.check_stability();
        // Not connected, so neither the counter nor the interval may change.
        assert_eq!(m.attempts(), 1);
        assert_eq!(m.current_interval(), interval);
        assert_eq!(m.state(), &ReconnectState::WaitingBackoff);
    }

    #[test]
    fn test_multiple_reconnect_cycles() {
        let mut m = instant_manager();
        for _ in 0..3 {
            m.on_disconnect();
            std::thread::sleep(Duration::from_millis(20));
            assert!(m.should_reconnect());
            m.on_connect();
        }
        assert_eq!(m.total_reconnects(), 3);
        assert!(m.is_connected());
    }
}