Skip to main content

chipzen_bot/
retry.rs

1//! Retry / backoff policy for the WebSocket client.
2//!
3//! When the connection to the Chipzen server drops (TCP reset, heartbeat
4//! miss, transient network failure, etc.) the SDK reconnects within the
5//! server's reconnect grace window. The pacing of those reconnect attempts
6//! is configurable via [`RetryPolicy`], accepted by both [`crate::run_bot`]
7//! and [`crate::run_external_bot`].
8//!
9//! ```
10//! use chipzen_bot::RetryPolicy;
11//!
12//! let policy = RetryPolicy::new(5, 500, 30_000, 2.0).unwrap();
13//! assert_eq!(policy.backoff_ms(1), 500);
14//! assert_eq!(policy.backoff_ms(2), 1000);
15//! ```
16//!
17//! The default policy mirrors the Python SDK's spec: 5 attempts, 500ms
18//! initial backoff, doubling each attempt, capped at 30 seconds. The
19//! defaults are sensible for the typical home-network deployment; devs on
20//! noisy connections may want a longer backoff or more attempts.
21//!
22//! Note: this policy controls **only** how reconnect attempts are paced.
23//! The 30-second server-side grace window itself is unchanged; if the
24//! reconnects burn through the window the session is considered lost and
25//! the server terminates the match-side state.
26
27use crate::error::Error;
28
/// How many reconnect attempts are made after a drop when no policy overrides it.
pub const DEFAULT_MAX_RECONNECT_ATTEMPTS: u32 = 5;
/// Milliseconds waited ahead of the very first reconnect attempt by default.
pub const DEFAULT_INITIAL_BACKOFF_MS: u64 = 500;
/// Default ceiling, in milliseconds, that no single backoff delay may exceed.
pub const DEFAULT_MAX_BACKOFF_MS: u64 = 30_000;
/// Default growth factor applied to the delay from one attempt to the next.
pub const DEFAULT_BACKOFF_MULTIPLIER: f64 = 2.0;
37
/// Tunable pacing for reconnect attempts.
///
/// The delay ahead of attempt `n` (counting from 1) grows geometrically
/// and is clipped at the configured ceiling:
///
/// ```text
/// min(initial_backoff_ms * backoff_multiplier.powi(n - 1), max_backoff_ms)
/// ```
///
/// With the default values the schedule is:
///
/// ```text
/// attempt 1: 500 ms
/// attempt 2: 1000 ms
/// attempt 3: 2000 ms
/// attempt 4: 4000 ms
/// attempt 5: 8000 ms
/// attempt 6: 16000 ms  (would be next, but capped by attempts=5)
/// ```
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct RetryPolicy {
    /// Upper limit on reconnection attempts following a connection drop or
    /// a missed heartbeat. Setting this to `0` turns reconnection off
    /// altogether, so the first connect failure surfaces. Defaults to `5`.
    pub max_reconnect_attempts: u32,
    /// Milliseconds to wait ahead of the **first** reconnect attempt.
    /// Defaults to `500`.
    pub initial_backoff_ms: u64,
    /// Ceiling, in milliseconds, for any individual backoff delay. Has to be
    /// `>= initial_backoff_ms`. Defaults to `30_000` (30 seconds), which
    /// equals the server-side grace window, so one backoff can never outlast
    /// the window on its own.
    pub max_backoff_ms: u64,
    /// Geometric growth factor between consecutive attempts: `2.0` doubles
    /// the delay every time, `1.0` keeps it constant. Has to be `>= 1.0`.
    /// Defaults to `2.0`.
    pub backoff_multiplier: f64,
}
75
76impl Default for RetryPolicy {
77    fn default() -> Self {
78        Self {
79            max_reconnect_attempts: DEFAULT_MAX_RECONNECT_ATTEMPTS,
80            initial_backoff_ms: DEFAULT_INITIAL_BACKOFF_MS,
81            max_backoff_ms: DEFAULT_MAX_BACKOFF_MS,
82            backoff_multiplier: DEFAULT_BACKOFF_MULTIPLIER,
83        }
84    }
85}
86
87impl RetryPolicy {
88    /// Construct a validated [`RetryPolicy`].
89    ///
90    /// Returns [`Error::Protocol`] if any knob is out of range. Mirrors the
91    /// Python SDK's `__post_init__` validation so an invalid policy fails
92    /// loudly at construction rather than producing surprising backoff.
93    pub fn new(
94        max_reconnect_attempts: u32,
95        initial_backoff_ms: u64,
96        max_backoff_ms: u64,
97        backoff_multiplier: f64,
98    ) -> Result<Self, Error> {
99        if max_backoff_ms < initial_backoff_ms {
100            return Err(Error::Protocol(format!(
101                "max_backoff_ms must be >= initial_backoff_ms ({max_backoff_ms} < {initial_backoff_ms})"
102            )));
103        }
104        if backoff_multiplier < 1.0 {
105            return Err(Error::Protocol(format!(
106                "backoff_multiplier must be >= 1.0, got {backoff_multiplier}"
107            )));
108        }
109        Ok(Self {
110            max_reconnect_attempts,
111            initial_backoff_ms,
112            max_backoff_ms,
113            backoff_multiplier,
114        })
115    }
116
117    /// Return the delay (in ms) to wait **before** the given attempt.
118    ///
119    /// `attempt` is 1-indexed: `attempt = 1` is the first reconnect after a
120    /// drop, `attempt = 2` the second, etc. An `attempt` of 0 is clamped to 1
121    /// (the first backoff) — the public contract is "give me the delay for
122    /// the Nth retry" and there is no zeroth retry.
123    pub fn backoff_ms(&self, attempt: u32) -> u64 {
124        let exponent = attempt.saturating_sub(1);
125        // Compute initial * multiplier.powi(exponent) in float, then clamp +
126        // round down to an integer. Clamping before the cast keeps the cap
127        // exact even when the float product overflows the cap by a fraction.
128        let raw = self.initial_backoff_ms as f64 * self.backoff_multiplier.powi(exponent as i32);
129        let capped = raw.min(self.max_backoff_ms as f64);
130        if capped.is_finite() && capped >= 0.0 {
131            capped as u64
132        } else {
133            self.max_backoff_ms
134        }
135    }
136}
137
138/// The default [`RetryPolicy`] used when a `run_*` entry point is called
139/// without an explicit policy: 5 attempts, 500ms initial backoff doubling to
140/// a 30s cap.
141pub fn default_retry_policy() -> RetryPolicy {
142    RetryPolicy::default()
143}
144
#[cfg(test)]
mod tests {
    use super::*;

    /// The stock policy carries the four values fixed by the Python spec.
    #[test]
    fn defaults_match_the_python_spec() {
        let RetryPolicy {
            max_reconnect_attempts,
            initial_backoff_ms,
            max_backoff_ms,
            backoff_multiplier,
        } = RetryPolicy::default();

        assert_eq!(max_reconnect_attempts, 5);
        assert_eq!(initial_backoff_ms, 500);
        assert_eq!(max_backoff_ms, 30_000);
        assert_eq!(backoff_multiplier, 2.0);
    }

    /// Delays double per attempt and flatten out at the 30_000 ms ceiling
    /// once the geometric series would exceed it.
    #[test]
    fn backoff_progression_doubles_and_caps() {
        let policy = RetryPolicy::default();
        let schedule = [
            (1, 500),
            (2, 1000),
            (3, 2000),
            (4, 4000),
            (5, 8000),
            (20, 30_000),
        ];
        for &(attempt, expected_ms) in &schedule {
            assert_eq!(policy.backoff_ms(attempt), expected_ms);
        }
    }

    /// There is no zeroth retry, so attempt 0 yields the first backoff.
    #[test]
    fn backoff_attempt_zero_clamps_to_first() {
        let policy = RetryPolicy::default();
        assert_eq!(policy.backoff_ms(0), 500);
    }

    /// A multiplier of exactly one keeps every delay at the initial value.
    #[test]
    fn constant_backoff_with_multiplier_one() {
        let policy = RetryPolicy::new(3, 250, 1000, 1.0).unwrap();
        for &attempt in &[1, 5] {
            assert_eq!(policy.backoff_ms(attempt), 250);
        }
    }

    /// A ceiling below the initial delay is refused at construction.
    #[test]
    fn rejects_max_below_initial() {
        let message = RetryPolicy::new(5, 1000, 500, 2.0)
            .unwrap_err()
            .to_string();
        assert!(message.contains("max_backoff_ms must be >= initial_backoff_ms"));
    }

    /// A shrinking multiplier is refused at construction.
    #[test]
    fn rejects_multiplier_below_one() {
        let message = RetryPolicy::new(5, 500, 30_000, 0.5)
            .unwrap_err()
            .to_string();
        assert!(message.contains("backoff_multiplier must be >= 1.0"));
    }

    /// Zero attempts disables reconnection; it is a valid policy, not an error.
    #[test]
    fn zero_attempts_is_allowed() {
        let policy = RetryPolicy::new(0, 500, 30_000, 2.0).unwrap();
        assert_eq!(policy.max_reconnect_attempts, 0);
    }
}