chipzen_bot/retry.rs
1//! Retry / backoff policy for the WebSocket client.
2//!
3//! When the connection to the Chipzen server drops (TCP reset, heartbeat
4//! miss, transient network failure, etc.) the SDK reconnects within the
5//! server's reconnect grace window. The pacing of those reconnect attempts
6//! is configurable via [`RetryPolicy`], accepted by both [`crate::run_bot`]
7//! and [`crate::run_external_bot`].
8//!
9//! ```
10//! use chipzen_bot::RetryPolicy;
11//!
12//! let policy = RetryPolicy::new(5, 500, 30_000, 2.0).unwrap();
13//! assert_eq!(policy.backoff_ms(1), 500);
14//! assert_eq!(policy.backoff_ms(2), 1000);
15//! ```
16//!
17//! The default policy mirrors the Python SDK's spec: 5 attempts, 500ms
18//! initial backoff, doubling each attempt, capped at 30 seconds. The
19//! defaults are sensible for the typical home-network deployment; devs on
20//! noisy connections may want a longer backoff or more attempts.
21//!
22//! Note: this policy controls **only** how reconnect attempts are paced.
23//! The 30-second server-side grace window itself is unchanged; if the
24//! reconnects burn through the window the session is considered lost and
25//! the server terminates the match-side state.
26
27use crate::error::Error;
28
/// Number of reconnect attempts made after a drop when no policy overrides it.
pub const DEFAULT_MAX_RECONNECT_ATTEMPTS: u32 = 5;
/// Milliseconds waited ahead of the first reconnect attempt, by default.
pub const DEFAULT_INITIAL_BACKOFF_MS: u64 = 500;
/// Default ceiling, in milliseconds, on any one backoff delay (30 seconds).
pub const DEFAULT_MAX_BACKOFF_MS: u64 = 30 * 1_000;
/// Default growth factor applied to the delay from one attempt to the next.
pub const DEFAULT_BACKOFF_MULTIPLIER: f64 = 2.0;
37
/// Tunable pacing for reconnect attempts.
///
/// The delay before attempt `n` (counting from 1) follows:
///
/// ```text
/// min(initial_backoff_ms * backoff_multiplier.powi(n - 1), max_backoff_ms)
/// ```
///
/// With the default values that yields:
///
/// ```text
/// attempt 1:   500 ms
/// attempt 2:  1000 ms
/// attempt 3:  2000 ms
/// attempt 4:  4000 ms
/// attempt 5:  8000 ms
/// attempt 6: 16000 ms (never reached: the default allows only 5 attempts)
/// ```
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct RetryPolicy {
    /// How many reconnection attempts are made after a connection drop or
    /// heartbeat miss. Setting this to `0` turns reconnection off entirely
    /// (the first connect failure surfaces). Defaults to `5`.
    pub max_reconnect_attempts: u32,
    /// Milliseconds to wait before the **first** reconnect attempt.
    /// Defaults to `500`.
    pub initial_backoff_ms: u64,
    /// Ceiling, in milliseconds, for any single backoff delay. Must be
    /// `>= initial_backoff_ms`. Defaults to `30_000` (30 seconds), which
    /// matches the server-side grace window so that no single backoff is
    /// longer than the window itself.
    pub max_backoff_ms: u64,
    /// Growth factor applied between consecutive attempts. `2.0` doubles the
    /// delay every attempt; `1.0` keeps it constant. Must be `>= 1.0`.
    /// Defaults to `2.0`.
    pub backoff_multiplier: f64,
}
75
76impl Default for RetryPolicy {
77 fn default() -> Self {
78 Self {
79 max_reconnect_attempts: DEFAULT_MAX_RECONNECT_ATTEMPTS,
80 initial_backoff_ms: DEFAULT_INITIAL_BACKOFF_MS,
81 max_backoff_ms: DEFAULT_MAX_BACKOFF_MS,
82 backoff_multiplier: DEFAULT_BACKOFF_MULTIPLIER,
83 }
84 }
85}
86
87impl RetryPolicy {
88 /// Construct a validated [`RetryPolicy`].
89 ///
90 /// Returns [`Error::Protocol`] if any knob is out of range. Mirrors the
91 /// Python SDK's `__post_init__` validation so an invalid policy fails
92 /// loudly at construction rather than producing surprising backoff.
93 pub fn new(
94 max_reconnect_attempts: u32,
95 initial_backoff_ms: u64,
96 max_backoff_ms: u64,
97 backoff_multiplier: f64,
98 ) -> Result<Self, Error> {
99 if max_backoff_ms < initial_backoff_ms {
100 return Err(Error::Protocol(format!(
101 "max_backoff_ms must be >= initial_backoff_ms ({max_backoff_ms} < {initial_backoff_ms})"
102 )));
103 }
104 if backoff_multiplier < 1.0 {
105 return Err(Error::Protocol(format!(
106 "backoff_multiplier must be >= 1.0, got {backoff_multiplier}"
107 )));
108 }
109 Ok(Self {
110 max_reconnect_attempts,
111 initial_backoff_ms,
112 max_backoff_ms,
113 backoff_multiplier,
114 })
115 }
116
117 /// Return the delay (in ms) to wait **before** the given attempt.
118 ///
119 /// `attempt` is 1-indexed: `attempt = 1` is the first reconnect after a
120 /// drop, `attempt = 2` the second, etc. An `attempt` of 0 is clamped to 1
121 /// (the first backoff) — the public contract is "give me the delay for
122 /// the Nth retry" and there is no zeroth retry.
123 pub fn backoff_ms(&self, attempt: u32) -> u64 {
124 let exponent = attempt.saturating_sub(1);
125 // Compute initial * multiplier.powi(exponent) in float, then clamp +
126 // round down to an integer. Clamping before the cast keeps the cap
127 // exact even when the float product overflows the cap by a fraction.
128 let raw = self.initial_backoff_ms as f64 * self.backoff_multiplier.powi(exponent as i32);
129 let capped = raw.min(self.max_backoff_ms as f64);
130 if capped.is_finite() && capped >= 0.0 {
131 capped as u64
132 } else {
133 self.max_backoff_ms
134 }
135 }
136}
137
138/// The default [`RetryPolicy`] used when a `run_*` entry point is called
139/// without an explicit policy: 5 attempts, 500ms initial backoff doubling to
140/// a 30s cap.
141pub fn default_retry_policy() -> RetryPolicy {
142 RetryPolicy::default()
143}
144
#[cfg(test)]
mod tests {
    use super::*;

    /// The stock policy carries the values laid out in the Python SDK spec.
    #[test]
    fn defaults_match_the_python_spec() {
        let RetryPolicy {
            max_reconnect_attempts,
            initial_backoff_ms,
            max_backoff_ms,
            backoff_multiplier,
        } = RetryPolicy::default();
        assert_eq!(max_reconnect_attempts, 5);
        assert_eq!(initial_backoff_ms, 500);
        assert_eq!(max_backoff_ms, 30_000);
        assert_eq!(backoff_multiplier, 2.0);
    }

    /// Delays double per attempt and flatten out at the 30_000 ms cap.
    #[test]
    fn backoff_progression_doubles_and_caps() {
        let policy = RetryPolicy::default();
        let expected = [
            (1, 500),
            (2, 1000),
            (3, 2000),
            (4, 4000),
            (5, 8000),
            // Far enough along that the geometric series has passed the cap.
            (20, 30_000),
        ];
        for (attempt, delay_ms) in expected {
            assert_eq!(policy.backoff_ms(attempt), delay_ms);
        }
    }

    /// There is no zeroth retry, so attempt 0 yields the first backoff.
    #[test]
    fn backoff_attempt_zero_clamps_to_first() {
        let policy = RetryPolicy::default();
        assert_eq!(policy.backoff_ms(0), 500);
    }

    /// A multiplier of exactly 1.0 keeps every delay at the initial value.
    #[test]
    fn constant_backoff_with_multiplier_one() {
        let policy = RetryPolicy::new(3, 250, 1000, 1.0).unwrap();
        for attempt in [1, 5] {
            assert_eq!(policy.backoff_ms(attempt), 250);
        }
    }

    /// A cap below the initial delay is refused at construction.
    #[test]
    fn rejects_max_below_initial() {
        let message = RetryPolicy::new(5, 1000, 500, 2.0)
            .unwrap_err()
            .to_string();
        assert!(message.contains("max_backoff_ms must be >= initial_backoff_ms"));
    }

    /// A shrinking multiplier is refused at construction.
    #[test]
    fn rejects_multiplier_below_one() {
        let message = RetryPolicy::new(5, 500, 30_000, 0.5)
            .unwrap_err()
            .to_string();
        assert!(message.contains("backoff_multiplier must be >= 1.0"));
    }

    /// Zero attempts means "never reconnect" and is a valid policy.
    #[test]
    fn zero_attempts_is_allowed() {
        let policy = RetryPolicy::new(0, 500, 30_000, 2.0).unwrap();
        assert_eq!(policy.max_reconnect_attempts, 0);
    }
}
205}