Skip to main content

vgi_rpc/
retry.rs

1//! Retry helpers.
2//!
3//! Exponential-backoff + jitter schedule matching the Python / Go
4//! reference clients. The crate ships server-only today; these helpers
5//! are exposed for symmetry so a future client crate (or user code
6//! talking to remote services) reuses the same semantics without
7//! redefining the policy.
8
9use std::time::Duration;
10
/// Parameters describing an exponential-backoff retry schedule.
///
/// The delay before retry `k` (1-based) is `base_delay * multiplier^(k - 1)`,
/// capped at `max_delay`, and then perturbed by up to `±jitter` of that
/// capped value.
#[derive(Debug, Clone)]
pub struct RetryConfig {
    /// Total number of attempts allowed, counting the initial one.
    /// A value of `1` means "try once, never retry".
    pub max_attempts: u32,
    /// Delay used before the first retry, i.e. the starting point of the
    /// exponential curve.
    pub base_delay: Duration,
    /// Ceiling for the exponential curve. The cap is applied before
    /// jitter, so a jittered delay may land slightly above this value.
    pub max_delay: Duration,
    /// Growth factor between consecutive retries (typically `2.0`).
    pub multiplier: f64,
    /// Fraction of the capped delay that may be randomly added or
    /// subtracted, expected to lie in `[0, 1]`. `0.0` turns jitter off.
    pub jitter: f64,
}
26
27impl Default for RetryConfig {
28    fn default() -> Self {
29        Self {
30            max_attempts: 3,
31            base_delay: Duration::from_millis(100),
32            max_delay: Duration::from_secs(10),
33            multiplier: 2.0,
34            jitter: 0.2,
35        }
36    }
37}
38
39impl RetryConfig {
40    /// Convenience: `max_attempts=1` — no retries.
41    pub fn disabled() -> Self {
42        Self {
43            max_attempts: 1,
44            ..Default::default()
45        }
46    }
47
48    /// Compute the sleep before attempt `n` (0-indexed).
49    /// `n == 0` → caller is about to make the first attempt, no delay.
50    /// `n == 1` → delay before the first retry, and so on.
51    ///
52    /// Jitter is drawn from a real per-call entropy source, so callers
53    /// retrying in lockstep do **not** compute identical delays — that
54    /// decorrelation is the entire point of jitter (it prevents a
55    /// synchronized retry storm against a recovering server). For
56    /// reproducible delays in tests, use [`delay_before_with_jitter`].
57    ///
58    /// [`delay_before_with_jitter`]: Self::delay_before_with_jitter
59    pub fn delay_before(&self, attempt: u32) -> Duration {
60        self.delay_before_with_jitter(attempt, jitter_fraction())
61    }
62
63    /// Like [`delay_before`](Self::delay_before) but with the jitter
64    /// fraction supplied explicitly (in `[0, 1)`). Deterministic — used
65    /// by tests, or by callers that want to plug their own RNG.
66    pub fn delay_before_with_jitter(&self, attempt: u32, jitter_frac: f64) -> Duration {
67        if attempt == 0 {
68            return Duration::ZERO;
69        }
70        let exp = (attempt - 1) as i32;
71        let base = self.base_delay.as_secs_f64() * self.multiplier.powi(exp);
72        let mut d = base.min(self.max_delay.as_secs_f64());
73        if self.jitter > 0.0 {
74            let spread = d * self.jitter;
75            d += spread * (jitter_frac.clamp(0.0, 1.0) * 2.0 - 1.0);
76        }
77        // Guard against a non-finite result (e.g. a NaN `multiplier`)
78        // before `from_secs_f64`, which would otherwise panic.
79        if !d.is_finite() {
80            d = self.max_delay.as_secs_f64();
81        }
82        Duration::from_secs_f64(d.max(0.0))
83    }
84
85    /// Iterator over per-attempt delays (`attempt = 0..max_attempts`).
86    pub fn schedule(&self) -> impl Iterator<Item = Duration> + '_ {
87        (0..self.max_attempts).map(move |n| self.delay_before(n))
88    }
89}
90
/// A jitter fraction in `[0, 1)` drawn from a per-call entropy source:
/// the wall-clock time in nanoseconds since the Unix epoch (truncated
/// to 64 bits) mixed with a thread-local sequence counter, run through
/// the splitmix64 finalizer. Not cryptographic — jitter does not need
/// to be — but consecutive calls get distinct inputs, which a fixed
/// hash of the attempt number (the previous implementation) did not.
///
/// If the system clock reports a time before the Unix epoch, the time
/// component falls back to `0` and only the counter varies.
fn jitter_fraction() -> f64 {
    use std::cell::Cell;
    use std::time::{SystemTime, UNIX_EPOCH};

    // 2^53: the number of distinct values produced by the scaling below.
    const SCALE: f64 = (1u64 << 53) as f64;

    thread_local! {
        // Per-thread call counter; keeps two calls within the same clock
        // tick from hashing identical inputs.
        static SEQ: Cell<u64> = const { Cell::new(0) };
    }
    let seq = SEQ.with(|c| {
        let v = c.get().wrapping_add(1);
        c.set(v);
        v
    });
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_nanos() as u64)
        .unwrap_or(0);

    // splitmix64 finalizer over the combined entropy.
    let mut x = nanos
        .wrapping_mul(0x9E37_79B9_7F4A_7C15)
        .wrapping_add(seq.wrapping_mul(0xD1B5_4A32_D192_ED03));
    x ^= x >> 30;
    x = x.wrapping_mul(0xBF58_476D_1CE4_E5B9);
    x ^= x >> 27;
    x = x.wrapping_mul(0x94D0_49BB_1331_11EB);
    x ^= x >> 31;

    // Keep the top 53 bits (all an `f64` mantissa can hold exactly) and
    // scale by 2^-53. Dividing the full 64-bit value by `u64::MAX as f64`
    // rounds the largest inputs up to exactly 1.0, breaking the
    // half-open `[0, 1)` contract.
    (x >> 11) as f64 / SCALE
}
125
#[cfg(test)]
mod tests {
    //! Unit tests for the retry schedule: cap behaviour, jitter bounds,
    //! reproducibility with an explicit jitter fraction, and robustness
    //! against non-finite configuration values.

    use super::*;

    /// Attempt 0 is the initial call and must never be delayed.
    #[test]
    fn first_attempt_has_no_delay() {
        let cfg = RetryConfig::default();
        assert_eq!(cfg.delay_before(0), Duration::ZERO);
    }

    /// Without jitter the schedule doubles until it reaches `max_delay`
    /// and then stays there.
    #[test]
    fn exponential_growth_capped_at_max() {
        let cfg = RetryConfig {
            max_attempts: 6,
            base_delay: Duration::from_millis(100),
            max_delay: Duration::from_millis(400),
            multiplier: 2.0,
            jitter: 0.0,
        };
        let delays: Vec<Duration> = cfg.schedule().collect();
        assert_eq!(delays.len(), 6);
        assert_eq!(delays[0], Duration::ZERO);
        assert_eq!(delays[1], Duration::from_millis(100));
        assert_eq!(delays[2], Duration::from_millis(200));
        assert_eq!(delays[3], Duration::from_millis(400)); // capped
        assert_eq!(delays[4], Duration::from_millis(400));
        assert_eq!(delays[5], Duration::from_millis(400));
    }

    /// A disabled config yields exactly one attempt with no delay.
    #[test]
    fn disabled_yields_single_zero_delay() {
        let cfg = RetryConfig::disabled();
        let delays: Vec<Duration> = cfg.schedule().collect();
        assert_eq!(delays, vec![Duration::ZERO]);
    }

    /// `Duration` is unsigned, so "non-negative" can only fail as a panic
    /// inside the conversion. Beyond not panicking, every randomly drawn
    /// delay must lie between the delays for the extreme jitter fractions
    /// `0.0` and `1.0`.
    #[test]
    fn jitter_stays_non_negative() {
        let cfg = RetryConfig {
            max_attempts: 10,
            base_delay: Duration::from_millis(1),
            max_delay: Duration::from_secs(1),
            multiplier: 2.0,
            jitter: 0.9,
        };
        assert_eq!(cfg.schedule().count(), 10);
        for attempt in 0..cfg.max_attempts {
            let lowest = cfg.delay_before_with_jitter(attempt, 0.0);
            let highest = cfg.delay_before_with_jitter(attempt, 1.0);
            let drawn = cfg.delay_before(attempt);
            assert!(
                lowest <= drawn && drawn <= highest,
                "attempt {attempt}: {drawn:?} outside [{lowest:?}, {highest:?}]"
            );
        }
    }

    #[test]
    fn jitter_is_not_deterministic_across_calls() {
        // The whole point of jitter: two callers (or the same caller
        // twice) must not compute identical delays for the same attempt.
        let cfg = RetryConfig {
            max_attempts: 2,
            base_delay: Duration::from_millis(100),
            max_delay: Duration::from_secs(10),
            multiplier: 2.0,
            jitter: 0.5,
        };
        let seen: std::collections::HashSet<u128> =
            (0..50).map(|_| cfg.delay_before(1).as_nanos()).collect();
        assert!(
            seen.len() > 1,
            "jitter produced identical delays on every call"
        );
    }

    /// The explicit-fraction entry point is a pure function of its inputs.
    #[test]
    fn explicit_jitter_fraction_is_reproducible() {
        let cfg = RetryConfig {
            max_attempts: 2,
            base_delay: Duration::from_millis(100),
            max_delay: Duration::from_secs(10),
            multiplier: 2.0,
            jitter: 0.5,
        };
        let a = cfg.delay_before_with_jitter(1, 0.25);
        let b = cfg.delay_before_with_jitter(1, 0.25);
        assert_eq!(a, b);
        // A different fraction yields a different delay.
        assert_ne!(a, cfg.delay_before_with_jitter(1, 0.75));
    }

    #[test]
    fn non_finite_multiplier_does_not_panic() {
        let cfg = RetryConfig {
            max_attempts: 3,
            base_delay: Duration::from_millis(100),
            max_delay: Duration::from_secs(10),
            multiplier: f64::NAN,
            jitter: 0.0,
        };
        // Must clamp to a finite delay rather than panicking in the
        // float-to-`Duration` conversion; with jitter disabled the result
        // can never exceed the configured cap.
        let delay = cfg.delay_before_with_jitter(2, 0.0);
        assert!(delay <= cfg.max_delay);
    }
}