Skip to main content

sqlite_graphrag/
retry.rs

//! Centralized retry infrastructure with exponential backoff and configurable jitter.
//!
//! Provides [`RetryConfig`](crate::retry::RetryConfig) with named constructors for each failure domain
//! (SQLite BUSY, LLM rate-limit, cold-start) and a [`compute_delay`](crate::retry::compute_delay) function
//! that applies the configured jitter strategy (none, half, or full).
6
7use std::time::Duration;
8
9/// Configures retry behavior for a specific failure domain.
10///
11/// Use the named constructors ([`Self::sqlite_busy`], [`Self::llm_rate_limit`],
12/// [`Self::cold_start`]) for pre-tuned policies. All timing values are in
13/// milliseconds except `max_elapsed_secs` which is in seconds.
14#[derive(Debug, Clone)]
15pub struct RetryConfig {
16    /// Base delay for the first retry attempt (ms).
17    pub initial_delay_ms: u64,
18    /// Upper bound on any single delay (ms).
19    pub max_delay_ms: u64,
20    /// Multiplicative factor applied per attempt.
21    pub multiplier: u64,
22    /// Hard cap on total attempts (0 = unlimited, use deadline).
23    pub max_attempts: u32,
24    /// Total elapsed wall-clock time before giving up (seconds).
25    pub max_elapsed_secs: u64,
26    /// Jitter strategy applied to computed delays.
27    pub jitter: JitterKind,
28}
29
/// Selects how a computed retry delay is randomized.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum JitterKind {
    /// Use the computed delay unchanged; the result is deterministic.
    None,
    /// Draw the delay from `[base/2, base)`, so at least half of the
    /// computed delay is always waited.
    Half,
    /// Draw the delay from `[0, base)` for the widest possible spread.
    Full,
}
40
41impl RetryConfig {
42    /// SQLite BUSY retry: 5 attempts, 300ms base, half-jitter, 30s deadline.
43    pub fn sqlite_busy() -> Self {
44        Self {
45            initial_delay_ms: 300,
46            max_delay_ms: 4800,
47            multiplier: 2,
48            max_attempts: 5,
49            max_elapsed_secs: 30,
50            jitter: JitterKind::Half,
51        }
52    }
53
54    /// LLM rate-limit retry: 60s base, 900s cap, half-jitter, 1h deadline.
55    pub fn llm_rate_limit() -> Self {
56        Self {
57            initial_delay_ms: 60_000,
58            max_delay_ms: 900_000,
59            multiplier: 2,
60            max_attempts: 20,
61            max_elapsed_secs: 3600,
62            jitter: JitterKind::Half,
63        }
64    }
65
66    /// Cold-start retry: 2s base, 2 attempts, no jitter, 30s deadline.
67    pub fn cold_start() -> Self {
68        Self {
69            initial_delay_ms: 2000,
70            max_delay_ms: 4000,
71            multiplier: 2,
72            max_attempts: 2,
73            max_elapsed_secs: 30,
74            jitter: JitterKind::None,
75        }
76    }
77}
78
79/// Computes the delay for a given attempt using the config's jitter strategy.
80///
81/// # Formula
82///
83/// ```text
84/// base = min(initial_delay_ms * multiplier^attempt, max_delay_ms)
85/// delay = apply_jitter(base, jitter_kind)
86/// ```
87pub fn compute_delay(config: &RetryConfig, attempt: u32) -> Duration {
88    let base = config
89        .initial_delay_ms
90        .saturating_mul(config.multiplier.saturating_pow(attempt))
91        .min(config.max_delay_ms);
92
93    let delay_ms = match config.jitter {
94        JitterKind::None => base,
95        JitterKind::Half => {
96            let half = base / 2;
97            if half == 0 {
98                base
99            } else {
100                half + fastrand::u64(0..half)
101            }
102        }
103        JitterKind::Full => {
104            if base == 0 {
105                0
106            } else {
107                fastrand::u64(0..base)
108            }
109        }
110    };
111
112    Duration::from_millis(delay_ms)
113}
114
/// Reports whether retries have been disabled through the environment.
///
/// The switch is on only when `SQLITE_GRAPHRAG_DISABLE_RETRY` is set to
/// exactly `1`; an unset variable, any other value, or a value that is not
/// valid Unicode all leave it off.
///
/// While the switch is on, retry loops are expected to surface the error
/// right away without sleeping. Intended for incidents, to stop retry storms.
pub fn is_kill_switch_active() -> bool {
    matches!(
        std::env::var("SQLITE_GRAPHRAG_DISABLE_RETRY").as_deref(),
        Ok("1")
    )
}
122
#[cfg(test)]
mod tests {
    use super::*;

    /// Number of random draws checked per attempt in the jitter tests.
    const SAMPLES: usize = 100;

    /// Un-jittered, capped delay in milliseconds for `attempt`, computed
    /// independently of `compute_delay` so the tests can bound its output.
    fn capped_base_ms(cfg: &RetryConfig, attempt: u32) -> u128 {
        let grown = cfg
            .initial_delay_ms
            .saturating_mul(cfg.multiplier.saturating_pow(attempt));
        u128::from(grown.min(cfg.max_delay_ms))
    }

    /// Half-jitter draws must land in `[base/2, base)`.
    #[test]
    fn compute_delay_half_jitter_in_bounds() {
        let policy = RetryConfig::llm_rate_limit();
        for attempt in 0..5 {
            let upper = capped_base_ms(&policy, attempt);
            let lower = upper / 2;
            for _ in 0..SAMPLES {
                let ms = compute_delay(&policy, attempt).as_millis();
                assert!(ms >= lower);
                assert!(ms < upper);
            }
        }
    }

    /// Without jitter, repeated calls give the same, exact delay.
    #[test]
    fn compute_delay_no_jitter_is_deterministic() {
        let policy = RetryConfig::cold_start();
        let first = compute_delay(&policy, 0);
        let second = compute_delay(&policy, 0);
        assert_eq!(first, second);
        assert_eq!(first, Duration::from_millis(2000));
    }

    /// With the variable unset, the kill switch reports inactive.
    #[test]
    fn kill_switch_inactive_by_default() {
        std::env::remove_var("SQLITE_GRAPHRAG_DISABLE_RETRY");
        assert!(!is_kill_switch_active());
    }

    /// The SQLite BUSY preset keeps its documented values.
    #[test]
    fn sqlite_busy_config_matches_constants() {
        let RetryConfig {
            initial_delay_ms,
            max_attempts,
            max_elapsed_secs,
            ..
        } = RetryConfig::sqlite_busy();
        assert_eq!(initial_delay_ms, 300);
        assert_eq!(max_attempts, 5);
        assert_eq!(max_elapsed_secs, 30);
    }

    /// The LLM rate-limit preset carries a deadline and a per-delay cap.
    #[test]
    fn llm_rate_limit_has_deadline() {
        let RetryConfig {
            max_elapsed_secs,
            max_delay_ms,
            ..
        } = RetryConfig::llm_rate_limit();
        assert_eq!(max_elapsed_secs, 3600);
        assert_eq!(max_delay_ms, 900_000);
    }

    /// Full-jitter draws must stay strictly below the capped base.
    #[test]
    fn full_jitter_stays_below_base() {
        let policy = RetryConfig {
            initial_delay_ms: 1000,
            max_delay_ms: 10_000,
            multiplier: 2,
            max_attempts: 5,
            max_elapsed_secs: 60,
            jitter: JitterKind::Full,
        };
        for attempt in 0..4 {
            let upper = capped_base_ms(&policy, attempt);
            for _ in 0..SAMPLES {
                let ms = compute_delay(&policy, attempt).as_millis();
                assert!(ms < upper);
            }
        }
    }
}