Skip to main content

inference_remote_core/
retry.rs

1//! Per-request retry decision logic. Doc §3.5 (Backoff on 429), §12.3.
2//!
3//! `RetryEngine` is intentionally a value, not an actor — the retry
4//! loop runs inside one `RemoteWorkerActor::execute` call and a
5//! mailbox hop per attempt is gratuitous overhead.
6
7use std::time::Duration;
8
9use inference_core::deployment::RetryPolicy;
10use inference_core::error::InferenceError;
11
12use crate::backoff::{compute_backoff, BackoffPolicy};
13
/// Zero-based index of a request attempt.
///
/// The wrapped `u32` is public, so callers can construct or read the index
/// directly (`Attempt(2)`, `attempt.0`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Attempt(pub u32);

impl Attempt {
    /// Returns the index of the very first attempt, i.e. `Attempt(0)`.
    pub fn zero() -> Self {
        Attempt(0)
    }
}
22
/// Outcome of evaluating a failed attempt.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RetryDecision {
    /// Try again once `after` has elapsed.
    Retry { after: Duration },
    /// Stop retrying.
    GiveUp,
}
28
29pub struct RetryEngine {
30    policy: RetryPolicy,
31    backoff: BackoffPolicy,
32    idempotent: bool,
33}
34
35impl RetryEngine {
36    pub fn new(policy: RetryPolicy, idempotent: bool) -> Self {
37        let backoff = BackoffPolicy::from(&policy);
38        Self {
39            policy,
40            backoff,
41            idempotent,
42        }
43    }
44
45    /// Decide whether to retry after a failed attempt. `attempt` is the
46    /// 0-indexed attempt that just failed (so `0` means we've made one
47    /// attempt and are deciding whether to make a second).
48    pub fn decide(&self, attempt: Attempt, err: &InferenceError) -> RetryDecision {
49        if !self.idempotent {
50            return RetryDecision::GiveUp;
51        }
52        if attempt.0 >= self.policy.max_retries {
53            return RetryDecision::GiveUp;
54        }
55        if !err.is_retryable() {
56            return RetryDecision::GiveUp;
57        }
58        // 429 with server-provided `Retry-After` overrides the policy.
59        if let InferenceError::RateLimited {
60            retry_after: Some(server_ra),
61            ..
62        } = err
63        {
64            if self.policy.respect_retry_after {
65                return RetryDecision::Retry { after: *server_ra };
66            }
67        }
68        RetryDecision::Retry {
69            after: compute_backoff(&self.backoff, attempt.0),
70        }
71    }
72}
73
#[cfg(test)]
mod tests {
    use super::*;
    use inference_core::runtime::{JitterKind, ProviderKind};

    /// Shared fixture policy: up to three retries, jitter disabled, and
    /// server `Retry-After` honoured.
    fn policy() -> RetryPolicy {
        RetryPolicy {
            max_retries: 3,
            initial_backoff: Duration::from_millis(10),
            max_backoff: Duration::from_millis(100),
            backoff_multiplier: 2.0,
            jitter: JitterKind::None,
            respect_retry_after: true,
        }
    }

    /// Engine built over the shared fixture policy.
    fn engine(idempotent: bool) -> RetryEngine {
        RetryEngine::new(policy(), idempotent)
    }

    /// Rate-limit error carrying the given `Retry-After` hint.
    fn rate_limited(retry_after: Option<Duration>) -> InferenceError {
        InferenceError::RateLimited {
            provider: ProviderKind::OpenAi,
            retry_after,
        }
    }

    #[test]
    fn retries_on_429_until_max() {
        let subject = engine(true);
        let err = rate_limited(None);

        // Attempts below `max_retries` are retried...
        for within_budget in [0, 2] {
            assert!(matches!(
                subject.decide(Attempt(within_budget), &err),
                RetryDecision::Retry { .. }
            ));
        }
        // ...and the attempt that reaches the limit is not.
        assert_eq!(subject.decide(Attempt(3), &err), RetryDecision::GiveUp);
    }

    #[test]
    fn no_retry_on_content_filter() {
        let subject = engine(true);
        let err = InferenceError::ContentFiltered {
            reason: "harmful".into(),
        };

        assert_eq!(subject.decide(Attempt(0), &err), RetryDecision::GiveUp);
    }

    #[test]
    fn no_retry_when_not_idempotent() {
        let subject = engine(false);
        let err = InferenceError::ServerError {
            status: 503,
            body: None,
        };

        assert_eq!(subject.decide(Attempt(0), &err), RetryDecision::GiveUp);
    }

    #[test]
    fn server_retry_after_wins_when_respected() {
        let server_hint = Duration::from_secs(5);
        let subject = engine(true);
        let err = rate_limited(Some(server_hint));

        let decision = subject.decide(Attempt(0), &err);
        match decision {
            RetryDecision::Retry { after } => assert_eq!(after, server_hint),
            other => panic!("expected retry, got {other:?}"),
        }
    }
}
133}