inference_remote_core/
retry.rs1use std::time::Duration;
8
9use inference_core::deployment::RetryPolicy;
10use inference_core::error::InferenceError;
11
12use crate::backoff::{compute_backoff, BackoffPolicy};
13
/// Newtype around the `u32` counter that identifies a request attempt.
///
/// The wrapped value is public, so callers may build any attempt number
/// directly with `Attempt(n)`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Attempt(pub u32);

impl Attempt {
    /// Returns the attempt whose counter is `0`.
    pub fn zero() -> Self {
        Attempt(0)
    }
}
22
/// Outcome of evaluating a failed attempt.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RetryDecision {
    /// Try again once `after` has elapsed.
    Retry {
        /// Delay to wait before the next attempt.
        after: Duration,
    },
    /// Stop retrying.
    GiveUp,
}
28
29pub struct RetryEngine {
30 policy: RetryPolicy,
31 backoff: BackoffPolicy,
32 idempotent: bool,
33}
34
35impl RetryEngine {
36 pub fn new(policy: RetryPolicy, idempotent: bool) -> Self {
37 let backoff = BackoffPolicy::from(&policy);
38 Self {
39 policy,
40 backoff,
41 idempotent,
42 }
43 }
44
45 pub fn decide(&self, attempt: Attempt, err: &InferenceError) -> RetryDecision {
49 if !self.idempotent {
50 return RetryDecision::GiveUp;
51 }
52 if attempt.0 >= self.policy.max_retries {
53 return RetryDecision::GiveUp;
54 }
55 if !err.is_retryable() {
56 return RetryDecision::GiveUp;
57 }
58 if let InferenceError::RateLimited {
60 retry_after: Some(server_ra),
61 ..
62 } = err
63 {
64 if self.policy.respect_retry_after {
65 return RetryDecision::Retry { after: *server_ra };
66 }
67 }
68 RetryDecision::Retry {
69 after: compute_backoff(&self.backoff, attempt.0),
70 }
71 }
72}
73
#[cfg(test)]
mod tests {
    use super::*;
    use inference_core::runtime::{JitterKind, ProviderKind};

    /// Policy shared by every test: three retries, no jitter, and
    /// `retry_after` values respected.
    fn policy() -> RetryPolicy {
        RetryPolicy {
            max_retries: 3,
            initial_backoff: Duration::from_millis(10),
            max_backoff: Duration::from_millis(100),
            backoff_multiplier: 2.0,
            jitter: JitterKind::None,
            respect_retry_after: true,
        }
    }

    /// Engine built on the shared test policy.
    fn engine_with(idempotent: bool) -> RetryEngine {
        RetryEngine::new(policy(), idempotent)
    }

    /// Rate-limit error carrying the given `retry_after` value.
    fn rate_limited(retry_after: Option<Duration>) -> InferenceError {
        InferenceError::RateLimited {
            provider: ProviderKind::OpenAi,
            retry_after,
        }
    }

    #[test]
    fn retries_on_429_until_max() {
        let retry = engine_with(true);
        let err = rate_limited(None);

        // Attempts below `max_retries` are retried...
        for n in [0u32, 2] {
            assert!(matches!(
                retry.decide(Attempt(n), &err),
                RetryDecision::Retry { .. }
            ));
        }
        // ...and the attempt that reaches it is not.
        assert!(matches!(
            retry.decide(Attempt(3), &err),
            RetryDecision::GiveUp
        ));
    }

    #[test]
    fn no_retry_on_content_filter() {
        let retry = engine_with(true);
        let err = InferenceError::ContentFiltered {
            reason: "harmful".into(),
        };

        let decision = retry.decide(Attempt(0), &err);
        assert!(matches!(decision, RetryDecision::GiveUp));
    }

    #[test]
    fn no_retry_when_not_idempotent() {
        let retry = engine_with(false);
        let err = InferenceError::ServerError {
            status: 503,
            body: None,
        };

        let decision = retry.decide(Attempt(0), &err);
        assert!(matches!(decision, RetryDecision::GiveUp));
    }

    #[test]
    fn server_retry_after_wins_when_respected() {
        let server_hint = Duration::from_secs(5);
        let retry = engine_with(true);
        let err = rate_limited(Some(server_hint));

        match retry.decide(Attempt(0), &err) {
            RetryDecision::Retry { after } => assert_eq!(after, server_hint),
            other => panic!("expected retry, got {other:?}"),
        }
    }
}