claude_agent/client/resilience/mod.rs
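//! Client-side resilience: retry with exponential backoff, an optional
//! circuit breaker, and a per-attempt timeout for outgoing operations.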
mod backoff;
mod circuit;

pub use backoff::ExponentialBackoff;
pub use circuit::{CircuitBreaker, CircuitConfig, CircuitState};

use std::sync::Arc;
use std::time::Duration;

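/// Top-level resilience settings: the retry policy, an optional circuit
/// breaker configuration, and the timeout applied to each individual attempt.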
#[derive(Clone)]
pub struct ResilienceConfig {
    pub retry: RetryConfig,
    pub circuit: Option<CircuitConfig>,
    pub timeout: Duration,
}

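/// Retry policy: how many retries to allow, how to back off between attempts,
/// and which error classes (rate limits, server errors, network failures) are
/// retryable.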
#[derive(Clone)]
pub struct RetryConfig {
    pub max_retries: u32,
    pub backoff: ExponentialBackoff,
    pub retry_on_rate_limit: bool,
    pub retry_on_server_error: bool,
    pub retry_on_network_error: bool,
}

impl Default for ResilienceConfig {
    fn default() -> Self {
        Self {
            retry: RetryConfig::default(),
            circuit: Some(CircuitConfig::default()),
            timeout: Duration::from_secs(120),
        }
    }
}

impl Default for RetryConfig {
    fn default() -> Self {
        Self {
            max_retries: 3,
            backoff: ExponentialBackoff::default(),
            retry_on_rate_limit: true,
            retry_on_server_error: true,
            retry_on_network_error: true,
        }
    }
}

impl ResilienceConfig {
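    /// Single-attempt configuration: no retries and no circuit breaker, with
    /// the default 120-second timeout.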
    pub fn no_retry() -> Self {
        Self {
            retry: RetryConfig {
                max_retries: 0,
                ..Default::default()
            },
            circuit: None,
            timeout: Duration::from_secs(120),
        }
    }

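    /// Retries hard: up to 5 retries with a quicker backoff, a circuit breaker
    /// with a failure threshold of 10, and a 300-second per-attempt timeout.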
    pub fn aggressive() -> Self {
        Self {
            retry: RetryConfig {
                max_retries: 5,
                backoff: ExponentialBackoff::new(
                    Duration::from_millis(50),
                    Duration::from_secs(10),
                    2.0,
                ),
                ..Default::default()
            },
            circuit: Some(CircuitConfig {
                failure_threshold: 10,
                recovery_timeout: Duration::from_secs(60),
                success_threshold: 5,
            }),
            timeout: Duration::from_secs(300),
        }
    }

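    /// Backs off gently: at most 2 retries with a slower backoff, the default
    /// circuit breaker, and a 60-second per-attempt timeout.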
    pub fn conservative() -> Self {
        Self {
            retry: RetryConfig {
                max_retries: 2,
                backoff: ExponentialBackoff::new(
                    Duration::from_millis(500),
                    Duration::from_secs(30),
                    2.0,
                ),
                ..Default::default()
            },
            circuit: Some(CircuitConfig::default()),
            timeout: Duration::from_secs(60),
        }
    }
}

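/// Applies a [`ResilienceConfig`] to async operations: owns the circuit
/// breaker (when one is configured) and drives the timeout/retry loop in
/// [`Resilience::execute`].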
pub struct Resilience {
    config: ResilienceConfig,
    circuit: Option<Arc<CircuitBreaker>>,
}

impl Resilience {
    pub fn new(config: ResilienceConfig) -> Self {
        let circuit = config
            .circuit
            .as_ref()
            .map(|c| Arc::new(CircuitBreaker::new(c.clone())));
        Self { config, circuit }
    }

    pub fn config(&self) -> &ResilienceConfig {
        &self.config
    }

    pub fn circuit(&self) -> Option<&Arc<CircuitBreaker>> {
        self.circuit.as_ref()
    }

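    /// Runs `operation` with the configured timeout, retry policy, and
    /// optional circuit breaker. When the breaker is open the call fails
    /// immediately with a 503 `circuit_open` error; otherwise each attempt is
    /// wrapped in the timeout, its outcome is recorded on the breaker, and
    /// retryable errors are retried up to `max_retries`, sleeping for the
    /// backoff delay (or the error's retry-after hint, whichever is longer)
    /// between attempts.
    ///
    /// A minimal call-site sketch; `my_client` and `send_request` are
    /// placeholders for whatever fallible async operation the caller wants to
    /// protect, and its error type is assumed to convert to and from
    /// `crate::Error`:
    ///
    /// ```ignore
    /// let client = std::sync::Arc::new(my_client);
    /// let resilience = Resilience::new(ResilienceConfig::conservative());
    /// let response = resilience
    ///     .execute(move || {
    ///         let client = client.clone();
    ///         Box::pin(async move { client.send_request().await })
    ///     })
    ///     .await?;
    /// ```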
    pub async fn execute<F, T, E>(&self, mut operation: F) -> Result<T, E>
    where
        F: FnMut() -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<T, E>> + Send>>,
        E: Into<crate::Error> + From<crate::Error> + Clone,
    {
        // Fail fast when the circuit breaker is open instead of issuing the request.
        if let Some(ref cb) = self.circuit
            && !cb.allow_request()
        {
            let err = crate::Error::Api {
                message: "Circuit breaker is open".to_string(),
                status: Some(503),
                error_type: Some("circuit_open".to_string()),
            };
            return Err(E::from(err));
        }

        let mut attempts = 0;
        loop {
            // Every attempt is bounded by the configured timeout.
            let result = tokio::time::timeout(self.config.timeout, operation()).await;

            match result {
                Ok(Ok(value)) => {
                    if let Some(ref cb) = self.circuit {
                        cb.record_success();
                    }
                    return Ok(value);
                }
                Ok(Err(e)) => {
                    let error: crate::Error = e.clone().into();

                    if let Some(ref cb) = self.circuit {
                        cb.record_failure();
                    }

                    // Give up once the retry budget is spent or the error is not retryable.
                    attempts += 1;
                    if attempts > self.config.retry.max_retries {
                        return Err(e);
                    }

                    if !self.should_retry(&error) {
                        return Err(e);
                    }

                    let delay = self.config.retry.backoff.delay_for(attempts);

                    // Honor a retry-after hint on the error when it exceeds the backoff delay.
                    if let Some(retry_after) = error.retry_after() {
                        tokio::time::sleep(retry_after.max(delay)).await;
                    } else {
                        tokio::time::sleep(delay).await;
                    }
                }
                Err(_timeout) => {
                    // Timed-out attempts count as failures and are retried until the budget runs out.
                    if let Some(ref cb) = self.circuit {
                        cb.record_failure();
                    }

                    attempts += 1;
                    if attempts > self.config.retry.max_retries {
                        return Err(E::from(crate::Error::Timeout(self.config.timeout)));
                    }

                    let delay = self.config.retry.backoff.delay_for(attempts);
                    tokio::time::sleep(delay).await;
                }
            }
        }
    }

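    /// Classifies an error as retryable according to the retry flags: rate
    /// limits, network failures, and 5xx API responses (status 529 is matched
    /// explicitly, though the general 5xx arm below also covers it); anything
    /// else fails fast.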
    fn should_retry(&self, error: &crate::Error) -> bool {
        match error {
            crate::Error::RateLimit { .. } => self.config.retry.retry_on_rate_limit,
            crate::Error::Network(_) => self.config.retry.retry_on_network_error,
            crate::Error::Api {
                status: Some(529), ..
            } => self.config.retry.retry_on_server_error,
            crate::Error::Api {
                status: Some(500..=599),
                ..
            } => self.config.retry.retry_on_server_error,
            _ => false,
        }
    }
}

impl Default for Resilience {
    fn default() -> Self {
        Self::new(ResilienceConfig::default())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_default_config() {
        let config = ResilienceConfig::default();
        assert_eq!(config.retry.max_retries, 3);
        assert!(config.circuit.is_some());
    }

    #[test]
    fn test_no_retry_config() {
        let config = ResilienceConfig::no_retry();
        assert_eq!(config.retry.max_retries, 0);
        assert!(config.circuit.is_none());
    }

    #[test]
    fn test_aggressive_config() {
        let config = ResilienceConfig::aggressive();
        assert_eq!(config.retry.max_retries, 5);
        assert!(config.circuit.is_some());
    }
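
    // Additional checks grounded in the presets and wiring defined above.
    #[test]
    fn test_conservative_config() {
        let config = ResilienceConfig::conservative();
        assert_eq!(config.retry.max_retries, 2);
        assert_eq!(config.timeout.as_secs(), 60);
        assert!(config.circuit.is_some());
    }

    #[test]
    fn test_circuit_wiring() {
        // A breaker instance is only constructed when the config provides one.
        let with_breaker = Resilience::new(ResilienceConfig::default());
        assert!(with_breaker.circuit().is_some());

        let without_breaker = Resilience::new(ResilienceConfig::no_retry());
        assert!(without_breaker.circuit().is_none());
    }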
}