//! Error recovery utilities: exponential backoff configuration, recovery
//! strategy selection, a retry handler, and graceful degradation tracking.

use crate::error::{Error, Result};
use std::time::Duration;
use tracing::{debug, error, info, warn};

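/// Configuration for exponential backoff between retry attempts.
///
/// Delays start at `initial_delay_ms` and are multiplied by `multiplier`
/// after each failed attempt, capped at `max_delay_ms`. After `max_retries`
/// retries the operation is given up on.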
#[derive(Debug, Clone)]
pub struct BackoffConfig {
    /// Delay before the first retry, in milliseconds.
    pub initial_delay_ms: u64,
    /// Upper bound on any computed delay, in milliseconds.
    pub max_delay_ms: u64,
    /// Factor by which the delay grows after each failed attempt.
    pub multiplier: f64,
    /// Maximum number of retries before the operation fails for good.
    pub max_retries: u32,
}

impl BackoffConfig {
    /// Creates a configuration with the defaults: 100 ms initial delay,
    /// 30 s maximum delay, a 2.0 multiplier, and 5 retries.
    pub fn new() -> Self {
        Self {
            initial_delay_ms: 100,
            max_delay_ms: 30000,
            multiplier: 2.0,
            max_retries: 5,
        }
    }

    /// Sets the initial delay in milliseconds.
    pub fn with_initial_delay(mut self, delay_ms: u64) -> Self {
        self.initial_delay_ms = delay_ms;
        self
    }

    /// Sets the maximum delay in milliseconds.
    pub fn with_max_delay(mut self, delay_ms: u64) -> Self {
        self.max_delay_ms = delay_ms;
        self
    }

    /// Sets the backoff multiplier.
    pub fn with_multiplier(mut self, multiplier: f64) -> Self {
        self.multiplier = multiplier;
        self
    }

    /// Sets the maximum number of retries.
    pub fn with_max_retries(mut self, retries: u32) -> Self {
        self.max_retries = retries;
        self
    }

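    /// Computes the delay for the given zero-based attempt as
    /// `initial_delay_ms * multiplier^attempt`, capped at `max_delay_ms`.
    /// With the defaults this yields 100 ms, 200 ms, 400 ms, and so on.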
    pub fn calculate_delay(&self, attempt: u32) -> Duration {
        let delay = (self.initial_delay_ms as f64 * self.multiplier.powi(attempt as i32)) as u64;
        let delay = delay.min(self.max_delay_ms);
        Duration::from_millis(delay)
    }
}

impl Default for BackoffConfig {
    fn default() -> Self {
        Self::new()
    }
}

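/// The recovery strategy to apply for a given error, as chosen by
/// [`determine_recovery_strategy`].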
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RecoveryStrategy {
    /// The failure is transient and worth retrying.
    Retry,
    /// The error is permanent; fail immediately.
    Fail,
    /// Fall back to an alternative (e.g. defaults for configuration or storage).
    Fallback,
    /// Continue with reduced functionality using whatever is still available.
    GracefulDegradation,
}

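/// Maps an [`Error`] to a [`RecoveryStrategy`]: transient failures (timeouts,
/// connection and server errors, interruptions) are retried,
/// lookup/permission/validation errors fail immediately, configuration and
/// storage errors fall back, and anything else degrades gracefully.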
pub fn determine_recovery_strategy(error: &Error) -> RecoveryStrategy {
    match error {
        Error::TimeoutError(_) => RecoveryStrategy::Retry,
        Error::ConnectionError(_) => RecoveryStrategy::Retry,
        Error::ServerDisconnected(_) => RecoveryStrategy::Retry,
        Error::ExecutionInterrupted => RecoveryStrategy::Retry,

        Error::ServerError(_) => RecoveryStrategy::Retry,

        Error::ToolNotFound(_) => RecoveryStrategy::Fail,
        Error::PermissionDenied(_) => RecoveryStrategy::Fail,
        Error::NamingConflict(_) => RecoveryStrategy::Fail,
        Error::MultipleNamingConflicts(_) => RecoveryStrategy::Fail,

        Error::ValidationError(_) => RecoveryStrategy::Fail,
        Error::ParameterValidationError(_) => RecoveryStrategy::Fail,
        Error::OutputValidationError(_) => RecoveryStrategy::Fail,
        Error::InvalidToolParameters(_) => RecoveryStrategy::Fail,
        Error::InvalidToolOutput(_) => RecoveryStrategy::Fail,

        Error::ConfigError(_) => RecoveryStrategy::Fallback,
        Error::ConfigValidationError(_) => RecoveryStrategy::Fallback,
        Error::StorageError(_) => RecoveryStrategy::Fallback,

        _ => RecoveryStrategy::GracefulDegradation,
    }
}

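/// Executes operations with automatic retries and exponential backoff,
/// driven by a [`BackoffConfig`].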
pub struct RetryHandler {
    config: BackoffConfig,
}

impl RetryHandler {
    /// Creates a handler with the default backoff configuration.
    pub fn new() -> Self {
        Self {
            config: BackoffConfig::new(),
        }
    }

    /// Creates a handler with a custom backoff configuration.
    pub fn with_config(config: BackoffConfig) -> Self {
        Self { config }
    }

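    /// Runs `operation` until it succeeds, returning immediately on
    /// non-recoverable errors and giving up with
    /// [`Error::MaxRetriesExceeded`] once `max_retries` is exhausted.
    /// Between attempts it sleeps for the configured backoff delay.
    ///
    /// A minimal usage sketch (illustrative only, not compiled as a doctest;
    /// the async block stands in for any fallible operation):
    ///
    /// ```ignore
    /// let handler = RetryHandler::new();
    /// let value = handler
    ///     .execute_with_retry("demo_op", || Box::pin(async { Ok::<_, Error>(42) }))
    ///     .await?;
    /// ```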
    pub async fn execute_with_retry<F, T>(&self, operation_name: &str, mut operation: F) -> Result<T>
    where
        F: FnMut() -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<T>>>>,
    {
        let mut attempt = 0;

        loop {
            debug!(
                "Executing operation '{}' (attempt {}/{})",
                operation_name,
                attempt + 1,
                self.config.max_retries + 1
            );

            match operation().await {
                Ok(result) => {
                    if attempt > 0 {
                        info!(
                            "Operation '{}' succeeded after {} retries",
                            operation_name, attempt
                        );
                    }
                    return Ok(result);
                }
                Err(err) => {
                    if !err.is_recoverable() {
                        debug!(
                            "Operation '{}' failed with non-recoverable error: {}",
                            operation_name, err
                        );
                        return Err(err);
                    }

                    if attempt >= self.config.max_retries {
                        error!(
                            "Operation '{}' failed after {} retries: {}",
                            operation_name, attempt, err
                        );
                        return Err(Error::MaxRetriesExceeded(format!(
                            "Operation '{}' failed after {} retries: {}",
                            operation_name, attempt, err
                        )));
                    }

                    let delay = self.config.calculate_delay(attempt);
                    warn!(
                        "Operation '{}' failed (attempt {}): {}. Retrying in {:?}",
                        operation_name, attempt + 1, err, delay
                    );

                    tokio::time::sleep(delay).await;
                    attempt += 1;
                }
            }
        }
    }

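    /// Same as [`execute_with_retry`](Self::execute_with_retry), but each
    /// attempt is additionally bounded by `timeout_ms`; attempts that time
    /// out are retried like recoverable errors, and the final timeout is
    /// reported as [`Error::TimeoutError`].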
    pub async fn execute_with_retry_and_timeout<F, T>(
        &self,
        operation_name: &str,
        timeout_ms: u64,
        mut operation: F,
    ) -> Result<T>
    where
        F: FnMut() -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<T>>>>,
    {
        let timeout = Duration::from_millis(timeout_ms);
        let mut attempt = 0;

        loop {
            debug!(
                "Executing operation '{}' with timeout {:?} (attempt {}/{})",
                operation_name,
                timeout,
                attempt + 1,
                self.config.max_retries + 1
            );

            match tokio::time::timeout(timeout, operation()).await {
                Ok(Ok(result)) => {
                    if attempt > 0 {
                        info!(
                            "Operation '{}' succeeded after {} retries",
                            operation_name, attempt
                        );
                    }
                    return Ok(result);
                }
                Ok(Err(err)) => {
                    if !err.is_recoverable() {
                        debug!(
                            "Operation '{}' failed with non-recoverable error: {}",
                            operation_name, err
                        );
                        return Err(err);
                    }

                    if attempt >= self.config.max_retries {
                        error!(
                            "Operation '{}' failed after {} retries: {}",
                            operation_name, attempt, err
                        );
                        return Err(Error::MaxRetriesExceeded(format!(
                            "Operation '{}' failed after {} retries: {}",
                            operation_name, attempt, err
                        )));
                    }

                    let delay = self.config.calculate_delay(attempt);
                    warn!(
                        "Operation '{}' failed (attempt {}): {}. Retrying in {:?}",
                        operation_name, attempt + 1, err, delay
                    );

                    tokio::time::sleep(delay).await;
                    attempt += 1;
                }
                Err(_) => {
                    if attempt >= self.config.max_retries {
                        error!(
                            "Operation '{}' timed out after {} retries",
                            operation_name, attempt
                        );
                        return Err(Error::TimeoutError(timeout_ms));
                    }

                    let delay = self.config.calculate_delay(attempt);
                    warn!(
                        "Operation '{}' timed out (attempt {}). Retrying in {:?}",
                        operation_name,
                        attempt + 1,
                        delay
                    );

                    tokio::time::sleep(delay).await;
                    attempt += 1;
                }
            }
        }
    }
}

impl Default for RetryHandler {
    fn default() -> Self {
        Self::new()
    }
}

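/// Tracks which resources (e.g. servers) are currently usable so callers can
/// keep operating on the available subset when some resources fail.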
#[derive(Debug, Clone)]
pub struct GracefulDegradationHandler {
    /// Identifiers of resources that are currently usable.
    available_resources: Vec<String>,
    /// Identifiers of resources that are currently unusable.
    unavailable_resources: Vec<String>,
}

impl GracefulDegradationHandler {
    /// Creates a handler with no tracked resources.
    pub fn new() -> Self {
        Self {
            available_resources: Vec::new(),
            unavailable_resources: Vec::new(),
        }
    }

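    /// Marks `resource_id` as available, removing it from the unavailable
    /// list if it was previously marked unavailable.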
    pub fn mark_available(&mut self, resource_id: String) {
        // Avoid duplicate entries if the resource is already tracked as available.
        if !self.available_resources.contains(&resource_id) {
            self.available_resources.push(resource_id.clone());
        }
        self.unavailable_resources.retain(|r| r != &resource_id);
        info!("Resource marked as available: {}", resource_id);
    }

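    /// Marks `resource_id` as unavailable, removing it from the available
    /// list if it was previously marked available.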
    pub fn mark_unavailable(&mut self, resource_id: String) {
        // Avoid duplicate entries if the resource is already tracked as unavailable.
        if !self.unavailable_resources.contains(&resource_id) {
            self.unavailable_resources.push(resource_id.clone());
        }
        self.available_resources.retain(|r| r != &resource_id);
        warn!("Resource marked as unavailable: {}", resource_id);
    }

    /// Returns the identifiers of all currently available resources.
    pub fn get_available_resources(&self) -> Vec<String> {
        self.available_resources.clone()
    }

    /// Returns the identifiers of all currently unavailable resources.
    pub fn get_unavailable_resources(&self) -> Vec<String> {
        self.unavailable_resources.clone()
    }

    /// Returns `true` if at least one resource is available.
    pub fn has_available_resources(&self) -> bool {
        !self.available_resources.is_empty()
    }

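    /// Returns the share of tracked resources that are available as a
    /// percentage (0.0 to 100.0), or 0.0 when no resources are tracked.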
    pub fn availability_percentage(&self) -> f64 {
        let total = self.available_resources.len() + self.unavailable_resources.len();
        if total == 0 {
            0.0
        } else {
            (self.available_resources.len() as f64 / total as f64) * 100.0
        }
    }
}

impl Default for GracefulDegradationHandler {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;

    #[test]
    fn test_backoff_config_default() {
        let config = BackoffConfig::new();
        assert_eq!(config.initial_delay_ms, 100);
        assert_eq!(config.max_delay_ms, 30000);
        assert_eq!(config.multiplier, 2.0);
        assert_eq!(config.max_retries, 5);
    }

    #[test]
    fn test_backoff_config_custom() {
        let config = BackoffConfig::new()
            .with_initial_delay(50)
            .with_max_delay(10000)
            .with_multiplier(1.5)
            .with_max_retries(3);

        assert_eq!(config.initial_delay_ms, 50);
        assert_eq!(config.max_delay_ms, 10000);
        assert_eq!(config.multiplier, 1.5);
        assert_eq!(config.max_retries, 3);
    }

    #[test]
    fn test_calculate_delay() {
        let config = BackoffConfig::new()
            .with_initial_delay(100)
            .with_max_delay(10000)
            .with_multiplier(2.0);

        assert_eq!(config.calculate_delay(0), Duration::from_millis(100));
        assert_eq!(config.calculate_delay(1), Duration::from_millis(200));
        assert_eq!(config.calculate_delay(2), Duration::from_millis(400));
        assert_eq!(config.calculate_delay(3), Duration::from_millis(800));
        assert_eq!(config.calculate_delay(4), Duration::from_millis(1600));
        assert_eq!(config.calculate_delay(5), Duration::from_millis(3200));
        assert_eq!(config.calculate_delay(6), Duration::from_millis(6400));
        assert_eq!(config.calculate_delay(7), Duration::from_millis(10000));
    }

    #[test]
    fn test_determine_recovery_strategy_retry() {
        assert_eq!(
            determine_recovery_strategy(&Error::TimeoutError(5000)),
            RecoveryStrategy::Retry
        );
        assert_eq!(
            determine_recovery_strategy(&Error::ConnectionError("test".to_string())),
            RecoveryStrategy::Retry
        );
    }

    #[test]
    fn test_determine_recovery_strategy_fail() {
        assert_eq!(
            determine_recovery_strategy(&Error::ToolNotFound("test".to_string())),
            RecoveryStrategy::Fail
        );
        assert_eq!(
            determine_recovery_strategy(&Error::PermissionDenied("test".to_string())),
            RecoveryStrategy::Fail
        );
    }

    #[test]
    fn test_determine_recovery_strategy_fallback() {
        assert_eq!(
            determine_recovery_strategy(&Error::ConfigError("test".to_string())),
            RecoveryStrategy::Fallback
        );
    }

    #[test]
    fn test_retry_handler_default() {
        let handler = RetryHandler::new();
        assert_eq!(handler.config.max_retries, 5);
    }

    #[test]
    fn test_retry_handler_custom_config() {
        let config = BackoffConfig::new().with_max_retries(3);
        let handler = RetryHandler::with_config(config);
        assert_eq!(handler.config.max_retries, 3);
    }

    #[tokio::test]
    async fn test_execute_with_retry_success() {
        let handler = RetryHandler::new();
        let call_count = Arc::new(tokio::sync::Mutex::new(0));
        let call_count_clone = call_count.clone();

        let result = handler
            .execute_with_retry("test_op", || {
                let count = call_count_clone.clone();
                Box::pin(async move {
                    let mut c = count.lock().await;
                    *c += 1;
                    Ok::<i32, Error>(42)
                })
            })
            .await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 42);
        assert_eq!(*call_count.lock().await, 1);
    }

    #[tokio::test]
    async fn test_execute_with_retry_non_recoverable_error() {
        let handler = RetryHandler::new();
        let call_count = Arc::new(tokio::sync::Mutex::new(0));
        let call_count_clone = call_count.clone();

        let result = handler
            .execute_with_retry("test_op", || {
                let count = call_count_clone.clone();
                Box::pin(async move {
                    let mut c = count.lock().await;
                    *c += 1;
                    Err::<i32, Error>(Error::ToolNotFound("test".to_string()))
                })
            })
            .await;

        assert!(result.is_err());
        assert_eq!(*call_count.lock().await, 1);
    }

    #[test]
    fn test_graceful_degradation_handler() {
        let mut handler = GracefulDegradationHandler::new();

        handler.mark_available("server1".to_string());
        handler.mark_available("server2".to_string());
        handler.mark_unavailable("server3".to_string());

        assert_eq!(handler.get_available_resources().len(), 2);
        assert_eq!(handler.get_unavailable_resources().len(), 1);
        assert!(handler.has_available_resources());
        assert_eq!(handler.availability_percentage(), 66.66666666666666);
    }

    #[test]
    fn test_graceful_degradation_handler_all_unavailable() {
        let mut handler = GracefulDegradationHandler::new();

        handler.mark_unavailable("server1".to_string());
        handler.mark_unavailable("server2".to_string());

        assert!(!handler.has_available_resources());
        assert_eq!(handler.availability_percentage(), 0.0);
    }

    #[test]
    fn test_graceful_degradation_handler_recovery() {
        let mut handler = GracefulDegradationHandler::new();

        handler.mark_unavailable("server1".to_string());
        assert!(!handler.has_available_resources());

        handler.mark_available("server1".to_string());
        assert!(handler.has_available_resources());
        assert_eq!(handler.get_unavailable_resources().len(), 0);
    }
}