aperture_cli/resilience/
mod.rs

1use crate::error::Error;
2use std::time::{Duration, Instant};
3use tokio::time::sleep;
4
/// Tunable parameters that control how failed operations are retried.
#[derive(Clone, Debug)]
pub struct RetryConfig {
    /// Upper bound on how many times the operation is invoked in total
    /// (the first try counts as one attempt).
    pub max_attempts: usize,
    /// Delay, in milliseconds, used for the first retry (attempt index 0)
    /// before any backoff growth is applied.
    pub initial_delay_ms: u64,
    /// Ceiling, in milliseconds, applied to the exponentially grown delay.
    /// Jitter is added after this cap, so the slept duration may exceed it
    /// by up to 25% when `jitter` is enabled.
    pub max_delay_ms: u64,
    /// Factor raised to the power of the attempt index to grow the delay.
    pub backoff_multiplier: f64,
    /// When `true`, each delay is scaled by a random factor between 1.0 and
    /// 1.25 so that concurrent clients do not retry in lockstep.
    pub jitter: bool,
}
14
15impl Default for RetryConfig {
16    fn default() -> Self {
17        Self {
18            max_attempts: 3,
19            initial_delay_ms: 100,
20            max_delay_ms: 5000,
21            backoff_multiplier: 2.0,
22            jitter: true,
23        }
24    }
25}
26
/// Timeout settings applied when building the resilient HTTP client.
#[derive(Clone, Debug)]
pub struct TimeoutConfig {
    /// Connect timeout, in milliseconds, handed to the client builder's
    /// `connect_timeout` setting.
    pub connect_timeout_ms: u64,
    /// Overall request timeout, in milliseconds, handed to the client
    /// builder's `timeout` setting.
    pub request_timeout_ms: u64,
}
33
34impl Default for TimeoutConfig {
35    fn default() -> Self {
36        Self {
37            connect_timeout_ms: 10_000, // 10 seconds
38            request_timeout_ms: 30_000, // 30 seconds
39        }
40    }
41}
42
43/// Determines if an error is retryable based on its characteristics
44#[must_use]
45pub fn is_retryable_error(error: &reqwest::Error) -> bool {
46    // Connection errors are usually retryable
47    if error.is_connect() {
48        return true;
49    }
50
51    // Timeout errors are retryable
52    if error.is_timeout() {
53        return true;
54    }
55
56    // Check HTTP status codes
57    error.status().is_none_or(|status| match status.as_u16() {
58        // Client errors (4xx) are generally not retryable except for specific cases
59        408 | 429 => true, // Request Timeout, Too Many Requests
60
61        // Server errors (5xx) are generally retryable except for specific cases
62        500..=599 => !matches!(status.as_u16(), 501 | 505), // Exclude Not Implemented, HTTP Version not supported
63
64        _ => false, // All other codes (1xx, 2xx, 3xx, 4xx except 408/429) are not retryable
65    })
66}
67
68/// Calculates the delay for a given retry attempt with exponential backoff
69#[must_use]
70#[allow(
71    clippy::cast_precision_loss,
72    clippy::cast_possible_truncation,
73    clippy::cast_sign_loss,
74    clippy::cast_possible_wrap
75)]
76pub fn calculate_retry_delay(config: &RetryConfig, attempt: usize) -> Duration {
77    let base_delay = config.initial_delay_ms as f64;
78    let attempt_i32 = attempt.min(30) as i32; // Cap attempt to prevent overflow
79    let delay_ms =
80        (base_delay * config.backoff_multiplier.powi(attempt_i32)).min(config.max_delay_ms as f64);
81
82    let final_delay_ms = if config.jitter {
83        // Add up to 25% jitter to prevent thundering herd
84        let jitter_factor = fastrand::f64().mul_add(0.25, 1.0);
85        delay_ms * jitter_factor
86    } else {
87        delay_ms
88    } as u64;
89
90    Duration::from_millis(final_delay_ms)
91}
92
93/// Executes a future with retry logic based on the configuration
94///
95/// # Errors
96/// Returns an error if all retry attempts fail or if a non-retryable error occurs
97pub async fn execute_with_retry<F, Fut, T>(
98    config: &RetryConfig,
99    _operation_name: &str,
100    mut operation: F,
101) -> Result<T, Error>
102where
103    F: FnMut() -> Fut,
104    Fut: std::future::Future<Output = Result<T, reqwest::Error>>,
105{
106    let _start_time = Instant::now();
107    let mut last_error = None;
108
109    for attempt in 0..config.max_attempts {
110        match operation().await {
111            Ok(result) => {
112                // Successfully completed operation
113                return Ok(result);
114            }
115            Err(error) => {
116                let is_last_attempt = attempt + 1 >= config.max_attempts;
117                let is_retryable = is_retryable_error(&error);
118
119                if is_last_attempt || !is_retryable {
120                    let error_message = error.to_string();
121                    last_error = Some(error_message.clone());
122
123                    if !is_retryable {
124                        return Err(Error::transient_network_error(error_message, false));
125                    }
126                    break;
127                }
128
129                // Calculate delay and sleep before retry
130                let delay = calculate_retry_delay(config, attempt);
131
132                sleep(delay).await;
133                last_error = Some(error.to_string());
134            }
135        }
136    }
137
138    Err(Error::retry_limit_exceeded(
139        config.max_attempts.try_into().unwrap_or(u32::MAX),
140        last_error.unwrap_or_else(|| "Unknown error".to_string()),
141    ))
142}
143
144/// Creates a resilient HTTP client with timeout configuration
145///
146/// # Errors
147/// Returns an error if the HTTP client cannot be created with the specified configuration
148pub fn create_resilient_client(timeout_config: &TimeoutConfig) -> Result<reqwest::Client, Error> {
149    reqwest::Client::builder()
150        .connect_timeout(Duration::from_millis(timeout_config.connect_timeout_ms))
151        .timeout(Duration::from_millis(timeout_config.request_timeout_ms))
152        .build()
153        .map_err(|e| {
154            Error::network_request_failed(format!("Failed to create resilient HTTP client: {e}"))
155        })
156}
157
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the retry configuration shared by the delay tests: a 100 ms
    /// base delay that doubles per attempt and is capped at 1000 ms.
    fn delay_config(max_attempts: usize, jitter: bool) -> RetryConfig {
        RetryConfig {
            max_attempts,
            initial_delay_ms: 100,
            max_delay_ms: 1000,
            backoff_multiplier: 2.0,
            jitter,
        }
    }

    #[test]
    fn test_calculate_retry_delay() {
        let config = delay_config(5, false);

        // Without jitter the delay doubles with every attempt.
        let expectations: [(usize, u128); 3] = [(0, 100), (1, 200), (2, 400)];
        for (attempt, expected_ms) in expectations {
            assert_eq!(calculate_retry_delay(&config, attempt).as_millis(), expected_ms);
        }

        // Far along the curve the delay is pinned to the configured maximum.
        assert_eq!(calculate_retry_delay(&config, 10).as_millis(), 1000);
    }

    #[test]
    fn test_calculate_retry_delay_with_jitter() {
        let config = delay_config(3, true);

        // Jittered delays usually differ between calls, so only check that
        // each sample lies between the base delay and base delay plus 25%.
        for _ in 0..2 {
            let millis = calculate_retry_delay(&config, 0).as_millis();
            assert!((100..=125).contains(&millis));
        }
    }

    #[test]
    fn test_default_configs() {
        let RetryConfig {
            max_attempts,
            initial_delay_ms,
            ..
        } = RetryConfig::default();
        assert_eq!(max_attempts, 3);
        assert_eq!(initial_delay_ms, 100);

        let TimeoutConfig {
            connect_timeout_ms,
            request_timeout_ms,
        } = TimeoutConfig::default();
        assert_eq!(connect_timeout_ms, 10_000);
        assert_eq!(request_timeout_ms, 30_000);
    }
}