aperture_cli/resilience/
mod.rs

1use crate::error::Error;
2use reqwest::header::HeaderMap;
3use std::time::{Duration, Instant, SystemTime};
4use tokio::time::sleep;
5
/// Configuration for retry behavior.
///
/// Read by [`calculate_retry_delay`] to size the pause between attempts and by the retry
/// executors to bound how many times an operation is invoked.
#[derive(Debug, Clone, PartialEq)]
pub struct RetryConfig {
    /// Total number of times the operation may be invoked, including the first call.
    pub max_attempts: usize,
    /// Backoff delay, in milliseconds, used before the first retry.
    pub initial_delay_ms: u64,
    /// Upper bound, in milliseconds, applied to the exponential backoff delay.
    pub max_delay_ms: u64,
    /// Factor the backoff delay is multiplied by for every further attempt.
    pub backoff_multiplier: f64,
    /// Whether random jitter (up to 25%) is added to the backoff delay.
    pub jitter: bool,
}

impl Default for RetryConfig {
    /// Three attempts, starting at 100 ms and doubling up to 5 s, with jitter enabled.
    fn default() -> Self {
        Self {
            max_attempts: 3,
            initial_delay_ms: 100,
            max_delay_ms: 5000,
            backoff_multiplier: 2.0,
            jitter: true,
        }
    }
}
27
/// Configuration for timeout behavior.
///
/// Both values are expressed in milliseconds and are handed to the HTTP client builder
/// by [`create_resilient_client`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TimeoutConfig {
    /// Timeout, in milliseconds, passed to the client builder's `connect_timeout`.
    pub connect_timeout_ms: u64,
    /// Timeout, in milliseconds, passed to the client builder's `timeout`.
    pub request_timeout_ms: u64,
}

impl Default for TimeoutConfig {
    /// Ten seconds to connect, thirty seconds for the request.
    fn default() -> Self {
        Self {
            connect_timeout_ms: 10_000, // 10 seconds
            request_timeout_ms: 30_000, // 30 seconds
        }
    }
}
43
/// Information about a single retry attempt for logging and error reporting.
///
/// Entries compare equal when every field matches, which lets callers and tests compare
/// whole history records instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RetryInfo {
    /// The retry attempt number (1-indexed)
    pub attempt: u32,
    /// The HTTP status code that triggered the retry, if available
    pub status_code: Option<u16>,
    /// The delay in milliseconds before this retry
    pub delay_ms: u64,
    /// Human-readable reason for the retry
    pub reason: String,
}

impl RetryInfo {
    /// Creates a new `RetryInfo` instance.
    ///
    /// `reason` accepts anything convertible into a `String`, so both string literals and
    /// already-owned strings can be passed without an explicit conversion at the call site.
    #[must_use]
    pub fn new(
        attempt: u32,
        status_code: Option<u16>,
        delay_ms: u64,
        reason: impl Into<String>,
    ) -> Self {
        Self {
            attempt,
            status_code,
            delay_ms,
            reason: reason.into(),
        }
    }
}
74
/// Result of a retry operation, including retry history for diagnostics.
///
/// Returned by `execute_with_retry_tracking`, which always produces a value of this type
/// rather than a bare `Result`, so the history is available on failure as well as success.
#[derive(Debug)]
pub struct RetryResult<T> {
    /// Final outcome: the operation's value on success, otherwise the error describing
    /// either a non-retryable failure or the exhausted retry budget
    pub result: Result<T, Error>,
    /// One entry per retry that was scheduled, in order (empty when no retry was scheduled,
    /// e.g. the first attempt succeeded or failed with a non-retryable error)
    pub retry_history: Vec<RetryInfo>,
    /// Total number of attempts made (including the final one)
    pub total_attempts: u32,
}
85
86/// Parses the `Retry-After` HTTP header and returns the delay duration.
87///
88/// The `Retry-After` header can be specified in two formats:
89/// - Delay in seconds: `Retry-After: 120`
90/// - HTTP-date: `Retry-After: Wed, 21 Oct 2015 07:28:00 GMT`
91///
92/// Returns `None` if the header is absent, malformed, or represents a time in the past.
93#[must_use]
94pub fn parse_retry_after_header(headers: &HeaderMap) -> Option<Duration> {
95    let retry_after = headers.get("retry-after")?;
96    let value = retry_after.to_str().ok()?;
97    parse_retry_after_value(value)
98}
99
100/// Parses a `Retry-After` header value string and returns the delay duration.
101///
102/// This is the core parsing logic that can be used with any string source.
103/// Supports two formats:
104/// - Delay in seconds: `"120"`
105/// - HTTP-date: `"Wed, 21 Oct 2015 07:28:00 GMT"`
106///
107/// Returns `None` if the value is malformed or represents a time in the past.
108#[must_use]
109pub fn parse_retry_after_value(value: &str) -> Option<Duration> {
110    // Try parsing as seconds first (most common)
111    if let Ok(seconds) = value.parse::<u64>() {
112        return Some(Duration::from_secs(seconds));
113    }
114
115    // Try parsing as HTTP-date (RFC 7231 format)
116    // Format: "Wed, 21 Oct 2015 07:28:00 GMT"
117    // Returns None if parsing fails or date is in the past
118    httpdate::parse_http_date(value)
119        .ok()
120        .and_then(|date| date.duration_since(SystemTime::now()).ok())
121}
122
123/// Calculates the retry delay, respecting an optional `Retry-After` header value.
124///
125/// If `retry_after` is provided and greater than the calculated exponential backoff delay,
126/// the `retry_after` value is used instead (still capped at `max_delay_ms`).
127#[must_use]
128#[allow(
129    clippy::cast_precision_loss,
130    clippy::cast_possible_truncation,
131    clippy::cast_sign_loss,
132    clippy::cast_possible_wrap
133)]
134pub fn calculate_retry_delay_with_header(
135    config: &RetryConfig,
136    attempt: usize,
137    retry_after: Option<Duration>,
138) -> Duration {
139    let calculated_delay = calculate_retry_delay(config, attempt);
140
141    retry_after.map_or(calculated_delay, |server_delay| {
142        // Use server-specified delay if it's longer than our calculated delay
143        let delay = calculated_delay.max(server_delay);
144        // But cap it at max_delay_ms
145        let max_delay = Duration::from_millis(config.max_delay_ms);
146        delay.min(max_delay)
147    })
148}
149
150/// Determines if an error is retryable based on its characteristics
151#[must_use]
152pub fn is_retryable_error(error: &reqwest::Error) -> bool {
153    // Connection errors are usually retryable
154    if error.is_connect() {
155        return true;
156    }
157
158    // Timeout errors are retryable
159    if error.is_timeout() {
160        return true;
161    }
162
163    // Check HTTP status codes
164    error
165        .status()
166        .is_none_or(|status| is_retryable_status(status.as_u16()))
167}
168
/// Reports whether a response with the given HTTP status code is worth retrying.
///
/// Retryable:
/// - 408 Request Timeout
/// - 429 Too Many Requests
/// - any 5xx server error other than 501 Not Implemented and
///   505 HTTP Version Not Supported
///
/// Every other code (1xx, 2xx, 3xx, the remaining 4xx, and anything outside the
/// standard ranges) is not retryable.
#[must_use]
pub const fn is_retryable_status(status: u16) -> bool {
    // The two client errors that signal a transient condition.
    let transient_client_error = matches!(status, 408 | 429);

    // Server errors are transient unless they describe a permanent capability gap.
    let server_error = matches!(status, 500..=599);
    let permanent_server_error = matches!(status, 501 | 505);

    transient_client_error || (server_error && !permanent_server_error)
}
187
188/// Calculates the delay for a given retry attempt with exponential backoff
189#[must_use]
190#[allow(
191    clippy::cast_precision_loss,
192    clippy::cast_possible_truncation,
193    clippy::cast_sign_loss,
194    clippy::cast_possible_wrap
195)]
196pub fn calculate_retry_delay(config: &RetryConfig, attempt: usize) -> Duration {
197    let base_delay = config.initial_delay_ms as f64;
198    let attempt_i32 = attempt.min(30) as i32; // Cap attempt to prevent overflow
199    let delay_ms =
200        (base_delay * config.backoff_multiplier.powi(attempt_i32)).min(config.max_delay_ms as f64);
201
202    let final_delay_ms = if config.jitter {
203        // Add up to 25% jitter to prevent thundering herd
204        let jitter_factor = fastrand::f64().mul_add(0.25, 1.0);
205        delay_ms * jitter_factor
206    } else {
207        delay_ms
208    } as u64;
209
210    Duration::from_millis(final_delay_ms)
211}
212
213/// Executes a future with retry logic based on the configuration
214///
215/// # Errors
216/// Returns an error if all retry attempts fail or if a non-retryable error occurs
217pub async fn execute_with_retry<F, Fut, T>(
218    config: &RetryConfig,
219    _operation_name: &str,
220    mut operation: F,
221) -> Result<T, Error>
222where
223    F: FnMut() -> Fut,
224    Fut: std::future::Future<Output = Result<T, reqwest::Error>>,
225{
226    let _start_time = Instant::now();
227    let mut last_error = None;
228
229    for attempt in 0..config.max_attempts {
230        match operation().await {
231            Ok(result) => {
232                // Successfully completed operation
233                return Ok(result);
234            }
235            Err(error) => {
236                let is_last_attempt = attempt + 1 >= config.max_attempts;
237                let is_retryable = is_retryable_error(&error);
238
239                // Handle non-retryable errors immediately
240                if !is_retryable {
241                    let error_message = error.to_string();
242                    return Err(Error::transient_network_error(error_message, false));
243                }
244
245                // Handle last attempt
246                if is_last_attempt {
247                    let error_message = error.to_string();
248                    last_error = Some(error_message);
249                    break;
250                }
251
252                // Calculate delay and sleep before retry
253                let delay = calculate_retry_delay(config, attempt);
254
255                sleep(delay).await;
256                last_error = Some(error.to_string());
257            }
258        }
259    }
260
261    Err(Error::retry_limit_exceeded(
262        config.max_attempts.try_into().unwrap_or(u32::MAX),
263        last_error.unwrap_or_else(|| "Unknown error".to_string()),
264    ))
265}
266
267/// Executes a future with retry logic, tracking all retry attempts for diagnostics.
268///
269/// Unlike `execute_with_retry`, this function returns a `RetryResult` that includes
270/// the full retry history, useful for logging and structured error reporting.
271#[allow(clippy::cast_possible_truncation)]
272pub async fn execute_with_retry_tracking<F, Fut, T>(
273    config: &RetryConfig,
274    operation_name: &str,
275    mut operation: F,
276) -> RetryResult<T>
277where
278    F: FnMut() -> Fut,
279    Fut: std::future::Future<Output = Result<T, reqwest::Error>>,
280{
281    let mut retry_history = Vec::new();
282    let mut last_error = None;
283
284    for attempt in 0..config.max_attempts {
285        match operation().await {
286            Ok(result) => {
287                return RetryResult {
288                    result: Ok(result),
289                    retry_history,
290                    total_attempts: (attempt + 1) as u32,
291                };
292            }
293            Err(error) => {
294                let is_last_attempt = attempt + 1 >= config.max_attempts;
295                let is_retryable = is_retryable_error(&error);
296                let status_code = error.status().map(|s| s.as_u16());
297                let error_message = error.to_string();
298
299                // Handle non-retryable errors immediately
300                if !is_retryable {
301                    return RetryResult {
302                        result: Err(Error::transient_network_error(error_message, false)),
303                        retry_history,
304                        total_attempts: (attempt + 1) as u32,
305                    };
306                }
307
308                // Handle last attempt
309                if is_last_attempt {
310                    last_error = Some(error_message);
311                    break;
312                }
313
314                // Calculate delay
315                let delay = calculate_retry_delay(config, attempt);
316                let delay_ms = delay.as_millis() as u64;
317
318                // Record retry info
319                retry_history.push(RetryInfo::new(
320                    (attempt + 1) as u32,
321                    status_code,
322                    delay_ms,
323                    format!("{operation_name}: {error_message}"),
324                ));
325
326                // Sleep before retry
327                sleep(delay).await;
328                last_error = Some(error_message);
329            }
330        }
331    }
332
333    RetryResult {
334        result: Err(Error::retry_limit_exceeded(
335            config.max_attempts.try_into().unwrap_or(u32::MAX),
336            last_error.unwrap_or_else(|| "Unknown error".to_string()),
337        )),
338        retry_history,
339        total_attempts: config.max_attempts as u32,
340    }
341}
342
343/// Creates a resilient HTTP client with timeout configuration
344///
345/// # Errors
346/// Returns an error if the HTTP client cannot be created with the specified configuration
347pub fn create_resilient_client(timeout_config: &TimeoutConfig) -> Result<reqwest::Client, Error> {
348    reqwest::Client::builder()
349        .connect_timeout(Duration::from_millis(timeout_config.connect_timeout_ms))
350        .timeout(Duration::from_millis(timeout_config.request_timeout_ms))
351        .build()
352        .map_err(|e| {
353            Error::network_request_failed(format!("Failed to create resilient HTTP client: {e}"))
354        })
355}
356
// Unit tests for the resilience helpers: backoff calculation, `Retry-After` parsing,
// status classification, and the two retry executors.
#[cfg(test)]
mod tests {
    use super::*;

    /// Without jitter the delay doubles per attempt and is clamped at `max_delay_ms`.
    #[test]
    fn test_calculate_retry_delay() {
        let config = RetryConfig {
            max_attempts: 5,
            initial_delay_ms: 100,
            max_delay_ms: 1000,
            backoff_multiplier: 2.0,
            jitter: false,
        };

        let delay1 = calculate_retry_delay(&config, 0);
        let delay2 = calculate_retry_delay(&config, 1);
        let delay3 = calculate_retry_delay(&config, 2);

        assert_eq!(delay1.as_millis(), 100);
        assert_eq!(delay2.as_millis(), 200);
        assert_eq!(delay3.as_millis(), 400);

        // Test max delay cap
        let delay_max = calculate_retry_delay(&config, 10);
        assert_eq!(delay_max.as_millis(), 1000);
    }

    /// With jitter the first-attempt delay stays within the base delay plus 25%.
    #[test]
    fn test_calculate_retry_delay_with_jitter() {
        let config = RetryConfig {
            max_attempts: 3,
            initial_delay_ms: 100,
            max_delay_ms: 1000,
            backoff_multiplier: 2.0,
            jitter: true,
        };

        let delay1 = calculate_retry_delay(&config, 0);
        let delay2 = calculate_retry_delay(&config, 0);

        // Jitter is random, so the two samples are not compared with each other;
        // each one is only checked against the expected range.
        assert!(delay1.as_millis() >= 100 && delay1.as_millis() <= 125);
        assert!(delay2.as_millis() >= 100 && delay2.as_millis() <= 125);
    }

    /// Spot-checks the default values of both configuration structs.
    #[test]
    fn test_default_configs() {
        let retry_config = RetryConfig::default();
        assert_eq!(retry_config.max_attempts, 3);
        assert_eq!(retry_config.initial_delay_ms, 100);

        let timeout_config = TimeoutConfig::default();
        assert_eq!(timeout_config.connect_timeout_ms, 10_000);
        assert_eq!(timeout_config.request_timeout_ms, 30_000);
    }

    /// A numeric `Retry-After` value is read as a number of seconds.
    #[test]
    fn test_parse_retry_after_header_seconds() {
        let mut headers = HeaderMap::new();
        headers.insert("retry-after", "120".parse().unwrap());

        let duration = parse_retry_after_header(&headers);
        assert_eq!(duration, Some(Duration::from_secs(120)));
    }

    /// A value of zero is valid and yields a zero-length delay rather than `None`.
    #[test]
    fn test_parse_retry_after_header_zero() {
        let mut headers = HeaderMap::new();
        headers.insert("retry-after", "0".parse().unwrap());

        let duration = parse_retry_after_header(&headers);
        assert_eq!(duration, Some(Duration::from_secs(0)));
    }

    /// A missing header yields `None`.
    #[test]
    fn test_parse_retry_after_header_missing() {
        let headers = HeaderMap::new();

        let duration = parse_retry_after_header(&headers);
        assert_eq!(duration, None);
    }

    /// A value that is neither a number nor an HTTP-date yields `None`.
    #[test]
    fn test_parse_retry_after_header_invalid() {
        let mut headers = HeaderMap::new();
        headers.insert("retry-after", "not-a-number".parse().unwrap());

        let duration = parse_retry_after_header(&headers);
        // Invalid format that's neither a number nor valid HTTP-date
        assert_eq!(duration, None);
    }

    /// Without a server hint the plain backoff delay is returned.
    #[test]
    fn test_calculate_retry_delay_with_header_none() {
        let config = RetryConfig {
            max_attempts: 3,
            initial_delay_ms: 100,
            max_delay_ms: 5000,
            backoff_multiplier: 2.0,
            jitter: false,
        };

        let delay = calculate_retry_delay_with_header(&config, 0, None);
        assert_eq!(delay.as_millis(), 100);
    }

    /// A server hint longer than the backoff delay wins.
    #[test]
    fn test_calculate_retry_delay_with_header_uses_server_delay_when_larger() {
        let config = RetryConfig {
            max_attempts: 3,
            initial_delay_ms: 100,
            max_delay_ms: 5000,
            backoff_multiplier: 2.0,
            jitter: false,
        };

        // Server says wait 3 seconds, which is more than our 100ms
        let retry_after = Some(Duration::from_secs(3));
        let delay = calculate_retry_delay_with_header(&config, 0, retry_after);
        assert_eq!(delay.as_secs(), 3);
    }

    /// A server hint shorter than the backoff delay is ignored.
    #[test]
    fn test_calculate_retry_delay_with_header_uses_calculated_when_larger() {
        let config = RetryConfig {
            max_attempts: 3,
            initial_delay_ms: 5000,
            max_delay_ms: 30_000,
            backoff_multiplier: 2.0,
            jitter: false,
        };

        // Server says wait 1 second, but our calculated delay is 5 seconds
        let retry_after = Some(Duration::from_secs(1));
        let delay = calculate_retry_delay_with_header(&config, 0, retry_after);
        assert_eq!(delay.as_millis(), 5000);
    }

    /// A server hint beyond `max_delay_ms` is clamped to the maximum.
    #[test]
    fn test_calculate_retry_delay_with_header_caps_at_max() {
        let config = RetryConfig {
            max_attempts: 3,
            initial_delay_ms: 100,
            max_delay_ms: 5000,
            backoff_multiplier: 2.0,
            jitter: false,
        };

        // Server says wait 60 seconds, but we cap at 5 seconds
        let retry_after = Some(Duration::from_secs(60));
        let delay = calculate_retry_delay_with_header(&config, 0, retry_after);
        assert_eq!(delay.as_millis(), 5000);
    }

    /// `RetryInfo::new` stores every argument in the matching field.
    #[test]
    fn test_retry_info_new() {
        let info = RetryInfo::new(1, Some(429), 500, "Rate limited");
        assert_eq!(info.attempt, 1);
        assert_eq!(info.status_code, Some(429));
        assert_eq!(info.delay_ms, 500);
        assert_eq!(info.reason, "Rate limited");
    }

    /// `RetryInfo` can be built without a status code.
    #[test]
    fn test_retry_info_without_status_code() {
        let info = RetryInfo::new(2, None, 1000, "Connection refused");
        assert_eq!(info.attempt, 2);
        assert_eq!(info.status_code, None);
        assert_eq!(info.delay_ms, 1000);
        assert_eq!(info.reason, "Connection refused");
    }

    /// A hand-built successful `RetryResult` with no retries exposes its fields as given.
    #[test]
    fn test_retry_result_success_no_retries() {
        let result: RetryResult<i32> = RetryResult {
            result: Ok(42),
            retry_history: vec![],
            total_attempts: 1,
        };
        assert!(result.result.is_ok());
        assert!(result.retry_history.is_empty());
        assert_eq!(result.total_attempts, 1);
    }

    /// A hand-built successful `RetryResult` with one recorded retry exposes its fields as given.
    #[test]
    fn test_retry_result_success_after_retries() {
        let result: RetryResult<i32> = RetryResult {
            result: Ok(42),
            retry_history: vec![RetryInfo::new(1, Some(503), 100, "Service unavailable")],
            total_attempts: 2,
        };
        assert!(result.result.is_ok());
        assert_eq!(result.retry_history.len(), 1);
        assert_eq!(result.total_attempts, 2);
    }

    #[test]
    fn test_is_retryable_status_408_request_timeout() {
        assert!(is_retryable_status(408));
    }

    #[test]
    fn test_is_retryable_status_429_too_many_requests() {
        assert!(is_retryable_status(429));
    }

    #[test]
    fn test_is_retryable_status_500_internal_server_error() {
        assert!(is_retryable_status(500));
    }

    #[test]
    fn test_is_retryable_status_502_bad_gateway() {
        assert!(is_retryable_status(502));
    }

    #[test]
    fn test_is_retryable_status_503_service_unavailable() {
        assert!(is_retryable_status(503));
    }

    #[test]
    fn test_is_retryable_status_504_gateway_timeout() {
        assert!(is_retryable_status(504));
    }

    #[test]
    fn test_is_retryable_status_501_not_implemented_not_retryable() {
        // 501 Not Implemented should not be retryable
        assert!(!is_retryable_status(501));
    }

    #[test]
    fn test_is_retryable_status_505_http_version_not_supported_not_retryable() {
        // 505 HTTP Version Not Supported should not be retryable
        assert!(!is_retryable_status(505));
    }

    #[test]
    fn test_is_retryable_status_4xx_not_retryable() {
        // Most 4xx errors should not be retryable
        assert!(!is_retryable_status(400)); // Bad Request
        assert!(!is_retryable_status(401)); // Unauthorized
        assert!(!is_retryable_status(403)); // Forbidden
        assert!(!is_retryable_status(404)); // Not Found
        assert!(!is_retryable_status(405)); // Method Not Allowed
        assert!(!is_retryable_status(422)); // Unprocessable Entity
    }

    #[test]
    fn test_is_retryable_status_2xx_not_retryable() {
        // 2xx success codes should not be retryable
        assert!(!is_retryable_status(200));
        assert!(!is_retryable_status(201));
        assert!(!is_retryable_status(204));
    }

    #[test]
    fn test_is_retryable_status_3xx_not_retryable() {
        // 3xx redirect codes should not be retryable
        assert!(!is_retryable_status(301));
        assert!(!is_retryable_status(302));
        assert!(!is_retryable_status(304));
    }

    // ---- execute_with_retry unit tests ----
    //
    // These tests drive the retry executor directly using a mock closure backed
    // by a wiremock server, which is the only reliable way to obtain real
    // `reqwest::Error` values with specific characteristics.

    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;
    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    /// Wiremock responder that returns 503 for the first `fail_for` calls, then 200.
    struct FailThenSucceed {
        /// Number of leading calls that are answered with 503.
        fail_for: usize,
        /// Shared call counter, incremented once per handled request.
        count: Arc<AtomicUsize>,
    }
    impl wiremock::Respond for FailThenSucceed {
        fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
            // `fetch_add` returns the previous value, i.e. the zero-based call index.
            let n = self.count.fetch_add(1, Ordering::SeqCst);
            if n < self.fail_for {
                ResponseTemplate::new(503)
            } else {
                ResponseTemplate::new(200).set_body_string("done")
            }
        }
    }

    /// Builds a retry configuration with jitter disabled and millisecond-scale delays
    /// (1 ms initial, 10 ms maximum) so retry tests stay fast.
    fn no_jitter_config(max_attempts: usize) -> RetryConfig {
        RetryConfig {
            max_attempts,
            initial_delay_ms: 1,
            max_delay_ms: 10,
            backoff_multiplier: 2.0,
            jitter: false,
        }
    }

    /// Performs a GET against `url` and returns the response body.
    ///
    /// Error statuses are turned into `reqwest::Error` via `error_for_status`, which is
    /// what gives the retry executors status-bearing errors to classify.
    async fn make_request(client: &reqwest::Client, url: &str) -> Result<String, reqwest::Error> {
        let resp = client.get(url).send().await?;
        let resp = resp.error_for_status()?;
        let body = resp.text().await?;
        Ok(body)
    }

    /// A 200 on the first call returns the body; the mock expects exactly one request.
    #[tokio::test]
    async fn test_execute_with_retry_immediate_success() {
        let server = MockServer::start().await;
        Mock::given(method("GET"))
            .and(path("/ok"))
            .respond_with(ResponseTemplate::new(200).set_body_string("ok"))
            .expect(1)
            .mount(&server)
            .await;

        let client = reqwest::Client::new();
        let url = format!("{}/ok", server.uri());
        let config = no_jitter_config(3);

        let result = execute_with_retry(&config, "test", || {
            let client = client.clone();
            let url = url.clone();
            async move { make_request(&client, &url).await }
        })
        .await;

        assert!(result.is_ok(), "should succeed on first attempt");
        assert_eq!(result.unwrap(), "ok");
    }

    /// A non-retryable status stops the executor after a single call.
    #[tokio::test]
    async fn test_execute_with_retry_non_retryable_short_circuits() {
        let server = MockServer::start().await;
        // 400 is not retryable; the executor must stop after the first attempt.
        Mock::given(method("GET"))
            .and(path("/bad"))
            .respond_with(ResponseTemplate::new(400))
            .mount(&server)
            .await;

        let client = reqwest::Client::new();
        let url = format!("{}/bad", server.uri());
        let config = no_jitter_config(3);
        // Counts closure invocations on the client side instead of relying on the mock.
        let call_count = Arc::new(AtomicUsize::new(0));

        let cc = call_count.clone();
        let result = execute_with_retry(&config, "test", || {
            let client = client.clone();
            let url = url.clone();
            let cc = cc.clone();
            async move {
                cc.fetch_add(1, Ordering::SeqCst);
                make_request(&client, &url).await
            }
        })
        .await;

        assert!(result.is_err(), "non-retryable error must propagate");
        assert_eq!(call_count.load(Ordering::SeqCst), 1, "must not retry a 400");
    }

    /// Two 503 responses followed by a 200 succeed within a three-attempt budget.
    #[tokio::test]
    async fn test_execute_with_retry_succeeds_after_transient_failures() {
        let server = MockServer::start().await;
        let count = Arc::new(AtomicUsize::new(0));
        Mock::given(method("GET"))
            .and(path("/flaky"))
            .respond_with(FailThenSucceed {
                fail_for: 2,
                count: count.clone(),
            })
            .expect(3)
            .mount(&server)
            .await;

        let client = reqwest::Client::new();
        let url = format!("{}/flaky", server.uri());
        let config = no_jitter_config(3);

        let result = execute_with_retry(&config, "test", || {
            let client = client.clone();
            let url = url.clone();
            async move { make_request(&client, &url).await }
        })
        .await;

        assert!(result.is_ok(), "should succeed after two transient 503s");
        assert_eq!(count.load(Ordering::SeqCst), 3, "must have made 3 calls");
    }

    /// Three consecutive 503 responses exhaust the budget and surface the exhaustion error.
    #[tokio::test]
    async fn test_execute_with_retry_exhaustion_returns_error() {
        let server = MockServer::start().await;
        Mock::given(method("GET"))
            .and(path("/always-fail"))
            .respond_with(ResponseTemplate::new(503))
            .expect(3)
            .mount(&server)
            .await;

        let client = reqwest::Client::new();
        let url = format!("{}/always-fail", server.uri());
        let config = no_jitter_config(3);

        let result = execute_with_retry(&config, "test", || {
            let client = client.clone();
            let url = url.clone();
            async move { make_request(&client, &url).await }
        })
        .await;

        assert!(result.is_err(), "all attempts exhausted must return error");
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("Retry limit exceeded"),
            "error must mention retry exhaustion, got: {msg}"
        );
    }

    // ---- execute_with_retry_tracking unit tests ----

    /// Immediate success reports one attempt and an empty history.
    #[tokio::test]
    async fn test_execute_with_retry_tracking_immediate_success() {
        let server = MockServer::start().await;
        Mock::given(method("GET"))
            .and(path("/ok"))
            .respond_with(ResponseTemplate::new(200).set_body_string("ok"))
            .expect(1)
            .mount(&server)
            .await;

        let client = reqwest::Client::new();
        let url = format!("{}/ok", server.uri());
        let config = no_jitter_config(3);

        let ret = execute_with_retry_tracking(&config, "test", || {
            let client = client.clone();
            let url = url.clone();
            async move { make_request(&client, &url).await }
        })
        .await;

        assert!(ret.result.is_ok());
        assert_eq!(ret.total_attempts, 1);
        assert!(
            ret.retry_history.is_empty(),
            "no retries on immediate success"
        );
    }

    /// A non-retryable status reports one attempt and leaves the history empty.
    #[tokio::test]
    async fn test_execute_with_retry_tracking_non_retryable_short_circuits() {
        let server = MockServer::start().await;
        Mock::given(method("GET"))
            .and(path("/bad"))
            .respond_with(ResponseTemplate::new(400))
            .expect(1)
            .mount(&server)
            .await;

        let client = reqwest::Client::new();
        let url = format!("{}/bad", server.uri());
        let config = no_jitter_config(3);

        let ret = execute_with_retry_tracking(&config, "test", || {
            let client = client.clone();
            let url = url.clone();
            async move { make_request(&client, &url).await }
        })
        .await;

        assert!(ret.result.is_err());
        assert_eq!(ret.total_attempts, 1, "must stop after the first attempt");
        assert!(
            ret.retry_history.is_empty(),
            "non-retryable error must not populate retry history"
        );
    }

    /// Two transient failures before success produce two history entries.
    #[tokio::test]
    async fn test_execute_with_retry_tracking_records_history() {
        let server = MockServer::start().await;
        let count = Arc::new(AtomicUsize::new(0));
        Mock::given(method("GET"))
            .and(path("/flaky"))
            .respond_with(FailThenSucceed {
                fail_for: 2,
                count: count.clone(),
            })
            .expect(3)
            .mount(&server)
            .await;

        let client = reqwest::Client::new();
        let url = format!("{}/flaky", server.uri());
        let config = no_jitter_config(3);

        let ret = execute_with_retry_tracking(&config, "test-op", || {
            let client = client.clone();
            let url = url.clone();
            async move { make_request(&client, &url).await }
        })
        .await;

        assert!(ret.result.is_ok(), "should eventually succeed");
        assert_eq!(ret.total_attempts, 3);
        // Two failures → two retry history entries.
        assert_eq!(ret.retry_history.len(), 2);
        // History entries are 1-indexed attempt numbers.
        assert_eq!(ret.retry_history[0].attempt, 1);
        assert_eq!(ret.retry_history[1].attempt, 2);
    }

    /// Exhaustion reports every attempt but records only the retries that were scheduled.
    #[tokio::test]
    async fn test_execute_with_retry_tracking_exhaustion_total_attempts() {
        let server = MockServer::start().await;
        Mock::given(method("GET"))
            .and(path("/always-fail"))
            .respond_with(ResponseTemplate::new(503))
            .expect(3)
            .mount(&server)
            .await;

        let client = reqwest::Client::new();
        let url = format!("{}/always-fail", server.uri());
        let config = no_jitter_config(3);

        let ret = execute_with_retry_tracking(&config, "test-op", || {
            let client = client.clone();
            let url = url.clone();
            async move { make_request(&client, &url).await }
        })
        .await;

        assert!(ret.result.is_err(), "all attempts exhausted");
        assert_eq!(ret.total_attempts, 3);
        // Two retries recorded (the final exhausted attempt is not added to history).
        assert_eq!(ret.retry_history.len(), 2);
    }
}
aperture_cli/resilience/mod.rs

aperture_cli/resilience/
mod.rs