mockforge_grpc/reflection/
error_handling.rs

1//! Error handling and retry mechanisms for the reflection proxy
2
3use serde::{Deserialize, Serialize};
4use std::time::Duration;
5use tokio::time::sleep;
6use tonic::Status;
7use tracing::{debug, error, warn};
8
9/// Configuration for error handling and retries
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct ErrorConfig {
12    /// Maximum number of retries for failed requests
13    pub max_retries: u32,
14    /// Base delay between retries (in milliseconds)
15    pub base_delay_ms: u64,
16    /// Maximum delay between retries (in milliseconds)
17    pub max_delay_ms: u64,
18    /// Whether to enable exponential backoff
19    pub exponential_backoff: bool,
20}
21
22impl Default for ErrorConfig {
23    fn default() -> Self {
24        Self {
25            max_retries: 3,
26            base_delay_ms: 100,
27            max_delay_ms: 5000,
28            exponential_backoff: true,
29        }
30    }
31}
32
33/// Handle errors with retry logic
34pub async fn handle_with_retry<F, Fut, T>(
35    mut operation: F,
36    config: &ErrorConfig,
37) -> Result<T, Status>
38where
39    F: FnMut() -> Fut,
40    Fut: std::future::Future<Output = Result<T, Status>>,
41{
42    let mut attempts = 0;
43    let mut delay = Duration::from_millis(config.base_delay_ms);
44
45    loop {
46        match operation().await {
47            Ok(result) => return Ok(result),
48            Err(status) => {
49                attempts += 1;
50
51                // If we've reached the maximum retries, return the error
52                if attempts > config.max_retries {
53                    error!("Operation failed after {} attempts: {}", attempts, status);
54                    return Err(status);
55                }
56
57                // For certain types of errors, we might not want to retry
58                match status.code() {
59                    // Don't retry these errors
60                    tonic::Code::InvalidArgument
61                    | tonic::Code::NotFound
62                    | tonic::Code::AlreadyExists
63                    | tonic::Code::PermissionDenied
64                    | tonic::Code::FailedPrecondition
65                    | tonic::Code::Aborted
66                    | tonic::Code::OutOfRange
67                    | tonic::Code::Unimplemented => {
68                        error!("Non-retryable error: {}", status);
69                        return Err(status);
70                    }
71                    // Retry all other errors
72                    _ => {
73                        warn!(
74                            "Attempt {} failed: {}. Retrying in {:?}...",
75                            attempts, status, delay
76                        );
77
78                        // Wait before retrying
79                        sleep(delay).await;
80
81                        // Calculate next delay with exponential backoff
82                        if config.exponential_backoff {
83                            delay = Duration::from_millis(
84                                (delay.as_millis() * 2).min(config.max_delay_ms as u128) as u64,
85                            );
86                        }
87                    }
88                }
89            }
90        }
91    }
92}
93
94/// Simulate various error conditions for testing
95pub fn simulate_error(error_rate: f64) -> Result<(), Status> {
96    use rand::Rng;
97
98    let mut rng = rand::rng();
99    let random: f64 = rng.random();
100
101    if random < error_rate {
102        // Simulate different types of errors
103        let error_type: u32 = rng.random_range(0..5);
104        match error_type {
105            0 => Err(Status::unavailable("Simulated service unavailable")),
106            1 => Err(Status::deadline_exceeded("Simulated timeout")),
107            2 => Err(Status::internal("Simulated internal error")),
108            3 => Err(Status::resource_exhausted("Simulated resource exhausted")),
109            _ => Err(Status::unknown("Simulated unknown error")),
110        }
111    } else {
112        Ok(())
113    }
114}
115
116/// Add latency to simulate network delays
117pub async fn simulate_latency(latency_ms: u64) {
118    if latency_ms > 0 {
119        debug!("Simulating {}ms latency", latency_ms);
120        sleep(Duration::from_millis(latency_ms)).await;
121    }
122}
123
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::{Duration, Instant};

    /// Generous ceiling for wall-clock assertions. Tight bounds (tens of
    /// milliseconds) fail spuriously on loaded CI machines; this value only
    /// guards against a grossly wrong sleep.
    const SCHEDULING_SLACK: Duration = Duration::from_secs(2);

    /// Retry policy with short, constant delays so retry tests stay fast.
    fn fast_config(max_retries: u32) -> ErrorConfig {
        ErrorConfig {
            max_retries,
            base_delay_ms: 10,
            max_delay_ms: 100,
            exponential_backoff: false,
        }
    }

    /// Drives `handle_with_retry` with an operation that always fails with
    /// `code`, returning the final result and the number of invocations.
    async fn run_always_failing(
        config: &ErrorConfig,
        code: tonic::Code,
    ) -> (Result<(), Status>, u32) {
        let mut calls = 0u32;
        let result = handle_with_retry(
            || {
                calls += 1;
                async move { Err::<(), Status>(Status::new(code, "simulated failure")) }
            },
            config,
        )
        .await;
        (result, calls)
    }

    // ==================== ErrorConfig Tests ====================

    #[test]
    fn test_error_config_default() {
        let config = ErrorConfig::default();

        assert_eq!(config.max_retries, 3);
        assert_eq!(config.base_delay_ms, 100);
        assert_eq!(config.max_delay_ms, 5000);
        assert!(config.exponential_backoff);
    }

    #[test]
    fn test_error_config_custom_values() {
        let config = ErrorConfig {
            max_retries: 5,
            base_delay_ms: 200,
            max_delay_ms: 10000,
            exponential_backoff: false,
        };

        assert_eq!(config.max_retries, 5);
        assert_eq!(config.base_delay_ms, 200);
        assert_eq!(config.max_delay_ms, 10000);
        assert!(!config.exponential_backoff);
    }

    #[test]
    fn test_error_config_clone() {
        let config = ErrorConfig {
            max_retries: 4,
            base_delay_ms: 150,
            max_delay_ms: 8000,
            exponential_backoff: true,
        };

        let cloned = config.clone();

        assert_eq!(cloned.max_retries, config.max_retries);
        assert_eq!(cloned.base_delay_ms, config.base_delay_ms);
        assert_eq!(cloned.max_delay_ms, config.max_delay_ms);
        assert_eq!(cloned.exponential_backoff, config.exponential_backoff);
    }

    #[test]
    fn test_error_config_debug() {
        let config = ErrorConfig::default();
        let debug_str = format!("{:?}", config);

        assert!(debug_str.contains("max_retries"));
        assert!(debug_str.contains("base_delay_ms"));
        assert!(debug_str.contains("max_delay_ms"));
        assert!(debug_str.contains("exponential_backoff"));
    }

    #[test]
    fn test_error_config_serialization() {
        let config = ErrorConfig {
            max_retries: 5,
            base_delay_ms: 250,
            max_delay_ms: 15000,
            exponential_backoff: true,
        };

        let json = serde_json::to_string(&config).unwrap();
        let deserialized: ErrorConfig = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.max_retries, config.max_retries);
        assert_eq!(deserialized.base_delay_ms, config.base_delay_ms);
        assert_eq!(deserialized.max_delay_ms, config.max_delay_ms);
        assert_eq!(deserialized.exponential_backoff, config.exponential_backoff);
    }

    #[test]
    fn test_error_config_deserialization() {
        let json = r#"{
            "max_retries": 10,
            "base_delay_ms": 500,
            "max_delay_ms": 30000,
            "exponential_backoff": false
        }"#;

        let config: ErrorConfig = serde_json::from_str(json).unwrap();

        assert_eq!(config.max_retries, 10);
        assert_eq!(config.base_delay_ms, 500);
        assert_eq!(config.max_delay_ms, 30000);
        assert!(!config.exponential_backoff);
    }

    #[test]
    fn test_error_config_zero_retries() {
        let config = ErrorConfig {
            max_retries: 0,
            base_delay_ms: 100,
            max_delay_ms: 1000,
            exponential_backoff: true,
        };

        assert_eq!(config.max_retries, 0);
    }

    #[test]
    fn test_error_config_high_retries() {
        let config = ErrorConfig {
            max_retries: 100,
            base_delay_ms: 10,
            max_delay_ms: 60000,
            exponential_backoff: true,
        };

        assert_eq!(config.max_retries, 100);
    }

    // ==================== simulate_error Tests ====================

    #[test]
    fn test_simulate_error_zero_rate() {
        // With 0% error rate, should always succeed
        for _ in 0..100 {
            assert!(simulate_error(0.0).is_ok());
        }
    }

    #[test]
    fn test_simulate_error_full_rate() {
        // With 100% error rate, should always fail
        for _ in 0..100 {
            assert!(simulate_error(1.0).is_err());
        }
    }

    #[test]
    fn test_simulate_error_produces_status() {
        // Every simulated failure must carry one of the five expected codes.
        let status = simulate_error(1.0).unwrap_err();
        assert!(matches!(
            status.code(),
            tonic::Code::Unavailable
                | tonic::Code::DeadlineExceeded
                | tonic::Code::Internal
                | tonic::Code::ResourceExhausted
                | tonic::Code::Unknown
        ));
    }

    #[test]
    fn test_simulate_error_partial_rate() {
        // With 50% error rate, should have some successes and some failures
        let mut successes = 0;
        let mut failures = 0;

        for _ in 0..1000 {
            match simulate_error(0.5) {
                Ok(()) => successes += 1,
                Err(_) => failures += 1,
            }
        }

        // With 1000 samples, we should have both successes and failures
        assert!(successes > 0, "Expected some successes");
        assert!(failures > 0, "Expected some failures");
    }

    // ==================== simulate_latency Tests ====================

    #[tokio::test]
    async fn test_simulate_latency_zero() {
        let start = Instant::now();
        simulate_latency(0).await;

        // No sleep is requested, so this must return promptly; the ceiling is
        // deliberately loose to stay stable on busy machines.
        assert!(start.elapsed() < SCHEDULING_SLACK);
    }

    #[tokio::test]
    async fn test_simulate_latency_short() {
        let start = Instant::now();
        simulate_latency(50).await;
        let elapsed = start.elapsed();

        // Should take at least 50ms (allow some margin)
        assert!(elapsed >= Duration::from_millis(45));
        // Should not take wildly longer than requested
        assert!(elapsed < SCHEDULING_SLACK);
    }

    #[tokio::test]
    async fn test_simulate_latency_longer() {
        let start = Instant::now();
        simulate_latency(100).await;

        // Should take at least 100ms (allow some margin)
        assert!(start.elapsed() >= Duration::from_millis(95));
    }

    // ==================== handle_with_retry Tests ====================

    #[tokio::test]
    async fn test_handle_with_retry_immediate_success() {
        let config = ErrorConfig::default();

        let result = handle_with_retry(|| async { Ok::<_, Status>("success") }, &config).await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), "success");
    }

    #[tokio::test]
    async fn test_handle_with_retry_non_retryable_error() {
        // The default policy allows retries, so a single call proves the
        // error was classified as non-retryable rather than merely returned.
        let (result, calls) =
            run_always_failing(&ErrorConfig::default(), tonic::Code::InvalidArgument).await;

        assert_eq!(result.unwrap_err().code(), tonic::Code::InvalidArgument);
        assert_eq!(calls, 1, "non-retryable errors must not be retried");
    }

    #[tokio::test]
    async fn test_handle_with_retry_not_found_no_retry() {
        let (result, calls) =
            run_always_failing(&ErrorConfig::default(), tonic::Code::NotFound).await;

        assert_eq!(result.unwrap_err().code(), tonic::Code::NotFound);
        assert_eq!(calls, 1, "non-retryable errors must not be retried");
    }

    #[tokio::test]
    async fn test_handle_with_retry_already_exists_no_retry() {
        let (result, calls) =
            run_always_failing(&ErrorConfig::default(), tonic::Code::AlreadyExists).await;

        assert_eq!(result.unwrap_err().code(), tonic::Code::AlreadyExists);
        assert_eq!(calls, 1, "non-retryable errors must not be retried");
    }

    #[tokio::test]
    async fn test_handle_with_retry_permission_denied_no_retry() {
        let (result, calls) =
            run_always_failing(&ErrorConfig::default(), tonic::Code::PermissionDenied).await;

        assert_eq!(result.unwrap_err().code(), tonic::Code::PermissionDenied);
        assert_eq!(calls, 1, "non-retryable errors must not be retried");
    }

    #[tokio::test]
    async fn test_handle_with_retry_unimplemented_no_retry() {
        let (result, calls) =
            run_always_failing(&ErrorConfig::default(), tonic::Code::Unimplemented).await;

        assert_eq!(result.unwrap_err().code(), tonic::Code::Unimplemented);
        assert_eq!(calls, 1, "non-retryable errors must not be retried");
    }

    #[tokio::test]
    async fn test_handle_with_retry_remaining_non_retryable_codes() {
        // Covers the non-retryable codes that have no dedicated test.
        let codes = [
            tonic::Code::FailedPrecondition,
            tonic::Code::Aborted,
            tonic::Code::OutOfRange,
        ];

        for code in codes {
            let (result, calls) = run_always_failing(&ErrorConfig::default(), code).await;

            assert_eq!(result.unwrap_err().code(), code);
            assert_eq!(calls, 1, "{:?} must not be retried", code);
        }
    }

    #[tokio::test]
    async fn test_handle_with_retry_retryable_error_eventual_success() {
        let config = fast_config(3);
        let mut calls = 0u32;

        let result = handle_with_retry(
            || {
                calls += 1;
                let attempt = calls;
                async move {
                    // Fail the first two attempts, succeed on the third.
                    if attempt <= 2 {
                        Err(Status::unavailable("service unavailable"))
                    } else {
                        Ok("success")
                    }
                }
            },
            &config,
        )
        .await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), "success");
        // Should have been called 3 times (2 failures + 1 success)
        assert_eq!(calls, 3);
    }

    #[tokio::test]
    async fn test_handle_with_retry_max_retries_exceeded() {
        let (result, calls) = run_always_failing(&fast_config(2), tonic::Code::Unavailable).await;

        assert_eq!(result.unwrap_err().code(), tonic::Code::Unavailable);
        // Initial attempt + 2 retries = 3 total calls
        assert_eq!(calls, 3);
    }

    #[tokio::test]
    async fn test_handle_with_retry_zero_retries() {
        let (result, calls) = run_always_failing(&fast_config(0), tonic::Code::Unavailable).await;

        assert!(result.is_err());
        // With 0 retries, should only try once
        assert_eq!(calls, 1);
    }

    #[tokio::test]
    async fn test_handle_with_retry_deadline_exceeded_retryable() {
        let (result, calls) =
            run_always_failing(&fast_config(2), tonic::Code::DeadlineExceeded).await;

        assert_eq!(result.unwrap_err().code(), tonic::Code::DeadlineExceeded);
        // DeadlineExceeded is retryable, so should try 3 times
        assert_eq!(calls, 3);
    }

    #[tokio::test]
    async fn test_handle_with_retry_internal_error_retryable() {
        let (result, calls) = run_always_failing(&fast_config(1), tonic::Code::Internal).await;

        assert_eq!(result.unwrap_err().code(), tonic::Code::Internal);
        // Internal is retryable, should try 2 times (1 initial + 1 retry)
        assert_eq!(calls, 2);
    }

    // ==================== Edge Cases ====================

    #[test]
    fn test_error_config_json_roundtrip() {
        let config = ErrorConfig::default();
        let json = serde_json::to_string(&config).unwrap();
        let roundtrip: ErrorConfig = serde_json::from_str(&json).unwrap();

        assert_eq!(roundtrip.max_retries, config.max_retries);
        assert_eq!(roundtrip.base_delay_ms, config.base_delay_ms);
        assert_eq!(roundtrip.max_delay_ms, config.max_delay_ms);
        assert_eq!(roundtrip.exponential_backoff, config.exponential_backoff);
    }

    #[test]
    fn test_simulate_error_negative_rate_treated_as_zero() {
        // Negative error rate should effectively be 0%
        for _ in 0..100 {
            assert!(simulate_error(-0.5).is_ok());
        }
    }

    #[test]
    fn test_simulate_error_rate_above_one_always_fails() {
        // Error rate above 1.0 should always fail
        for _ in 0..10 {
            assert!(simulate_error(1.5).is_err());
        }
    }
}