Skip to main content

aranet_core/
retry.rs

//! Retry logic for BLE operations.
//!
//! This module provides configurable retry functionality for handling
//! transient BLE failures.
//!
//! # Example
//!
//! ```
//! use aranet_core::{RetryConfig, with_retry, Error};
//!
//! # async fn example() -> Result<(), Error> {
//! // Configure retry behavior (3 retries with default settings)
//! let config = RetryConfig::new(3);
//!
//! // Or use aggressive settings for unreliable connections
//! let aggressive = RetryConfig::aggressive();
//!
//! // Use with_retry to wrap fallible operations
//! let result = with_retry(&config, "read_sensor", || async {
//!     // Your BLE operation here
//!     Ok::<_, Error>(42)
//! }).await?;
//! # Ok(())
//! # }
//! ```
26
27use std::future::Future;
28use std::time::Duration;
29
30use rand::Rng;
31use tokio::time::sleep;
32use tracing::{debug, warn};
33
34use crate::error::{Error, Result};
35
/// Configuration for retry behavior.
///
/// Controls how many times an operation is retried and how long to wait
/// between attempts. The delay before retry `n` (zero-based) is
/// `initial_delay * backoff_multiplier^n`, capped at `max_delay`, with
/// optional random jitter applied after the cap.
#[derive(Debug, Clone)]
pub struct RetryConfig {
    /// Maximum number of retry attempts (0 means no retries).
    pub max_retries: u32,
    /// Initial delay between retries.
    pub initial_delay: Duration,
    /// Maximum delay between retries (for exponential backoff).
    ///
    /// The cap is applied before jitter, so with `jitter` enabled the
    /// actual delay may exceed this value by up to 25%.
    pub max_delay: Duration,
    /// Backoff multiplier (1.0 = constant delay, 2.0 = double each time).
    pub backoff_multiplier: f64,
    /// Whether to add jitter to delays.
    pub jitter: bool,
}
50
51impl Default for RetryConfig {
52    fn default() -> Self {
53        Self {
54            max_retries: 3,
55            initial_delay: Duration::from_millis(100),
56            max_delay: Duration::from_secs(5),
57            backoff_multiplier: 2.0,
58            jitter: true,
59        }
60    }
61}
62
63impl RetryConfig {
64    /// Create a new retry config with custom settings.
65    pub fn new(max_retries: u32) -> Self {
66        Self {
67            max_retries,
68            ..Default::default()
69        }
70    }
71
72    /// No retries.
73    pub fn none() -> Self {
74        Self {
75            max_retries: 0,
76            ..Default::default()
77        }
78    }
79
80    /// Conservative retry settings for unreliable connections.
81    pub fn aggressive() -> Self {
82        Self {
83            max_retries: 5,
84            initial_delay: Duration::from_millis(50),
85            max_delay: Duration::from_secs(10),
86            backoff_multiplier: 1.5,
87            jitter: true,
88        }
89    }
90
91    // ==================== Per-Operation Presets ====================
92    //
93    // Different operations have different characteristics and should
94    // be retried differently:
95    //
96    // - Scan: Fast retries, BLE scanning often needs multiple attempts
97    // - Connect: Patient retries, device may be busy or waking up
98    // - Read: Standard retries, transient BLE errors
99    // - Write: Careful retries, writes can fail transiently
100    // - History: Persistent retries, long operation, save progress
101
102    /// Retry configuration optimized for device scanning.
103    ///
104    /// Scanning often requires multiple attempts due to:
105    /// - BLE adapter warm-up
106    /// - Devices advertising at intervals (Aranet ~4s)
107    /// - RF interference
108    ///
109    /// Uses aggressive, fast retries with short delays.
110    pub fn for_scan() -> Self {
111        Self {
112            max_retries: 5,
113            initial_delay: Duration::from_millis(200),
114            max_delay: Duration::from_secs(2),
115            backoff_multiplier: 1.5,
116            jitter: true,
117        }
118    }
119
120    /// Retry configuration optimized for device connection.
121    ///
122    /// Connections may fail due to:
123    /// - Device busy with another central
124    /// - Device in low-power mode (slower wake-up)
125    /// - Signal strength variations
126    ///
127    /// Uses patient retries with longer delays to allow device recovery.
128    pub fn for_connect() -> Self {
129        Self {
130            max_retries: 3,
131            initial_delay: Duration::from_secs(1),
132            max_delay: Duration::from_secs(10),
133            backoff_multiplier: 2.0,
134            jitter: true,
135        }
136    }
137
138    /// Retry configuration optimized for characteristic reads.
139    ///
140    /// Reads may fail due to:
141    /// - Transient BLE errors
142    /// - Connection instability
143    /// - Device processing delay
144    ///
145    /// Uses standard retries suitable for most read operations.
146    pub fn for_read() -> Self {
147        Self {
148            max_retries: 3,
149            initial_delay: Duration::from_millis(100),
150            max_delay: Duration::from_secs(2),
151            backoff_multiplier: 2.0,
152            jitter: true,
153        }
154    }
155
156    /// Retry configuration optimized for characteristic writes.
157    ///
158    /// Writes may fail due to:
159    /// - BLE transmission errors
160    /// - Device busy processing previous write
161    /// - Connection instability
162    ///
163    /// Uses careful retries with moderate delays.
164    pub fn for_write() -> Self {
165        Self {
166            max_retries: 2,
167            initial_delay: Duration::from_millis(200),
168            max_delay: Duration::from_secs(3),
169            backoff_multiplier: 2.0,
170            jitter: true,
171        }
172    }
173
174    /// Retry configuration optimized for history downloads.
175    ///
176    /// History downloads are long-running operations that may fail due to:
177    /// - Connection drops during extended transfer
178    /// - Device timeout during large transfers
179    /// - BLE congestion from repeated reads
180    ///
181    /// Uses persistent retries with longer delays, designed to work
182    /// with checkpoint-based resumption for large downloads.
183    pub fn for_history() -> Self {
184        Self {
185            max_retries: 5,
186            initial_delay: Duration::from_millis(500),
187            max_delay: Duration::from_secs(15),
188            backoff_multiplier: 2.0,
189            jitter: true,
190        }
191    }
192
193    /// Retry configuration optimized for reconnection attempts.
194    ///
195    /// After a connection loss, the device may need time to:
196    /// - Reset its BLE state
197    /// - Complete other operations
198    /// - Recover from low-power mode
199    ///
200    /// Uses very patient retries with long delays.
201    pub fn for_reconnect() -> Self {
202        Self {
203            max_retries: 5,
204            initial_delay: Duration::from_secs(2),
205            max_delay: Duration::from_secs(30),
206            backoff_multiplier: 2.0,
207            jitter: true,
208        }
209    }
210
211    /// Retry configuration for quick, time-sensitive operations.
212    ///
213    /// For operations where speed is more important than reliability,
214    /// uses minimal retries with very short delays.
215    pub fn quick() -> Self {
216        Self {
217            max_retries: 2,
218            initial_delay: Duration::from_millis(50),
219            max_delay: Duration::from_millis(500),
220            backoff_multiplier: 2.0,
221            jitter: false,
222        }
223    }
224
225    // ==================== Builder Methods ====================
226
227    /// Set maximum number of retries.
228    #[must_use]
229    pub fn max_retries(mut self, retries: u32) -> Self {
230        self.max_retries = retries;
231        self
232    }
233
234    /// Set initial delay.
235    #[must_use]
236    pub fn initial_delay(mut self, delay: Duration) -> Self {
237        self.initial_delay = delay;
238        self
239    }
240
241    /// Set maximum delay.
242    #[must_use]
243    pub fn max_delay(mut self, delay: Duration) -> Self {
244        self.max_delay = delay;
245        self
246    }
247
248    /// Set backoff multiplier.
249    #[must_use]
250    pub fn backoff_multiplier(mut self, multiplier: f64) -> Self {
251        self.backoff_multiplier = multiplier;
252        self
253    }
254
255    /// Enable or disable jitter.
256    #[must_use]
257    pub fn jitter(mut self, enabled: bool) -> Self {
258        self.jitter = enabled;
259        self
260    }
261
262    /// Calculate delay for a given attempt number.
263    fn delay_for_attempt(&self, attempt: u32) -> Duration {
264        let base_delay =
265            self.initial_delay.as_secs_f64() * self.backoff_multiplier.powi(attempt as i32);
266        let capped_delay = base_delay.min(self.max_delay.as_secs_f64());
267
268        let final_delay = if self.jitter {
269            // Add up to 25% jitter using proper random number generation
270            let jitter_factor = 1.0 + (rand::rng().random::<f64>() * 0.25);
271            capped_delay * jitter_factor
272        } else {
273            capped_delay
274        };
275
276        Duration::from_secs_f64(final_delay)
277    }
278}
279
280/// Execute an async operation with retry logic.
281///
282/// # Arguments
283///
284/// * `config` - Retry configuration
285/// * `operation` - The async operation to retry
286/// * `operation_name` - Name for logging purposes
287///
288/// # Returns
289///
290/// The result of the operation, or the last error if all retries failed.
291pub async fn with_retry<F, Fut, T>(
292    config: &RetryConfig,
293    operation_name: &str,
294    operation: F,
295) -> Result<T>
296where
297    F: Fn() -> Fut,
298    Fut: Future<Output = Result<T>>,
299{
300    let mut last_error = None;
301
302    for attempt in 0..=config.max_retries {
303        match operation().await {
304            Ok(result) => {
305                if attempt > 0 {
306                    debug!("{} succeeded after {} retries", operation_name, attempt);
307                }
308                return Ok(result);
309            }
310            Err(e) => {
311                if !is_retryable(&e) {
312                    return Err(e);
313                }
314
315                last_error = Some(e);
316
317                if attempt < config.max_retries {
318                    let delay = config.delay_for_attempt(attempt);
319                    warn!(
320                        "{} failed (attempt {}/{}), retrying in {:?}",
321                        operation_name,
322                        attempt + 1,
323                        config.max_retries + 1,
324                        delay
325                    );
326                    sleep(delay).await;
327                }
328            }
329        }
330    }
331
332    Err(last_error
333        .unwrap_or_else(|| Error::InvalidData("Operation failed with no error".to_string())))
334}
335
336/// Check if an error is retryable.
337fn is_retryable(error: &Error) -> bool {
338    use crate::error::ConnectionFailureReason;
339
340    match error {
341        // Timeout errors are usually transient
342        Error::Timeout { .. } => true,
343        // Bluetooth errors are often transient
344        Error::Bluetooth(_) => true,
345        // Connection failed - check the reason
346        Error::ConnectionFailed { reason, .. } => {
347            matches!(
348                reason,
349                ConnectionFailureReason::OutOfRange
350                    | ConnectionFailureReason::Timeout
351                    | ConnectionFailureReason::BleError(_)
352                    | ConnectionFailureReason::Other(_)
353            )
354        }
355        // Not connected errors might be transient
356        Error::NotConnected => true,
357        // Write failures might be transient
358        Error::WriteFailed { .. } => true,
359        // Invalid data is not retryable
360        Error::InvalidData(_) => false,
361        // Invalid history data is not retryable
362        Error::InvalidHistoryData { .. } => false,
363        // Invalid reading format is not retryable
364        Error::InvalidReadingFormat { .. } => false,
365        // Device not found is not retryable
366        Error::DeviceNotFound(_) => false,
367        // Characteristic not found is not retryable
368        Error::CharacteristicNotFound { .. } => false,
369        // Cancelled is not retryable
370        Error::Cancelled => false,
371        // I/O errors might be transient
372        Error::Io(_) => true,
373        // Invalid configuration is not retryable
374        Error::InvalidConfig(_) => false,
375    }
376}
377
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::{ConnectionFailureReason, DeviceNotFoundReason};
    use std::sync::Arc;
    use std::sync::atomic::{AtomicU32, Ordering};

    /// The default config retries three times with jitter enabled.
    #[test]
    fn test_retry_config_default() {
        let config = RetryConfig::default();
        assert_eq!(config.max_retries, 3);
        assert!(config.jitter);
    }

    /// `none()` disables retries entirely.
    #[test]
    fn test_retry_config_none() {
        let config = RetryConfig::none();
        assert_eq!(config.max_retries, 0);
    }

    /// Builder methods overwrite exactly the field they are named after.
    #[test]
    fn test_builder_methods() {
        let config = RetryConfig::none()
            .max_retries(4)
            .initial_delay(Duration::from_millis(10))
            .max_delay(Duration::from_millis(80))
            .backoff_multiplier(3.0)
            .jitter(false);

        assert_eq!(config.max_retries, 4);
        assert_eq!(config.initial_delay, Duration::from_millis(10));
        assert_eq!(config.max_delay, Duration::from_millis(80));
        assert_eq!(config.backoff_multiplier, 3.0);
        assert!(!config.jitter);
    }

    /// Without jitter the delay doubles on each attempt.
    #[test]
    fn test_delay_calculation() {
        let config = RetryConfig {
            initial_delay: Duration::from_millis(100),
            backoff_multiplier: 2.0,
            max_delay: Duration::from_secs(10),
            jitter: false,
            max_retries: 5,
        };

        assert_eq!(config.delay_for_attempt(0), Duration::from_millis(100));
        assert_eq!(config.delay_for_attempt(1), Duration::from_millis(200));
        assert_eq!(config.delay_for_attempt(2), Duration::from_millis(400));
    }

    /// Without jitter the delay never grows past `max_delay`.
    #[test]
    fn test_delay_capped_at_max() {
        let config = RetryConfig {
            initial_delay: Duration::from_millis(100),
            backoff_multiplier: 2.0,
            max_delay: Duration::from_secs(1),
            jitter: false,
            max_retries: 20,
        };

        // 100 ms * 2^10 is far beyond the 1 s cap.
        assert_eq!(config.delay_for_attempt(10), Duration::from_secs(1));
        assert_eq!(config.delay_for_attempt(20), Duration::from_secs(1));
    }

    /// Jitter only ever adds between 0% and 25% to the base delay.
    #[test]
    fn test_delay_jitter_bounds() {
        let config = RetryConfig {
            initial_delay: Duration::from_millis(100),
            backoff_multiplier: 2.0,
            max_delay: Duration::from_secs(10),
            jitter: true,
            max_retries: 5,
        };

        for _ in 0..100 {
            let delay = config.delay_for_attempt(0);
            assert!(delay >= Duration::from_millis(100));
            assert!(delay <= Duration::from_millis(125));
        }
    }

    /// Spot-check the transient / permanent classification.
    #[test]
    fn test_is_retryable() {
        assert!(is_retryable(&Error::Timeout {
            operation: "test".to_string(),
            duration: Duration::from_secs(1),
        }));
        assert!(is_retryable(&Error::ConnectionFailed {
            device_id: None,
            reason: ConnectionFailureReason::Other("test".to_string()),
        }));
        assert!(is_retryable(&Error::NotConnected));
        assert!(!is_retryable(&Error::InvalidData("test".to_string())));
        assert!(!is_retryable(&Error::DeviceNotFound(
            DeviceNotFoundReason::NotFound {
                identifier: "test".to_string()
            }
        )));
    }

    /// A first-try success is returned unchanged.
    #[tokio::test]
    async fn test_with_retry_immediate_success() {
        let config = RetryConfig::new(3);
        let result = with_retry(&config, "test", || async { Ok::<_, Error>(42) }).await;
        assert_eq!(result.unwrap(), 42);
    }

    /// Two transient failures followed by a success take three calls.
    #[tokio::test]
    async fn test_with_retry_eventual_success() {
        let config = RetryConfig {
            max_retries: 3,
            initial_delay: Duration::from_millis(1),
            jitter: false,
            ..Default::default()
        };

        let attempts = Arc::new(AtomicU32::new(0));
        let attempts_clone = Arc::clone(&attempts);

        let result: Result<i32> = with_retry(&config, "test", || {
            let attempts = Arc::clone(&attempts_clone);
            async move {
                let count = attempts.fetch_add(1, Ordering::SeqCst);
                if count < 2 {
                    Err(Error::ConnectionFailed {
                        device_id: None,
                        reason: ConnectionFailureReason::Other("transient error".to_string()),
                    })
                } else {
                    Ok(42)
                }
            }
        })
        .await;

        assert_eq!(result.unwrap(), 42);
        assert_eq!(attempts.load(Ordering::SeqCst), 3);
    }

    /// A persistently failing operation is called `max_retries + 1` times.
    #[tokio::test]
    async fn test_with_retry_all_fail() {
        let config = RetryConfig {
            max_retries: 2,
            initial_delay: Duration::from_millis(1),
            jitter: false,
            ..Default::default()
        };

        let attempts = Arc::new(AtomicU32::new(0));
        let attempts_clone = Arc::clone(&attempts);

        let result: Result<i32> = with_retry(&config, "test", || {
            let attempts = Arc::clone(&attempts_clone);
            async move {
                attempts.fetch_add(1, Ordering::SeqCst);
                Err::<i32, _>(Error::ConnectionFailed {
                    device_id: None,
                    reason: ConnectionFailureReason::Other("persistent error".to_string()),
                })
            }
        })
        .await;

        assert!(result.is_err());
        assert_eq!(attempts.load(Ordering::SeqCst), 3); // 1 initial + 2 retries
    }

    /// A non-retryable error is returned after a single call.
    #[tokio::test]
    async fn test_with_retry_non_retryable_error() {
        let config = RetryConfig::new(3);
        let attempts = Arc::new(AtomicU32::new(0));
        let attempts_clone = Arc::clone(&attempts);

        let result: Result<i32> = with_retry(&config, "test", || {
            let attempts = Arc::clone(&attempts_clone);
            async move {
                attempts.fetch_add(1, Ordering::SeqCst);
                Err::<i32, _>(Error::InvalidData("not retryable".to_string()))
            }
        })
        .await;

        assert!(result.is_err());
        assert_eq!(attempts.load(Ordering::SeqCst), 1); // No retries
    }
}