1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//! Retry resilience middleware for services, applications, and libraries.
//!
//! This module provides automatic retry capabilities with configurable backoff strategies,
//! jitter, recovery classification, and comprehensive telemetry. The primary types are
//! [`Retry`] and [`RetryLayer`]:
//!
//! - [`Retry`] is the middleware that wraps an inner service and automatically retries failed operations
//! - [`RetryLayer`] is used to configure and construct the retry middleware
//!
//! # Quick Start
//!
//! ```rust
//! # use tick::Clock;
//! # use layered::{Execute, Service, Stack};
//! # use seatbelt::retry::{Retry, Backoff};
//! # use seatbelt::{RecoveryInfo, ResilienceContext};
//! # async fn example(clock: Clock) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
//! let context = ResilienceContext::new(&clock).name("my_service");
//!
//! let stack = (
//!     Retry::layer("retry", &context)
//!         .clone_input()
//!         .recovery_with(|result, _| match result {
//!             Ok(_) => RecoveryInfo::never(),
//!             Err(_) => RecoveryInfo::retry(),
//!         }),
//!     Execute::new(my_operation),
//! );
//!
//! let service = stack.into_service();
//! let result = service.execute("input".to_string()).await;
//! # let _result = result;
//! # Ok(())
//! # }
//! # async fn my_operation(input: String) -> Result<String, String> { Ok(input) }
//! ```
//!
//! # Configuration
//!
//! The [`RetryLayer`] uses a type state pattern to enforce that all required properties are
//! configured before the layer can be built. This compile-time safety ensures that you cannot
//! accidentally create a retry layer without properly specifying input cloning and recovery logic:
//!
//! - [`clone_input_with`][RetryLayer::clone_input_with]: Required function to clone inputs for retries (Rust ownership requirement)
//! - [`recovery`][RetryLayer::recovery]: Required function to determine if an output should trigger a retry
//!
//! Each retry layer requires an identifier for telemetry purposes. This identifier should use
//! the `snake_case` naming convention to maintain consistency across the codebase.
//!
//! # Thread Safety
//!
//! The [`Retry`] type is thread-safe and implements both `Send` and `Sync` as enforced by
//! the `Service` trait it implements. This allows retry middleware to be safely shared
//! across multiple threads and used in concurrent environments.
//!
//! # Retry Delay
//!
//! Retry delays are determined by the following priority order:
//!
//! 1. **Recovery Delay Override**: If [`RetryLayer::recovery_with`] returns
//!    [`RecoveryInfo::retry().delay()`][crate::RecoveryInfo::delay] or
//!    [`RecoveryInfo::unavailable()`][crate::RecoveryInfo::unavailable] with a specific duration,
//!    this delay is used directly.
//!
//! 2. **Backoff Strategy**: When no recovery delay is specified, delays are calculated using
//!    the configured backoff strategy (Constant, Linear, or Exponential), applied to the base
//!    delay (`10ms` by default).
//!
//! # Defaults
//!
//! The retry middleware uses the following default values when optional configuration is not provided:
//!
//! | Parameter | Default Value | Description | Configured By |
//! |-----------|---------------|-------------|---------------|
//! | Max retry attempts | `3` (4 total) | Maximum retry attempts plus original call | [`max_retry_attempts`][RetryLayer::max_retry_attempts] |
//! | Base delay | `10` milliseconds | Base delay used for backoff calculations | [`base_delay`][RetryLayer::base_delay] |
//! | Backoff strategy | `Exponential` | Exponential backoff with base multiplier of 2 | [`backoff`][RetryLayer::backoff] |
//! | Jitter | `Enabled` | Adds randomness to delays to prevent thundering herds | [`use_jitter`][RetryLayer::use_jitter] |
//! | Max delay | `None` | No limit on maximum delay between retries | [`max_delay`][RetryLayer::max_delay] |
//! | Enable condition | Always enabled | Retry protection is applied to all requests | [`enable_if`][RetryLayer::enable_if], [`enable_always`][RetryLayer::enable_always], [`disable`][RetryLayer::disable] |
//!
//! The default base delay is optimized for **service-to-service** communication where low latency
//! is critical. For **client-to-service** scenarios (e.g., mobile apps, web frontends), consider
//! increasing the base delay to 1-2 seconds using [`base_delay`][RetryLayer::base_delay].
//!
//! # Telemetry
//!
//! ## Metrics
//!
//! - **Metric**: `resilience.event` (counter)
//! - **When**: Emitted once per attempt when the recovery classifier indicates the result is
//!   recoverable, including the final attempt when maximum retries are exhausted
//! - **Attributes**:
//!   - `resilience.pipeline.name`: Pipeline identifier from [`ResilienceContext::name`][crate::ResilienceContext::name]
//!   - `resilience.strategy.name`: Retry identifier from [`Retry::layer`]
//!   - `resilience.event.name`: Always `retry`
//!   - `resilience.attempt.index`: Attempt index (0-based)
//!   - `resilience.attempt.is_last`: Boolean indicating if this is the last retry attempt
//!   - `resilience.attempt.recovery.kind`: The [`RecoveryKind`][crate::RecoveryKind] that triggered
//!     the retry (e.g., `retry` or `unavailable`)
//!
//! # Examples
//!
//! ## Basic Usage
//!
//! This example demonstrates the basic usage of configuring and using retry middleware.
//!
//! ```rust
//! # use std::time::Duration;
//! # use tick::Clock;
//! # use layered::{Execute, Service, Stack};
//! # use seatbelt::retry::{Retry, Backoff};
//! # use seatbelt::{RecoveryInfo, ResilienceContext};
//! # async fn example(clock: Clock) -> Result<(), String> {
//! // Define common options for resilience middleware. The clock is runtime-specific and
//! // must be provided. See its documentation for details.
//! let context = ResilienceContext::new(&clock).name("example");
//!
//! let stack = (
//!     Retry::layer("my_retry", &context)
//!         // Required: how to clone inputs for retries
//!         .clone_input()
//!         // Required: determine if we should retry based on output
//!         .recovery_with(|output: &Result<String, String>, _args| match output {
//!             // These are demonstrative, real code will have more meaningful recovery detection
//!             Ok(_) => RecoveryInfo::never(),
//!             Err(msg) if msg.contains("transient") => RecoveryInfo::retry(),
//!             Err(_) => RecoveryInfo::never(),
//!         }),
//!     Execute::new(execute_unreliable_operation),
//! );
//!
//! // Build the service
//! let service = stack.into_service();
//!
//! // Execute the service
//! let result = service.execute("test input".to_string()).await;
//! # let _result = result;
//! # Ok(())
//! # }
//! # async fn execute_unreliable_operation(input: String) -> Result<String, String> { Ok(input) }
//! ```
//!
//! ## Advanced Usage
//!
//! This example demonstrates advanced usage of the retry middleware, including a
//! recovery-supplied retry delay, an explicit backoff configuration, and a retry callback.
//!
//! ```rust
//! # use std::time::Duration;
//! # use tick::Clock;
//! # use std::io;
//! # use layered::{Execute, Stack, Service};
//! # use seatbelt::retry::{Retry, Backoff};
//! # use seatbelt::{RecoveryInfo, ResilienceContext};
//! # async fn example(clock: Clock) -> Result<(), String> {
//! // Define common options for resilience middleware.
//! let context = ResilienceContext::new(&clock);
//!
//! let stack = (
//!     Retry::layer("advanced_retry", &context)
//!         .clone_input()
//!         .recovery_with(|output: &Result<String, io::Error>, _args| match output {
//!             Err(err) if err.kind() == io::ErrorKind::TimedOut => RecoveryInfo::retry().delay(Duration::from_secs(60)),
//!             Err(_) => RecoveryInfo::never(),
//!             Ok(_) => RecoveryInfo::never(),
//!         })
//!         // Optional configuration
//!         .max_retry_attempts(5)
//!         .base_delay(Duration::from_millis(200))
//!         .backoff(Backoff::Exponential)
//!         .use_jitter(true)
//!         // Callback called just before the next retry
//!         .on_retry(|output, args| {
//!             println!(
//!                 "retrying, attempt: {}, delay: {}ms",
//!                 args.attempt(),
//!                 args.retry_delay().as_millis(),
//!             );
//!         }),
//!     Execute::new(execute_unreliable_operation),
//! );
//!
//! // Build and execute the service
//! let service = stack.into_service();
//! let result = service.execute("test_timeout".to_string()).await;
//! # let _result = result;
//! # Ok(())
//! # }
//! # async fn execute_unreliable_operation(input: String) -> Result<String, io::Error> { Ok(input) }
//! ```
pub use crateAttempt;
pub use ;
pub use Backoff;
pub use DelayBackoff;
pub use ;
pub use RetryConfig;
pub use RetryLayer;
pub use Retry;
pub use RetryFuture;
pub use RetryShared;