swink_agent/retry.rs
1//! Retry strategy trait and default exponential back-off implementation.
2//!
3//! The [`RetryStrategy`] trait defines the contract for deciding whether a
4//! failed model call should be retried and how long to wait before the next
5//! attempt. [`DefaultRetryStrategy`] provides exponential back-off with
6//! optional jitter, a configurable attempt cap, and a maximum delay ceiling.
7
8use std::time::Duration;
9
10use crate::error::AgentError;
11
12// ---------------------------------------------------------------------------
13// Trait
14// ---------------------------------------------------------------------------
15
16/// Determines whether a failed model call should be retried and, if so, how
17/// long to wait before the next attempt.
18///
19/// Implementations must be object-safe (`Send + Sync`) so that the strategy
20/// can be stored as `Box<dyn RetryStrategy>` inside loop configuration.
21pub trait RetryStrategy: Send + Sync {
22 /// Returns `true` if `error` on the given `attempt` number should be
23 /// retried. Attempt numbering starts at 1.
24 ///
25 /// This is the **sole decision point** for retryability — the agent loop
26 /// delegates entirely to this method. Custom implementations can retry
27 /// any error variant (e.g., [`AgentError::Plugin`]) without being gated
28 /// by [`AgentError::is_retryable()`].
29 fn should_retry(&self, error: &AgentError, attempt: u32) -> bool;
30
31 /// Returns the duration to wait before attempt number `attempt`.
32 /// Attempt numbering starts at 1.
33 fn delay(&self, attempt: u32) -> Duration;
34
35 /// Downcast helper for type-safe access to concrete strategy types.
36 ///
37 /// Used by [`AgentOptions::to_config`](crate::AgentOptions::to_config) to
38 /// extract serializable parameters from [`DefaultRetryStrategy`].
39 fn as_any(&self) -> &dyn std::any::Any;
40}
41
42// ---------------------------------------------------------------------------
43// Default implementation
44// ---------------------------------------------------------------------------
45
/// Retry policy that backs off exponentially and can randomize each delay.
///
/// Retryability is delegated to [`AgentError::is_retryable()`], which is
/// expected to cover transient failures such as
/// [`AgentError::ModelThrottled`] and [`AgentError::NetworkError`]; any
/// error it rejects ends the retry loop immediately.
///
/// # Defaults
///
/// | Field | Default |
/// |---|---|
/// | `max_attempts` | 3 |
/// | `base_delay` | 1 second |
/// | `max_delay` | 60 seconds |
/// | `multiplier` | 2.0 |
/// | `jitter` | `true` |
#[derive(Clone, Debug)]
pub struct DefaultRetryStrategy {
    /// Total attempt budget, counting the initial call. `should_retry`
    /// answers `false` as soon as `attempt >= max_attempts`.
    pub max_attempts: u32,

    /// Un-jittered delay for attempt 1; later attempts scale it by
    /// `multiplier`.
    pub base_delay: Duration,

    /// Ceiling for the exponential term: the un-jittered delay never exceeds
    /// this value. Jitter is applied after the cap, so a jittered delay can
    /// be up to 50% longer.
    pub max_delay: Duration,

    /// Growth factor. The un-jittered delay for attempt `n` is
    /// `base_delay * multiplier^(n - 1)`.
    pub multiplier: f64,

    /// When `true`, each delay is scaled by a random factor drawn from
    /// `[0.5, 1.5)` so that concurrent callers do not retry in lock-step.
    pub jitter: bool,
}
81
82impl Default for DefaultRetryStrategy {
83 fn default() -> Self {
84 Self {
85 max_attempts: 3,
86 base_delay: Duration::from_secs(1),
87 max_delay: Duration::from_secs(60),
88 multiplier: 2.0,
89 jitter: true,
90 }
91 }
92}
93
94impl DefaultRetryStrategy {
95 /// Set the maximum number of attempts.
96 #[must_use]
97 pub const fn with_max_attempts(mut self, n: u32) -> Self {
98 self.max_attempts = n;
99 self
100 }
101
102 /// Set the base delay before the first retry.
103 #[must_use]
104 pub const fn with_base_delay(mut self, d: Duration) -> Self {
105 self.base_delay = d;
106 self
107 }
108
109 /// Set the maximum delay cap.
110 #[must_use]
111 pub const fn with_max_delay(mut self, d: Duration) -> Self {
112 self.max_delay = d;
113 self
114 }
115
116 /// Set the exponential multiplier.
117 #[must_use]
118 pub const fn with_multiplier(mut self, m: f64) -> Self {
119 self.multiplier = m;
120 self
121 }
122
123 /// Enable or disable jitter.
124 #[must_use]
125 pub const fn with_jitter(mut self, j: bool) -> Self {
126 self.jitter = j;
127 self
128 }
129}
130
131impl RetryStrategy for DefaultRetryStrategy {
132 fn should_retry(&self, error: &AgentError, attempt: u32) -> bool {
133 if attempt >= self.max_attempts {
134 return false;
135 }
136 error.is_retryable()
137 }
138
139 fn as_any(&self) -> &dyn std::any::Any {
140 self
141 }
142
143 fn delay(&self, attempt: u32) -> Duration {
144 // Exponential back-off: base_delay * multiplier^(attempt - 1)
145 let exp = self
146 .multiplier
147 .powi(attempt.saturating_sub(1).try_into().unwrap_or(i32::MAX));
148 let base_secs = self.base_delay.as_secs_f64() * exp;
149
150 // Cap at max_delay.
151 let capped_secs = base_secs.min(self.max_delay.as_secs_f64());
152
153 // Optionally apply jitter: multiply by a random factor in [0.5, 1.5).
154 let final_secs = if self.jitter {
155 let jitter_factor = 0.5 + rand::random::<f64>(); // [0.5, 1.5)
156 capped_secs * jitter_factor
157 } else {
158 capped_secs
159 };
160
161 Duration::from_secs_f64(final_secs)
162 }
163}