tor_error/retriable.rs
1//! Declare the `RetryTime` enumeration and related code.
2
3use derive_more::{From, Into};
4use std::{cmp::Ordering, time::Duration};
5use strum::EnumDiscriminants;
6use web_time_compat::Instant;
7
8/// A description of when an operation may be retried.
9///
10/// # Retry times values are contextual.
11///
12/// Note that retrying is necessarily contextual, depending on what exactly
13/// we're talking about retrying.
14///
15/// For an example of how context matters: suppose that we try to build a
16/// circuit, and encounter a failure extending to the second hop. If we try to
17/// build a circuit _through the same path_ immediately, it's likely to fail
18/// again. But if we try to build a circuit through a different path, then
19/// there's no reason to expect that same kind of error.
20///
21/// Thus, the same inner error condition ("failed to extend to the nth hop") can
22/// indicate either a "Retry after waiting for a while" or "Retry immediately."
23///
24/// # Retry times depend on what we think might change.
25///
26/// Whether retrying will help depends on what we think is likely to change in
27/// the near term.
28///
29/// For example, we generally assume an unreachable relay has some likelihood of
30/// becoming reachable in the near future, and therefore connecting to such a
31/// relay is worth retrying.
32///
33/// On the other hand, we _don't_ assume that the network is changing wildly
34/// over time. Thus, if there is currently no relay that supports delivering
35/// traffic to port 23 (telnet), we say that building a request for such a relay
36/// is not retriable, even though technically such a relay might appear in the
37/// next consensus.
38#[derive(Copy, Clone, Debug, Eq, PartialEq, EnumDiscriminants)]
39#[non_exhaustive]
40// We define a discriminant type so we can simplify loose_cmp.
41#[strum_discriminants(derive(Ord, PartialOrd))]
42// We don't want to expose RetryTimeDiscriminants.
43#[strum_discriminants(vis())]
44pub enum RetryTime {
45 /// The operation can be retried immediately, and no delay is needed.
46 ///
47 /// The recipient of this `RetryTime` variant may retry the operation
48 /// immediately without waiting.
49 ///
50 /// This case should be used cautiously: it risks making code retry in a
51 /// loop without delay. It should only be used for error conditions that
52 /// are necessarily produced via a process that itself introduces a delay.
53 /// (For example, this case is suitable for errors caused by a remote
54 /// timeout.)
55 Immediate,
56
57 /// The operation can be retried after a short delay, to prevent overloading
58 /// the network.
59 ///
60 /// The recipient of this `RetryTime` variant should delay a short amount of
61 /// time before retrying. The amount of time to delay should be randomized,
62 /// and should tend to grow larger the more failures there have been
63 /// recently for the given operation. (The `RetryDelay` type from
64 /// `tor-basic-utils` is suitable for managing this calculation.)
65 ///
66 /// This case should be used for problems that tend to be "self correcting",
67 /// such as remote server failures (the server might come back up).
68 AfterWaiting,
69
70 /// The operation can be retried after a particular delay.
71 ///
72 /// The recipient of this `RetryTime` variant should wait for at least the
73 /// given duration before retrying the operation.
74 ///
75 /// This case should only be used if there is some reason not to return
76 /// `AfterWaiting`: for example, if the implementor is providing their own
77 /// back-off algorithm instead of using `RetryDelay.`
78 ///
79 /// (This is a separate variant from `At`, since the constructor may not
80 /// have convenient access to (a mocked view of) the current time. If you
81 /// know that the current time is `now`, then `After(d)` is equivalent to
82 /// `At(now + d)`.)
83 After(Duration),
84
85 /// The operation can be retried at some particular time in the future.
86 ///
87 /// The recipient of this this `RetryTime` variant should wait until the
88 /// current time (as returned by `Instant::get` or `SleepProvider::now` as
89 /// appropriate) is at least this given instant.
90 ///
91 /// This case is appropriate for when we have a failure condition caused by
92 /// waiting for multiple other timeouts. (For example, if we believe that
93 /// all our guards are down, then we won't be able to try getting a guard
94 /// until the next time guard is scheduled to be marked as retriable.)
95 At(Instant),
96
97 /// Retrying is unlikely to make this operation succeed, unless something
98 /// else is fixed first.
99 ///
100 /// The recipient of this `RetryTime` variant should generally give up, and
101 /// stop retrying the given operation.
102 ///
103 /// We don't mean "literally" that the operation will never succeed: only
104 /// that retrying it in the near future without fixing the underlying cause
105 /// is unlikely to help.
106 ///
107 /// This case is appropriate for issues like misconfiguration, internal
108 /// errors, and requests for operations that the network doesn't support.
109 ///
110 /// This case is also appropriate for a problem that is "technically"
111 /// retriable, but where any resolution is likelier to take days or weeks
112 /// instead of minutes or hours.
113 Never,
114}
115
116/// A `RetryTime` wrapped so that it compares according to [`RetryTime::loose_cmp`]
117#[derive(From, Into, Copy, Clone, Debug, Eq, PartialEq)]
118pub struct LooseCmpRetryTime(RetryTime);
119
120/// Trait for an error object that can tell us when the operation which
121/// generated it can be retried.
122pub trait HasRetryTime {
123 /// Return the time when the operation that gave this error can be retried.
124 ///
125 /// See all caveats and explanations on [`RetryTime`].
126 fn retry_time(&self) -> RetryTime;
127
128 /// Return an absolute retry when the operation that gave this error can be
129 /// retried.
130 ///
131 /// Requires that `now` is the current time, and `choose_delay` is a
132 /// function to choose a delay for [`RetryTime::AfterWaiting`].
133 fn abs_retry_time<F>(&self, now: Instant, choose_delay: F) -> AbsRetryTime
134 where
135 F: FnOnce() -> Duration,
136 Self: Sized,
137 {
138 self.retry_time().absolute(now, choose_delay)
139 }
140}
141
/// A [`RetryTime`] that has been resolved to an absolute point in time.
///
/// Where `RetryTime` may describe a relative or not-yet-chosen delay, every
/// value of this type is either "right now", one particular instant, or
/// "never".  Use [`RetryTime::absolute`] to obtain one.
///
/// The derived ordering follows declaration order: `Immediate` first, then
/// `At` (ordered by its instant), then `Never`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
#[allow(clippy::exhaustive_enums)]
pub enum AbsRetryTime {
    /// See [`RetryTime::Immediate`].
    Immediate,
    /// See [`RetryTime::At`].
    At(Instant),
    /// See [`RetryTime::Never`].
    Never,
}

impl AbsRetryTime {
    /// Build the `AbsRetryTime` for the instant that lies `plus` after `base`.
    ///
    /// If `base + plus` cannot be represented (the checked addition yields
    /// `None`), the result is [`AbsRetryTime::Never`].
    fn from_sum(base: Instant, plus: Duration) -> Self {
        base.checked_add(plus)
            .map_or(AbsRetryTime::Never, AbsRetryTime::At)
    }
}
166
167impl RetryTime {
168 /// Convert this [`RetryTime`] in to an absolute time.
169 ///
170 /// Requires that `now` is the current time, and `choose_delay` is a
171 /// function to choose a delay for [`RetryTime::AfterWaiting`].
172 pub fn absolute<F>(self, now: Instant, choose_delay: F) -> AbsRetryTime
173 where
174 F: FnOnce() -> Duration,
175 {
176 match self {
177 RetryTime::Immediate => AbsRetryTime::Immediate,
178 RetryTime::AfterWaiting => AbsRetryTime::from_sum(now, choose_delay()),
179 RetryTime::After(d) => AbsRetryTime::from_sum(now, d),
180 RetryTime::At(t) => AbsRetryTime::At(t),
181 RetryTime::Never => AbsRetryTime::Never,
182 }
183 }
184
185 /// Convert all the provided `items` into [`AbsRetryTime`] values, and
186 /// return the earliest one.
187 ///
188 /// Requires that `now` is the current time, and `choose_delay` is a
189 /// function to choose a delay for [`RetryTime::AfterWaiting`].
190 ///
191 /// Differs from `items.map(AbsRetryTime::absolute(now,
192 /// choose_delay)).min()` in that it calls `choose_delay` at most once.
193 pub fn earliest_absolute<I, F>(items: I, now: Instant, choose_delay: F) -> Option<AbsRetryTime>
194 where
195 I: Iterator<Item = RetryTime>,
196 F: FnOnce() -> Duration,
197 {
198 let chosen_delay = std::cell::LazyCell::new(|| AbsRetryTime::from_sum(now, choose_delay()));
199
200 items
201 .map(|item| match item {
202 RetryTime::AfterWaiting => *chosen_delay,
203 other => other.absolute(now, || unreachable!()),
204 })
205 .min()
206 }
207
208 /// Return the "approximately earliest" item for an iterator of retry times.
209 ///
210 /// This is necessarily an approximation, since we can't be sure what time
211 /// will be chosen if the retry is supposed to happen at a random time, and
212 /// therefore cannot tell whether `AfterWaiting` comes before or after
213 /// particular `At` and `After` instances.
214 ///
215 /// If you need an exact answer, use earliest_absolute.
216 pub fn earliest_approx<I>(items: I) -> Option<RetryTime>
217 where
218 I: Iterator<Item = RetryTime>,
219 {
220 items.min_by(|a, b| a.loose_cmp(b))
221 }
222
223 /// A loose-but-total comparison operator, suitable for choosing a retry
224 /// time when multiple attempts have failed.
225 ///
226 /// If you need an absolute comparison operator, convert to [`AbsRetryTime`] first.
227 ///
228 /// See also:
229 /// [`LooseCmpRetryTime`], a wrapper for `RetryTime` that uses this comparison.
230 pub fn loose_cmp(&self, other: &Self) -> Ordering {
231 use RetryTime as RT;
232
233 match (self, other) {
234 // When we have the same type with an internal embedded duration or time,
235 // we compare based on the duration or time.
236 (RT::After(d1), RetryTime::After(d2)) => d1.cmp(d2),
237 (RT::At(t1), RetryTime::At(t2)) => t1.cmp(t2),
238
239 // Otherwise, we compare based on discriminant type.
240 //
241 // This can't do a perfect "apples-to-apples" comparison for
242 // `AfterWaiting` vs `At` vs `After`, but at least it imposes a
243 // total order.
244 (a, b) => RetryTimeDiscriminants::from(a).cmp(&RetryTimeDiscriminants::from(b)),
245 }
246 }
247}
248
249impl Ord for LooseCmpRetryTime {
250 fn cmp(&self, other: &Self) -> Ordering {
251 self.0.loose_cmp(&other.0)
252 }
253}
254impl PartialOrd for LooseCmpRetryTime {
255 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
256 Some(self.cmp(other))
257 }
258}
259
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_time_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->

    use super::*;
    use web_time_compat::InstantExt;

    #[test]
    fn comparison() {
        use RetryTime as RT;
        let second = Duration::from_secs(1);
        let start = Instant::get();

        // Written by hand in the order `loose_cmp` is expected to impose.
        let ordered = vec![
            RT::Immediate,
            RT::AfterWaiting,
            RT::After(second * 10),
            RT::After(second * 20),
            RT::At(start),
            RT::At(start + second * 30),
            RT::Never,
        ];

        // Every pair must compare under `loose_cmp` the way its indices do.
        for (lhs_idx, lhs) in ordered.iter().enumerate() {
            for (rhs_idx, rhs) in ordered.iter().enumerate() {
                assert_eq!(lhs.loose_cmp(rhs), lhs_idx.cmp(&rhs_idx));
            }
        }
    }

    #[test]
    fn abs_comparison() {
        use AbsRetryTime as ART;
        let second = Duration::from_secs(1);
        let start = Instant::get();

        // Written by hand in the order the derived `Ord` is expected to give.
        let ordered = vec![
            ART::Immediate,
            ART::At(start),
            ART::At(start + second * 30),
            ART::Never,
        ];

        // Every pair must compare under `Ord::cmp` the way its indices do.
        for (lhs_idx, lhs) in ordered.iter().enumerate() {
            for (rhs_idx, rhs) in ordered.iter().enumerate() {
                assert_eq!(lhs.cmp(rhs), lhs_idx.cmp(&rhs_idx));
            }
        }
    }

    #[test]
    fn earliest_absolute() {
        let delay = Duration::from_secs(1);
        let start = Instant::get();

        let candidates = vec![RetryTime::AfterWaiting, RetryTime::Never];

        // `AfterWaiting` resolves to `start + delay`, which sorts before `Never`.
        let soonest = RetryTime::earliest_absolute(candidates.into_iter(), start, || delay)
            .expect("no absolute time");
        assert_eq!(soonest, AbsRetryTime::At(start + delay));
    }

    #[test]
    fn abs_from_sum() {
        let start = Instant::get();
        let step = Duration::from_secs(1);

        // A representable sum becomes `At`.
        let in_range = AbsRetryTime::from_sum(start, step);
        assert_eq!(in_range, AbsRetryTime::At(start + step));

        // An unrepresentable sum collapses to `Never`.
        let overflowed = AbsRetryTime::from_sum(start, Duration::MAX);
        assert_eq!(overflowed, AbsRetryTime::Never);
    }
}
352}