tor_circmgr/err.rs
1//! Declare an error type for tor-circmgr
2
3use std::sync::Arc;
4
5use futures::task::SpawnError;
6use retry_error::RetryError;
7use thiserror::Error;
8
9use oneshot_fused_workaround as oneshot;
10use tor_error::{Bug, ErrorKind, HasKind, HasRetryTime};
11use tor_linkspec::{LoggedChanTarget, OwnedChanTarget};
12use tor_proto::circuit::UniqId;
13use web_time_compat::Instant;
14
15use crate::mgr::RestrictionFailed;
16
/// An error returned while looking up or building a circuit
#[derive(Error, Debug, Clone)]
#[non_exhaustive]
pub enum Error {
    /// We started building a circuit on a guard, but later decided not
    /// to use that guard.
    #[error("Discarded circuit {} because of speculative guard selection", _0.display_chan_circ())]
    GuardNotUsable(UniqId),

    /// We were waiting on a pending circuit, but it failed to report
    /// its status.
    #[error("Pending circuit(s) failed without reporting status")]
    PendingCanceled,

    /// We were waiting on a pending circuit, but it failed.
    ///
    /// The wrapped error is the failure reported for that circuit.
    #[error("Circuit we were waiting for failed to complete")]
    PendingFailed(#[source] Box<Error>),

    /// We were told that we could use a given circuit, but before we got a
    /// chance to try it, its usage changed so that we no longer found
    /// it suitable.
    ///
    /// This is a version of `UsageMismatched` for when a race is the
    /// likeliest explanation for the mismatch.
    #[error("Circuit seemed suitable, but another request got it first")]
    LostUsabilityRace(#[source] RestrictionFailed),

    /// A circuit succeeded, but was cancelled before it could be used.
    ///
    /// Circuits can be cancelled either by a call to
    /// `retire_all_circuits()`, or by a configuration change that
    /// makes old paths unusable.
    //
    // TODO: ideally this would also include the circuit identifier (e.g. its UniqId).
    // However, this would mean making Error generic over Id,
    // (this variant is constructed in AbstractCircMgr::do_launch,
    // where the circuit ID is generic).
    #[error("Circuit canceled")]
    CircCanceled,

    /// We were told that we could use a circuit, but when we tried, we found
    /// that its usage did not support what we wanted.
    ///
    /// This can happen due to a race when a number of tasks all decide that
    /// they can use the same pending circuit at once: one of them will restrict
    /// the circuit, and the others will get this error.
    ///
    /// See `LostUsabilityRace`.
    #[error("Couldn't apply circuit restriction")]
    UsageMismatched(#[from] RestrictionFailed),

    /// A circuit build took too long to finish.
    ///
    /// The circuit's `UniqId` is included in the message when it is known.
    #[error("Circuit{} took too long to build", OptUniqId(_0))]
    CircTimeout(Option<UniqId>),

    /// A request spent too long waiting for a circuit.
    #[error("Spent too long trying to construct circuits for this request")]
    RequestTimeout,

    /// Unable to find a relay in order to build a given path type.
    #[error("Can't find {role} for {path_kind} circuit: {problem}")]
    NoRelay {
        /// The kind of path we were trying to build
        path_kind: &'static str,
        /// The kind of relay we were trying to pick
        role: &'static str,
        /// The problem we encountered
        problem: String,
    },

    /// Problem creating or updating a guard manager.
    #[error("Problem creating or updating guards list")]
    GuardMgr(#[source] tor_guardmgr::GuardMgrError),

    /// Problem selecting a guard relay.
    #[error("Unable to select a guard relay")]
    Guard(#[from] tor_guardmgr::PickGuardError),

    /// Problem creating a vanguard manager.
    #[cfg(all(feature = "vanguards", feature = "hs-common"))]
    #[error("Unable to create vanguard manager")]
    VanguardMgrInit(#[from] tor_guardmgr::vanguards::VanguardMgrError),

    /// Unable to get or build a circuit, despite retrying.
    ///
    /// The display text is delegated entirely to the wrapped `RetryError`.
    #[error("{0}")]
    RequestFailed(RetryError<Box<Error>>),

    /// Problem with a channel.
    #[error("Problem opening a channel to {peer}")]
    Channel {
        /// Which relay we were trying to connect to
        peer: LoggedChanTarget,

        /// What went wrong
        #[source]
        cause: tor_chanmgr::Error,
    },

    /// Protocol issue while building a circuit.
    #[error(
        "Problem building circuit{}, while {}{}",
        OptUniqId(unique_id),
        action,
        WithOptPeer(peer)
    )]
    Protocol {
        /// The action that we were trying to take.
        action: &'static str,
        /// The peer that created the protocol error.
        ///
        /// This is set to None if we can't blame a single party.
        peer: Option<LoggedChanTarget>,
        /// The underlying error.
        #[source]
        error: tor_proto::Error,
        /// The UniqId of the circuit, if known.
        unique_id: Option<UniqId>,
    },

    /// Unable to spawn task
    #[error("Unable to spawn {spawning}")]
    Spawn {
        /// What we were trying to spawn
        spawning: &'static str,
        /// What happened when we tried to spawn it.
        #[source]
        cause: Arc<SpawnError>,
    },

    /// Problem loading or storing persistent state.
    #[error("Problem loading or storing state")]
    State(#[from] tor_persist::Error),

    /// An error caused by a programming issue, or a failure in another
    /// library that we can't work around.
    #[error("Programming error")]
    Bug(#[from] Bug),
}
154
// NOTE(review): judging by the macro name, these invocations presumably
// provide `AsRef<dyn std::error::Error>` for `Error` and for `Box<Error>`;
// confirm against the macro definition in `tor_error`.
tor_error::define_asref_dyn_std_error!(Error);
tor_error::define_asref_dyn_std_error!(Box<Error>);
157
158impl From<oneshot::Canceled> for Error {
159 fn from(_: oneshot::Canceled) -> Error {
160 Error::PendingCanceled
161 }
162}
163
164impl From<tor_guardmgr::GuardMgrError> for Error {
165 fn from(err: tor_guardmgr::GuardMgrError) -> Error {
166 match err {
167 tor_guardmgr::GuardMgrError::State(e) => Error::State(e),
168 _ => Error::GuardMgr(err),
169 }
170 }
171}
172
173impl HasKind for Error {
174 fn kind(&self) -> ErrorKind {
175 use Error as E;
176 use ErrorKind as EK;
177 match self {
178 E::Channel { cause, .. } => cause.kind(),
179 E::Bug(e) => e.kind(),
180 E::NoRelay { .. } => EK::NoPath,
181 E::PendingCanceled => EK::ReactorShuttingDown,
182 E::PendingFailed(e) => e.kind(),
183 E::CircTimeout(_) => EK::TorNetworkTimeout,
184 E::GuardNotUsable(_) => EK::TransientFailure,
185 E::UsageMismatched(_) => EK::Internal,
186 E::LostUsabilityRace(_) => EK::TransientFailure,
187 E::RequestTimeout => EK::TorNetworkTimeout,
188 E::RequestFailed(errs) => E::summarized_error_kind(errs.sources().map(AsRef::as_ref)),
189 E::CircCanceled => EK::TransientFailure,
190 E::Protocol { error, .. } => error.kind(),
191 E::State(e) => e.kind(),
192 E::GuardMgr(e) => e.kind(),
193 E::Guard(e) => e.kind(),
194 #[cfg(all(feature = "vanguards", feature = "hs-common"))]
195 E::VanguardMgrInit(e) => e.kind(),
196 E::Spawn { cause, .. } => cause.kind(),
197 }
198 }
199}
200
201impl HasRetryTime for Error {
202 fn retry_time(&self) -> tor_error::RetryTime {
203 use Error as E;
204 use tor_error::RetryTime as RT;
205
206 match self {
207 // If we fail because of a timeout, there is no need to wait before trying again.
208 E::CircTimeout(_) | E::RequestTimeout => RT::Immediate,
209
210 // If a circuit that seemed usable was restricted before we got a
211 // chance to try it, that's not our fault: we can try again
212 // immediately.
213 E::LostUsabilityRace(_) => RT::Immediate,
214
215 // If we can't build a path for the usage at all, then retrying
216 // won't help.
217 //
218 // TODO: In some rare cases, these errors can actually happen when
219 // we have walked ourselves into a snag in our path selection. See
220 // additional "TODO" comments in exitpath.rs.
221 E::NoRelay { .. } => RT::Never,
222
223 // If we encounter UsageMismatched without first converting to
224 // LostUsabilityRace, it reflects a real problem in our code.
225 E::UsageMismatched(_) => RT::Never,
226
227 // These don't reflect a real problem in the circuit building, but
228 // rather mean that we were waiting for something that didn't pan out.
229 // It's okay to try again after a short delay.
230 E::GuardNotUsable(_) | E::PendingCanceled | E::CircCanceled | E::Protocol { .. } => {
231 RT::AfterWaiting
232 }
233
234 // For Channel errors, we can mostly delegate the retry_time decision to
235 // the inner error.
236 //
237 // (We have to handle UnusableTarget specially, since it just means
238 // that we picked a guard or fallback we couldn't use. A channel to
239 // _that_ target will never succeed, but circuit operations using it
240 // will do fine.)
241 E::Channel {
242 cause: tor_chanmgr::Error::UnusableTarget(_),
243 ..
244 } => RT::AfterWaiting,
245 E::Channel { cause, .. } => cause.retry_time(),
246
247 // These errors are safe to delegate.
248 E::Guard(e) => e.retry_time(),
249 E::PendingFailed(e) => e.retry_time(),
250
251 // When we encounter a bunch of errors, choose the earliest.
252 E::RequestFailed(errors) => {
253 RT::earliest_approx(errors.sources().map(|err| err.retry_time()))
254 .unwrap_or(RT::Never)
255 }
256
257 #[cfg(all(feature = "vanguards", feature = "hs-common"))]
258 E::VanguardMgrInit(_) => RT::Never,
259
260 // These all indicate an internal error, or an error that shouldn't
261 // be able to happen when we're building a circuit.
262 E::Spawn { .. } | E::GuardMgr(_) | E::State(_) | E::Bug(_) => RT::Never,
263 }
264 }
265
266 fn abs_retry_time<F>(&self, now: Instant, choose_delay: F) -> tor_error::AbsRetryTime
267 where
268 F: FnOnce() -> std::time::Duration,
269 {
270 match self {
271 // We special-case this kind of problem, since we want to choose the
272 // earliest valid retry time.
273 Self::RequestFailed(errors) => tor_error::RetryTime::earliest_absolute(
274 errors.sources().map(|err| err.retry_time()),
275 now,
276 choose_delay,
277 )
278 .unwrap_or(tor_error::AbsRetryTime::Never),
279
280 // For everything else, we just delegate.
281 _ => self.retry_time().absolute(now, choose_delay),
282 }
283 }
284}
285
286impl Error {
287 /// Construct a new `Error` from a `SpawnError`.
288 pub(crate) fn from_spawn(spawning: &'static str, err: SpawnError) -> Error {
289 Error::Spawn {
290 spawning,
291 cause: Arc::new(err),
292 }
293 }
294
295 /// Return an integer representing the relative severity of this error.
296 ///
297 /// Used to determine which error to use when determining the kind of a retry error.
298 fn severity(&self) -> usize {
299 use Error as E;
300 match self {
301 E::GuardNotUsable(_) | E::LostUsabilityRace(_) => 10,
302 E::PendingCanceled => 20,
303 E::CircCanceled => 20,
304 E::CircTimeout(_) => 30,
305 E::RequestTimeout => 30,
306 E::NoRelay { .. } => 40,
307 E::GuardMgr(_) => 40,
308 E::Guard(_) => 40,
309 #[cfg(all(feature = "vanguards", feature = "hs-common"))]
310 E::VanguardMgrInit(_) => 40,
311 E::RequestFailed(_) => 40,
312 E::Channel { .. } => 40,
313 E::Protocol { .. } => 45,
314 E::Spawn { .. } => 90,
315 E::State(_) => 90,
316 E::UsageMismatched(_) => 90,
317 E::Bug(_) => 100,
318 E::PendingFailed(e) => e.severity(),
319 }
320 }
321
322 /// Return true if this error should not count against our total number of
323 /// failures.
324 ///
325 /// We count an error as an "internal reset" if it can happen in normal
326 /// operation and doesn't indicate a real problem with building a circuit, so much as an externally generated "need to retry".
327 pub(crate) fn is_internal_reset(&self) -> bool {
328 match self {
329 // This error is a reset because we expect it to happen while
330 // we're picking guards; if it happens, it means that we now know a
331 // good guard that we should have used instead.
332 Error::GuardNotUsable(_) => true,
333 // This error is a reset because it can only happen on the basis
334 // of a caller action (for example, a decision to reconfigure the
335 // `CircMgr`). If it happens, it just means that we should try again
336 // with the new configuration.
337 Error::CircCanceled => true,
338 // This error is a reset because it doesn't indicate anything wrong
339 // with the circuit: it just means that multiple requests all wanted
340 // to use the circuit at once, and they turned out not to be
341 // compatible with one another after the circuit was built.
342 Error::LostUsabilityRace(_) => true,
343 #[cfg(all(feature = "vanguards", feature = "hs-common"))]
344 Error::VanguardMgrInit(_) => false,
345 Error::PendingCanceled
346 | Error::PendingFailed(_)
347 | Error::UsageMismatched(_)
348 | Error::CircTimeout(_)
349 | Error::RequestTimeout
350 | Error::NoRelay { .. }
351 | Error::GuardMgr(_)
352 | Error::Guard(_)
353 | Error::RequestFailed(_)
354 | Error::Channel { .. }
355 | Error::Protocol { .. }
356 | Error::Spawn { .. }
357 | Error::State(_)
358 | Error::Bug(_) => false,
359 }
360 }
361
362 /// Return a list of the peers to "blame" for this error, if there are any.
363 pub fn peers(&self) -> Vec<&OwnedChanTarget> {
364 match self {
365 Error::RequestFailed(errors) => errors.sources().flat_map(|e| e.peers()).collect(),
366 Error::Channel { peer, .. } => vec![peer.as_inner()],
367 Error::Protocol {
368 peer: Some(peer), ..
369 } => vec![peer.as_inner()],
370 _ => vec![],
371 }
372 }
373
374 /// Given an iterator of errors that have occurred while attempting a single
375 /// failed operation, return the [`ErrorKind`] for the entire attempt.
376 pub fn summarized_error_kind<'a, I>(errs: I) -> ErrorKind
377 where
378 I: Iterator<Item = &'a Error>,
379 {
380 errs.max_by_key(|e| e.severity())
381 .map(|e| e.kind())
382 .unwrap_or(ErrorKind::Internal)
383 }
384}
385
/// A failure to build any preemptive circuits, with at least one error
/// condition.
///
/// This is a separate type since we never report it outside the crate.
/// It is a unit struct, so it carries no detail about the underlying
/// error(s).
#[derive(Debug)]
pub(crate) struct PreemptiveCircError;
392
/// Display adapter for an optional peer.
///
/// Renders as `" with <peer>"` when a peer is present, and as the empty
/// string otherwise.
struct WithOptPeer<'a, T>(&'a Option<T>);

impl<T: std::fmt::Display> std::fmt::Display for WithOptPeer<'_, T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.0 {
            Some(peer) => write!(f, " with {}", peer),
            // No peer to blame: contribute nothing to the message.
            None => Ok(()),
        }
    }
}
408
409/// Helper to display an optional UniqId.
410struct OptUniqId<'a>(&'a Option<UniqId>);
411
412impl<'a> std::fmt::Display for OptUniqId<'a> {
413 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
414 if let Some(unique_id) = self.0 {
415 write!(f, " {}", unique_id.display_chan_circ())
416 } else {
417 write!(f, "")
418 }
419 }
420}