tor_guardmgr/lib.rs
1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![doc = include_str!("../README.md")]
3// @@ begin lint list maintained by maint/add_warning @@
4#![allow(renamed_and_removed_lints)] // @@REMOVE_WHEN(ci_arti_stable)
5#![allow(unknown_lints)] // @@REMOVE_WHEN(ci_arti_nightly)
6#![warn(missing_docs)]
7#![warn(noop_method_call)]
8#![warn(unreachable_pub)]
9#![warn(clippy::all)]
10#![deny(clippy::await_holding_lock)]
11#![deny(clippy::cargo_common_metadata)]
12#![deny(clippy::cast_lossless)]
13#![deny(clippy::checked_conversions)]
14#![warn(clippy::cognitive_complexity)]
15#![deny(clippy::debug_assert_with_mut_call)]
16#![deny(clippy::exhaustive_enums)]
17#![deny(clippy::exhaustive_structs)]
18#![deny(clippy::expl_impl_clone_on_copy)]
19#![deny(clippy::fallible_impl_from)]
20#![deny(clippy::implicit_clone)]
21#![deny(clippy::large_stack_arrays)]
22#![warn(clippy::manual_ok_or)]
23#![deny(clippy::missing_docs_in_private_items)]
24#![warn(clippy::needless_borrow)]
25#![warn(clippy::needless_pass_by_value)]
26#![warn(clippy::option_option)]
27#![deny(clippy::print_stderr)]
28#![deny(clippy::print_stdout)]
29#![warn(clippy::rc_buffer)]
30#![deny(clippy::ref_option_ref)]
31#![warn(clippy::semicolon_if_nothing_returned)]
32#![warn(clippy::trait_duplication_in_bounds)]
33#![deny(clippy::unchecked_time_subtraction)]
34#![deny(clippy::unnecessary_wraps)]
35#![warn(clippy::unseparated_literal_suffix)]
36#![deny(clippy::unwrap_used)]
37#![deny(clippy::mod_module_files)]
38#![allow(clippy::let_unit_value)] // This can reasonably be done for explicitness
39#![allow(clippy::uninlined_format_args)]
40#![allow(clippy::significant_drop_in_scrutinee)] // arti/-/merge_requests/588/#note_2812945
41#![allow(clippy::result_large_err)] // temporary workaround for arti#587
42#![allow(clippy::needless_raw_string_hashes)] // complained-about code is fine, often best
43#![allow(clippy::needless_lifetimes)] // See arti#1765
44#![allow(mismatched_lifetime_syntaxes)] // temporary workaround for arti#2060
45#![allow(clippy::collapsible_if)] // See arti#2342
46#![deny(clippy::unused_async)]
47#![deny(clippy::string_slice)] // See arti#2571
48//! <!-- @@ end lint list maintained by maint/add_warning @@ -->
49
50// TODO #1645 (either remove this, or decide to have it everywhere)
51#![cfg_attr(not(all(feature = "full", feature = "experimental")), allow(unused))]
52
53// Glossary:
54// Primary guard
55// Sample
56// confirmed
57// filtered
58
59use derive_deftly::Deftly;
60use futures::channel::mpsc;
61use itertools::Either;
62use serde::{Deserialize, Serialize};
63use std::collections::HashMap;
64use std::net::SocketAddr;
65use std::sync::{Arc, Mutex, Weak};
66#[cfg(feature = "bridge-client")]
67use tor_error::internal;
68use tor_linkspec::{OwnedChanTarget, OwnedCircTarget, RelayId, RelayIdSet};
69use tor_netdir::NetDirProvider;
70use tor_proto::ClockSkew;
71use tor_rtcompat::SpawnExt;
72use tor_units::BoundedInt32;
73use tracing::{debug, info, instrument, trace, warn};
74use web_time_compat::{Duration, Instant, SystemTime};
75
76use tor_config::derive::prelude::*;
77use tor_config::{ExplicitOrAuto, impl_standard_builder};
78use tor_config::{ReconfigureError, impl_not_auto_value};
79use tor_config::{define_list_builder_accessors, define_list_builder_helper};
80use tor_netdir::{NetDir, Relay, params::NetParameters};
81use tor_persist::{DynStorageHandle, StateMgr};
82use tor_rtcompat::Runtime;
83
84#[cfg(feature = "bridge-client")]
85pub mod bridge;
86mod config;
87mod daemon;
88mod dirstatus;
89mod err;
90mod events;
91pub mod fallback;
92mod filter;
93mod guard;
94mod ids;
95mod pending;
96mod sample;
97mod skew;
98mod util;
99#[cfg(feature = "vanguards")]
100pub mod vanguards;
101
102#[cfg(not(feature = "bridge-client"))]
103#[path = "bridge_disabled.rs"]
104pub mod bridge;
105
106#[cfg(any(test, feature = "testing"))]
107pub use config::testing::TestConfig;
108
109#[cfg(test)]
110use oneshot_fused_workaround as oneshot;
111
112pub use config::GuardMgrConfig;
113pub use err::{GuardMgrConfigError, GuardMgrError, PickGuardError};
114pub use events::ClockSkewEvents;
115pub use filter::GuardFilter;
116pub use ids::FirstHopId;
117pub use pending::{GuardMonitor, GuardStatus, GuardUsable};
118pub use skew::SkewEstimate;
119
120#[cfg(feature = "vanguards")]
121pub use vanguards::VanguardMgrError;
122
123use pending::{PendingRequest, RequestId};
124use sample::{GuardSet, Universe, UniverseRef};
125
126use crate::ids::{FirstHopIdInner, GuardId};
127
/// A "guard manager" that selects and remembers a persistent set of
/// guard nodes.
///
/// This is a "handle"; clones of it share state.  (The shared state lives
/// behind an `Arc<Mutex<_>>`, so cloning a `GuardMgr` clones the runtime
/// and the pointer to that state, not the state itself.)
#[derive(Clone)]
pub struct GuardMgr<R: Runtime> {
    /// An asynchronous runtime object.
    ///
    /// GuardMgr uses this runtime for timing, timeouts, and spawning
    /// tasks.
    runtime: R,

    /// Internal state for the guard manager.
    ///
    /// Every clone of this handle points at the same `GuardMgrInner`.
    inner: Arc<Mutex<GuardMgrInner>>,
}
143
/// Helper type that holds the data used by a [`GuardMgr`].
///
/// This would just be a [`GuardMgr`], except that it needs to sit inside
/// a `Mutex` and get accessed by daemon tasks.
struct GuardMgrInner {
    /// Last time when we marked all of our primary guards as retriable.
    ///
    /// We keep track of this time so that we can rate-limit
    /// these attempts.
    last_primary_retry_time: Instant,

    /// Persistent guard manager state.
    ///
    /// This object remembers one or more persistent sets of guards that we can
    /// use, along with their relative priorities and statuses.
    guards: GuardSets,

    /// The current filter that we're using to decide which guards are
    /// supported.
    //
    // TODO: This field is duplicated in the current active [`GuardSet`]; we
    // should fix that.
    filter: GuardFilter,

    /// Configuration values derived from the consensus parameters.
    ///
    /// This is updated whenever the consensus parameters change.
    params: GuardParams,

    /// A mpsc channel, used to tell the task running in
    /// [`daemon::report_status_events`] about a new event to monitor.
    ///
    /// This uses an `UnboundedSender` so that we don't have to await
    /// while sending the message, which in turn allows the GuardMgr
    /// API to be simpler.  The risk, however, is that there's no
    /// backpressure in the event that the task running
    /// [`daemon::report_status_events`] fails to read from this
    /// channel.
    ctrl: mpsc::UnboundedSender<daemon::Msg>,

    /// Information about guards that we've given out, but where we have
    /// not yet heard whether the guard was successful.
    ///
    /// Upon learning whether the guard was successful, the pending
    /// requests in this map may be either moved to `waiting`, or
    /// discarded.
    ///
    /// There can be multiple pending requests corresponding to the
    /// same guard.
    pending: HashMap<RequestId, PendingRequest>,

    /// A list of pending requests for which we have heard that the
    /// guard was successful, but we have not yet decided whether the
    /// circuit may be used.
    ///
    /// There can be multiple waiting requests corresponding to the
    /// same guard.
    waiting: Vec<PendingRequest>,

    /// A list of fallback directories used to access the directory system
    /// when no other directory information is yet known.
    fallbacks: fallback::FallbackState,

    /// Location in which to store persistent state.
    storage: DynStorageHandle<GuardSets>,

    /// A sender object to publish changes in our estimated clock skew.
    send_skew: postage::watch::Sender<Option<SkewEstimate>>,

    /// A receiver object to hand out to observers who want to know about
    /// changes in our estimated clock skew.
    recv_skew: events::ClockSkewEvents,

    /// A netdir provider that we can use for adding new guards when
    /// insufficient guards are available.
    ///
    /// This has to be an Option so it can be initialized from None: at the
    /// time a GuardMgr is created, there is no NetDirProvider for it to use.
    ///
    /// We hold only a `Weak` reference, so this object does not keep the
    /// provider alive.
    netdir_provider: Option<Weak<dyn NetDirProvider>>,

    /// A bridge descriptor provider that we can use for discovering bridge
    /// descriptors.
    ///
    /// This has to be an Option so it can be initialized from None: at the time
    /// a GuardMgr is created, there is no BridgeDescProvider for it to use.
    ///
    /// We hold only a `Weak` reference, so this object does not keep the
    /// provider alive.
    #[cfg(feature = "bridge-client")]
    bridge_desc_provider: Option<Weak<dyn bridge::BridgeDescProvider>>,

    /// A list of the bridges that we are configured to use, or "None" if we are
    /// not configured to use bridges.
    #[cfg(feature = "bridge-client")]
    configured_bridges: Option<Arc<[bridge::BridgeConfig]>>,
}
236
/// A selector that tells us which [`GuardSet`] of several is currently in use.
///
/// The `strum::EnumIter` derive lets code walk over every selector in turn
/// (see `GuardSets::copy_status_from`).
#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, strum::EnumIter)]
enum GuardSetSelector {
    /// The default guard set is currently in use: that's the one that we use
    /// when we have no filter installed, or the filter permits most of the
    /// guards on the network.
    #[default]
    Default,
    /// A "restrictive" guard set is currently in use: that's the one that we
    /// use when we have a filter that excludes a large fraction of the guards
    /// on the network.
    Restricted,
    /// The "bridges" guard set is currently in use: we are selecting our guards
    /// from among the universe of configured bridges.
    ///
    /// Only present when built with the `bridge-client` feature.
    #[cfg(feature = "bridge-client")]
    Bridges,
}
254
/// Describes the [`Universe`] that a guard sample should take its guards from.
///
/// Each [`GuardSetSelector`] maps to exactly one of these; see
/// `GuardSetSelector::universe_type`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum UniverseType {
    /// Take information from the network directory.
    NetDir,
    /// Take information from the configured bridges.
    ///
    /// Only present when built with the `bridge-client` feature.
    #[cfg(feature = "bridge-client")]
    BridgeSet,
}
264
265impl GuardSetSelector {
266 /// Return a description of which [`Universe`] this guard sample should take
267 /// its guards from.
268 fn universe_type(&self) -> UniverseType {
269 match self {
270 GuardSetSelector::Default | GuardSetSelector::Restricted => UniverseType::NetDir,
271 #[cfg(feature = "bridge-client")]
272 GuardSetSelector::Bridges => UniverseType::BridgeSet,
273 }
274 }
275}
276
/// Persistent state for a guard manager, as serialized to disk.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct GuardSets {
    /// Which set of guards is currently in use?
    ///
    /// This is marked `#[serde(skip)]`, so it is not part of the stored
    /// state.
    #[serde(skip)]
    active_set: GuardSetSelector,

    /// The default set of guards to use.
    ///
    /// We use this one when there is no filter, or the filter permits most of the
    /// guards on the network.
    default: GuardSet,

    /// A guard set to use when we have a restrictive filter.
    #[serde(default)]
    restricted: GuardSet,

    /// A guard set sampled from our configured bridges.
    #[serde(default)]
    #[cfg(feature = "bridge-client")]
    bridges: GuardSet,

    /// Unrecognized fields, including (possibly) other guard sets.
    ///
    /// These are collected via `#[serde(flatten)]` so that they can be
    /// kept alongside the fields we do understand.
    #[serde(flatten)]
    remaining: HashMap<String, tor_persist::JsonValue>,
}
303
/// The key (filename) we use for storing our persistent guard state in the
/// `StateMgr`.
///
/// `GuardMgr::new` passes this key to `StateMgr::create_handle` to obtain
/// the storage handle for our [`GuardSets`].
///
/// We used to store this in a different format in a filename called
/// "default_guards" (before Arti 0.1.0).
const STORAGE_KEY: &str = "guards";
310
/// A description of which circuits to retire because of a configuration change.
///
/// This is the success value of [`GuardMgr::reconfigure`]; it is marked
/// `#[must_use]` so that callers do not silently ignore it.
///
/// TODO(nickm): Eventually we will want to add a "Some" here, to support
/// removing only those circuits that correspond to no-longer-usable guards.
#[derive(Clone, Debug, Eq, PartialEq)]
#[must_use]
#[non_exhaustive]
pub enum RetireCircuits {
    /// There's no need to retire any circuits.
    None,
    /// All circuits should be retired.
    All,
}
324
325impl<R: Runtime> GuardMgr<R> {
/// Create a new "empty" guard manager and launch its background tasks.
///
/// It won't be able to hand out any guards until a [`NetDirProvider`] has
/// been installed.
///
/// Any guard state already stored under `STORAGE_KEY` in `state_mgr` is
/// loaded; if nothing is stored, we start from the default state.
///
/// # Errors
///
/// Returns an error if the stored state cannot be loaded, if (when built
/// with `bridge-client`) the bridge configuration cannot be applied, or
/// if either background task cannot be spawned.
#[instrument(skip_all, level = "trace")]
pub fn new<S>(
    runtime: R,
    state_mgr: S,
    config: &impl GuardMgrConfig,
) -> Result<Self, GuardMgrError>
where
    S: StateMgr + Send + Sync + 'static,
{
    // `ctrl` is kept in the shared state; `rcv` is handed to the status
    // reporting task spawned below.
    let (ctrl, rcv) = mpsc::unbounded();
    let storage: DynStorageHandle<GuardSets> = state_mgr.create_handle(STORAGE_KEY);
    // TODO(nickm): We should do something about the old state in
    // `default_guards`.  Probably it would be best to delete it.  We could
    // try to migrate it instead, but that's beyond the stability guarantee
    // that we're getting at this stage of our (pre-0.1) development.
    let state = storage.load()?.unwrap_or_default();

    let (send_skew, recv_skew) = postage::watch::channel();
    let recv_skew = ClockSkewEvents { inner: recv_skew };

    let inner = Arc::new(Mutex::new(GuardMgrInner {
        guards: state,
        filter: GuardFilter::unfiltered(),
        last_primary_retry_time: runtime.now(),
        params: GuardParams::default(),
        ctrl,
        pending: HashMap::new(),
        waiting: Vec::new(),
        fallbacks: config.fallbacks().into(),
        storage,
        send_skew,
        recv_skew,
        // No providers exist yet; they are installed later.
        netdir_provider: None,
        #[cfg(feature = "bridge-client")]
        bridge_desc_provider: None,
        #[cfg(feature = "bridge-client")]
        configured_bridges: None,
    }));
    #[cfg(feature = "bridge-client")]
    {
        // The lock guard is confined to this block, so it is released
        // before we spawn any tasks.
        let mut inner = inner.lock().expect("lock poisoned");
        // TODO(nickm): This calls `GuardMgrInner::update`. Will we mind doing so before any
        // providers are configured?  I think not, but we should make sure.
        let _: RetireCircuits =
            inner.replace_bridge_config(config, runtime.wallclock(), runtime.now())?;
    }
    {
        // The tasks get only `Weak` references to the shared state.
        let weak_inner = Arc::downgrade(&inner);
        let rt_clone = runtime.clone();
        runtime
            .spawn(daemon::report_status_events(rt_clone, weak_inner, rcv))
            .map_err(|e| GuardMgrError::from_spawn("guard status event reporter", e))?;
    }
    {
        let rt_clone = runtime.clone();
        let weak_inner = Arc::downgrade(&inner);
        runtime
            .spawn(daemon::run_periodic(rt_clone, weak_inner))
            .map_err(|e| GuardMgrError::from_spawn("periodic guard updater", e))?;
    }
    Ok(GuardMgr { runtime, inner })
}
392
393 /// Install a [`NetDirProvider`] for use by this guard manager.
394 ///
395 /// It will be used to keep the guards up-to-date with changes from the
396 /// network directory, and to find new guards when no NetDir is provided to
397 /// select_guard().
398 ///
399 /// TODO: we should eventually return some kind of a task handle from this
400 /// task, even though it is not strictly speaking periodic.
401 ///
402 /// The guardmgr retains only a `Weak` reference to `provider`,
403 /// `install_netdir_provider` downgrades it on entry,
404 // TODO add ref to document when https://gitlab.torproject.org/tpo/core/arti/-/issues/624
405 // is fixed. Also, maybe take an owned `Weak` to start with.
406 //
407 /// # Panics
408 ///
409 /// Panics if a [`NetDirProvider`] is already installed.
410 pub fn install_netdir_provider(
411 &self,
412 provider: &Arc<dyn NetDirProvider>,
413 ) -> Result<(), GuardMgrError> {
414 let weak_provider = Arc::downgrade(provider);
415 {
416 let mut inner = self.inner.lock().expect("Poisoned lock");
417 assert!(inner.netdir_provider.is_none());
418 inner.netdir_provider = Some(weak_provider.clone());
419 }
420 let weak_inner = Arc::downgrade(&self.inner);
421 let rt_clone = self.runtime.clone();
422 self.runtime
423 .spawn(daemon::keep_netdir_updated(
424 rt_clone,
425 weak_inner,
426 weak_provider,
427 ))
428 .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
429 Ok(())
430 }
431
/// Configure a new [`bridge::BridgeDescProvider`] for this [`GuardMgr`].
///
/// It will be used to learn about changes in the set of available bridge
/// descriptors; we'll inform it whenever our desired set of bridge
/// descriptors changes.
///
/// The guardmgr retains only a `Weak` reference to `provider`, which is
/// downgraded on entry.
///
/// TODO: Same todo as in `install_netdir_provider` about task handles.
///
/// # Errors
///
/// Returns an error if the bridge descriptor updater task cannot be
/// spawned.
///
/// # Panics
///
/// Panics if a [`bridge::BridgeDescProvider`] is already installed.
#[cfg(feature = "bridge-client")]
pub fn install_bridge_desc_provider(
    &self,
    provider: &Arc<dyn bridge::BridgeDescProvider>,
) -> Result<(), GuardMgrError> {
    let weak_provider = Arc::downgrade(provider);
    {
        // Keep the lock only long enough to record the provider.
        let mut inner = self.inner.lock().expect("Poisoned lock");
        assert!(inner.bridge_desc_provider.is_none());
        inner.bridge_desc_provider = Some(weak_provider.clone());
    }

    let weak_inner = Arc::downgrade(&self.inner);
    let rt_clone = self.runtime.clone();
    self.runtime
        .spawn(daemon::keep_bridge_descs_updated(
            rt_clone,
            weak_inner,
            weak_provider,
        ))
        // Name the task that actually failed to start: this is the bridge
        // descriptor updater, not the netdir updater.
        .map_err(|e| {
            GuardMgrError::from_spawn("periodic guard bridge descriptor updater", e)
        })?;

    Ok(())
}
467
468 /// Flush our current guard state to the state manager, if there
469 /// is any unsaved state.
470 pub fn store_persistent_state(&self) -> Result<(), GuardMgrError> {
471 let inner = self.inner.lock().expect("Poisoned lock");
472 trace!("Flushing guard state to disk.");
473 inner.storage.store(&inner.guards)?;
474 Ok(())
475 }
476
477 /// Reload state from the state manager.
478 ///
479 /// We only call this method if we _don't_ have the lock on the state
480 /// files. If we have the lock, we only want to save.
481 #[instrument(level = "trace", skip_all)]
482 pub fn reload_persistent_state(&self) -> Result<(), GuardMgrError> {
483 let mut inner = self.inner.lock().expect("Poisoned lock");
484 if let Some(new_guards) = inner.storage.load()? {
485 inner.replace_guards_with(new_guards, self.runtime.wallclock(), self.runtime.now());
486 }
487 Ok(())
488 }
489
490 /// Switch from having an unowned persistent state to having an owned one.
491 ///
492 /// Requires that we hold the lock on the state files.
493 #[instrument(level = "trace", skip_all)]
494 pub fn upgrade_to_owned_persistent_state(&self) -> Result<(), GuardMgrError> {
495 let mut inner = self.inner.lock().expect("Poisoned lock");
496 debug_assert!(inner.storage.can_store());
497 let new_guards = inner.storage.load()?.unwrap_or_default();
498 let wallclock = self.runtime.wallclock();
499 let now = self.runtime.now();
500 inner.replace_guards_with(new_guards, wallclock, now);
501 Ok(())
502 }
503
504 /// Return true if `netdir` has enough information to safely become our new netdir.
505 pub fn netdir_is_sufficient(&self, netdir: &NetDir) -> bool {
506 let mut inner = self.inner.lock().expect("Poisoned lock");
507 if inner.guards.active_set.universe_type() != UniverseType::NetDir {
508 // If we aren't using the netdir, this isn't something we want to look at.
509 return true;
510 }
511 inner
512 .guards
513 .active_guards_mut()
514 .n_primary_without_id_info_in(netdir)
515 == 0
516 }
517
518 /// Mark every guard as potentially retriable, regardless of how recently we
519 /// failed to connect to it.
520 pub fn mark_all_guards_retriable(&self) {
521 let mut inner = self.inner.lock().expect("Poisoned lock");
522 inner.guards.active_guards_mut().mark_all_guards_retriable();
523 }
524
/// Make this guardmgr use a fixed [`NetDir`] rather than a real provider.
///
/// This function is for testing only, and is exclusive with
/// `install_netdir_provider`.  After installing the directory, the guard
/// state is updated once using the clock readings taken on entry.
///
/// # Panics
///
/// Panics if any [`NetDirProvider`] has already been installed.
#[cfg(any(test, feature = "testing"))]
pub fn install_test_netdir(&self, netdir: &NetDir) {
    use tor_netdir::testprovider::TestNetDirProvider;

    // Read both clocks before touching any state.
    let (wallclock, now) = (self.runtime.wallclock(), self.runtime.now());

    let provider: Arc<dyn NetDirProvider> = Arc::new(TestNetDirProvider::from(netdir.clone()));
    self.install_netdir_provider(&provider)
        .expect("Couldn't install testing network provider");

    self.inner
        .lock()
        .expect("Poisoned lock")
        .update(wallclock, now);
}
546
547 /// Replace the configuration in this `GuardMgr` with `config`.
548 #[instrument(level = "trace", skip_all)]
549 pub fn reconfigure(
550 &self,
551 config: &impl GuardMgrConfig,
552 ) -> Result<RetireCircuits, ReconfigureError> {
553 let mut inner = self.inner.lock().expect("Poisoned lock");
554 // Change the set of configured fallbacks.
555 {
556 let mut fallbacks: fallback::FallbackState = config.fallbacks().into();
557 std::mem::swap(&mut inner.fallbacks, &mut fallbacks);
558 inner.fallbacks.take_status_from(fallbacks);
559 }
560 // If we are built to use bridges, change the bridge configuration.
561 #[cfg(feature = "bridge-client")]
562 {
563 let wallclock = self.runtime.wallclock();
564 let now = self.runtime.now();
565 Ok(inner.replace_bridge_config(config, wallclock, now)?)
566 }
567 // If we are built to use bridges, change the bridge configuration.
568 #[cfg(not(feature = "bridge-client"))]
569 {
570 Ok(RetireCircuits::None)
571 }
572 }
573
574 /// Replace the current [`GuardFilter`] used by this `GuardMgr`.
575 // TODO should this be part of the config?
576 pub fn set_filter(&self, filter: GuardFilter) {
577 let wallclock = self.runtime.wallclock();
578 let now = self.runtime.now();
579 let mut inner = self.inner.lock().expect("Poisoned lock");
580 inner.set_filter(filter, wallclock, now);
581 }
582
/// Select a guard for a given [`GuardUsage`].
///
/// On success, we return a [`FirstHop`] object to identify which
/// guard we have picked, a [`GuardMonitor`] object that the
/// caller can use to report whether its attempt to use the guard
/// succeeded or failed, and a [`GuardUsable`] future that the
/// caller can use to decide whether a circuit built through the
/// guard is actually safe to use.
///
/// That last point is important: It's okay to build a circuit
/// through the guard returned by this function, but you can't
/// actually use it for traffic unless the [`GuardUsable`] future
/// yields "true".
///
/// # Errors
///
/// Returns a [`PickGuardError`] if no guard could be selected.
#[instrument(skip_all, level = "trace")]
pub fn select_guard(
    &self,
    usage: GuardUsage,
) -> Result<(FirstHop, GuardMonitor, GuardUsable), PickGuardError> {
    let now = self.runtime.now();
    let wallclock = self.runtime.wallclock();

    let mut inner = self.inner.lock().expect("Poisoned lock");

    // (I am not 100% sure that we need to consider_all_retries here, but
    // it should _probably_ not hurt.)
    inner.guards.active_guards_mut().consider_all_retries(now);

    let (origin, guard) = inner.select_guard_with_expand(&usage, now, wallclock)?;
    trace!(?guard, ?usage, "Guard selected");

    // If the guard's origin says it is usable immediately, no sender is
    // needed; otherwise we keep the sender so that the decision can be
    // delivered later through the pending request.
    let (usable, usable_sender) = if origin.usable_immediately() {
        (GuardUsable::new_usable_immediately(), None)
    } else {
        let (u, snd) = GuardUsable::new_uncertain();
        (u, Some(snd))
    };
    let request_id = pending::RequestId::next();
    let ctrl = inner.ctrl.clone();
    let monitor = GuardMonitor::new(request_id, ctrl);

    // Note that the network can be down even if all the primary guards
    // are not yet marked as unreachable.  But according to guard-spec we
    // don't want to acknowledge the net as down before that point, since
    // we don't mark all the primary guards as retriable unless
    // we've been forced to non-primary guards.
    let net_has_been_down =
        if let Some(duration) = tor_proto::time_since_last_incoming_traffic() {
            inner
                .guards
                .active_guards_mut()
                .all_primary_guards_are_unreachable()
                && duration >= inner.params.internet_down_timeout
        } else {
            // TODO: Is this the correct behavior in this case?
            false
        };

    // Remember this request under its id until we hear how it went.
    let pending_request = pending::PendingRequest::new(
        guard.first_hop_id(),
        usage,
        usable_sender,
        net_has_been_down,
    );
    inner.pending.insert(request_id, pending_request);

    match &guard.sample {
        Some(sample) => {
            // The guard came from one of our samples: record the attempt
            // in that sample (which need not be the active one).
            let guard_id = GuardId::from_relay_ids(&guard);
            inner
                .guards
                .guards_mut(sample)
                .record_attempt(&guard_id, now);
        }
        None => {
            // We don't record attempts for fallbacks; we only care when
            // they have failed.
        }
    }

    Ok((guard, monitor, usable))
}
664
665 /// Record that _after_ we built a circuit with a guard, something described
666 /// in `external_failure` went wrong with it.
667 pub fn note_external_failure<T>(&self, identity: &T, external_failure: ExternalActivity)
668 where
669 T: tor_linkspec::HasRelayIds + ?Sized,
670 {
671 let now = self.runtime.now();
672 let mut inner = self.inner.lock().expect("Poisoned lock");
673 let ids = inner.lookup_ids(identity);
674 for id in ids {
675 match &id.0 {
676 FirstHopIdInner::Guard(sample, id) => {
677 inner
678 .guards
679 .guards_mut(sample)
680 .record_failure(id, Some(external_failure), now);
681 }
682 FirstHopIdInner::Fallback(id) => {
683 if external_failure == ExternalActivity::DirCache {
684 inner.fallbacks.note_failure(id, now);
685 }
686 }
687 }
688 }
689 }
690
691 /// Record that _after_ we built a circuit with a guard, some activity
692 /// described in `external_activity` was successful with it.
693 pub fn note_external_success<T>(&self, identity: &T, external_activity: ExternalActivity)
694 where
695 T: tor_linkspec::HasRelayIds + ?Sized,
696 {
697 let mut inner = self.inner.lock().expect("Poisoned lock");
698
699 inner.record_external_success(identity, external_activity, self.runtime.wallclock());
700 }
701
702 /// Return a stream of events about our estimated clock skew; these events
703 /// are `None` when we don't have enough information to make an estimate,
704 /// and `Some(`[`SkewEstimate`]`)` otherwise.
705 ///
706 /// Note that this stream can be lossy: if the estimate changes more than
707 /// one before you read from the stream, you might only get the most recent
708 /// update.
709 pub fn skew_events(&self) -> ClockSkewEvents {
710 let inner = self.inner.lock().expect("Poisoned lock");
711 inner.recv_skew.clone()
712 }
713
/// Wait until the message queue has been flushed before moving on to
/// the next step.  Used for testing.
///
/// We send a `Ping` carrying a oneshot sender down the control channel,
/// and then wait on the matching receiver.
#[cfg(test)]
async fn flush_msg_queue(&self) {
    let (reply_tx, reply_rx) = oneshot::channel();
    let ping = daemon::Msg::Ping(reply_tx);
    // The lock guard is a temporary of this statement, so it is released
    // before we await below.
    self.inner
        .lock()
        .expect("Poisoned lock")
        .ctrl
        .unbounded_send(ping)
        .expect("Guard observer task exited prematurely.");
    let _ = reply_rx.await;
}
729}
730
/// An activity that can succeed or fail, and whose success or failure can be
/// attributed to a guard.
///
/// Callers report outcomes with [`GuardMgr::note_external_success`] and
/// [`GuardMgr::note_external_failure`].
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum ExternalActivity {
    /// The activity of using the guard as a directory cache.
    DirCache,
}
739
740impl GuardSets {
741 /// Return a reference to the currently active set of guards.
742 ///
743 /// (That's easy enough for now, since there is never more than one set of
744 /// guards. But eventually that will change, as we add support for more
745 /// complex filter types, and for bridge relays. Those will use separate
746 /// `GuardSet` instances, and this accessor will choose the right one.)
747 fn active_guards(&self) -> &GuardSet {
748 self.guards(&self.active_set)
749 }
750
751 /// Return the set of guards corresponding to the provided selector.
752 fn guards(&self, selector: &GuardSetSelector) -> &GuardSet {
753 match selector {
754 GuardSetSelector::Default => &self.default,
755 GuardSetSelector::Restricted => &self.restricted,
756 #[cfg(feature = "bridge-client")]
757 GuardSetSelector::Bridges => &self.bridges,
758 }
759 }
760
761 /// Return a mutable reference to the currently active set of guards.
762 fn active_guards_mut(&mut self) -> &mut GuardSet {
763 self.guards_mut(&self.active_set.clone())
764 }
765
766 /// Return a mutable reference to the set of guards corresponding to the
767 /// provided selector.
768 fn guards_mut(&mut self, selector: &GuardSetSelector) -> &mut GuardSet {
769 match selector {
770 GuardSetSelector::Default => &mut self.default,
771 GuardSetSelector::Restricted => &mut self.restricted,
772 #[cfg(feature = "bridge-client")]
773 GuardSetSelector::Bridges => &mut self.bridges,
774 }
775 }
776
777 /// Update all non-persistent state for the guards in this object with the
778 /// state in `other`.
779 fn copy_status_from(&mut self, mut other: GuardSets) {
780 use strum::IntoEnumIterator;
781 for sample in GuardSetSelector::iter() {
782 self.guards_mut(&sample)
783 .copy_ephemeral_status_into_newly_loaded_state(std::mem::take(
784 other.guards_mut(&sample),
785 ));
786 }
787 self.active_set = other.active_set;
788 }
789}
790
791impl GuardMgrInner {
792 /// Look up the latest [`NetDir`] (if there is one) from our
793 /// [`NetDirProvider`] (if we have one).
794 fn timely_netdir(&self) -> Option<Arc<NetDir>> {
795 self.netdir_provider
796 .as_ref()
797 .and_then(Weak::upgrade)
798 .and_then(|np| np.timely_netdir().ok())
799 }
800
/// Fetch the latest [`BridgeDescList`](bridge::BridgeDescList) from our
/// [`BridgeDescProvider`](bridge::BridgeDescProvider).
///
/// Returns `None` if no provider is installed, or if the provider is no
/// longer alive.
#[cfg(feature = "bridge-client")]
fn latest_bridge_desc_list(&self) -> Option<Arc<bridge::BridgeDescList>> {
    let provider = self.bridge_desc_provider.as_ref()?.upgrade()?;
    Some(provider.bridges())
}
811
812 /// Run a function that takes `&mut self` and an optional NetDir.
813 ///
814 /// We try to use the netdir from our [`NetDirProvider`] (if we have one).
815 /// Therefore, although its _parameters_ are suitable for every
816 /// [`GuardSet`], its _contents_ might not be. For those, call
817 /// [`with_opt_universe`](Self::with_opt_universe) instead.
818 //
819 // This function exists to handle the lifetime mess where sometimes the
820 // resulting NetDir will borrow from `netdir`, and sometimes it will borrow
821 // from an Arc returned by `self.latest_netdir()`.
822 fn with_opt_netdir<F, T>(&mut self, func: F) -> T
823 where
824 F: FnOnce(&mut Self, Option<&NetDir>) -> T,
825 {
826 if let Some(nd) = self.timely_netdir() {
827 func(self, Some(nd.as_ref()))
828 } else {
829 func(self, None)
830 }
831 }
832
/// Build the latest `BridgeSet` from our configured bridges and whatever
/// our `BridgeDescProvider` currently reports.
///
/// Returns `None` if we are not configured to use bridges.
#[cfg(feature = "bridge-client")]
fn latest_bridge_set(&self) -> Option<bridge::BridgeSet> {
    let config = Arc::clone(self.configured_bridges.as_ref()?);
    Some(bridge::BridgeSet::new(
        config,
        self.latest_bridge_desc_list(),
    ))
}
843
844 /// Run a function that takes `&mut self` and an optional [`UniverseRef`].
845 ///
846 /// We try to get a universe from the appropriate source for the current
847 /// active guard set.
848 fn with_opt_universe<F, T>(&mut self, func: F) -> T
849 where
850 F: FnOnce(&mut Self, Option<&UniverseRef>) -> T,
851 {
852 // TODO: it might be nice to make `func` take an GuardSet and a set of
853 // parameters, so we can't get the active set wrong. Doing that will
854 // require a fair amount of refactoring so that the borrow checker is
855 // happy, however.
856 match self.guards.active_set.universe_type() {
857 UniverseType::NetDir => {
858 if let Some(nd) = self.timely_netdir() {
859 func(self, Some(&UniverseRef::NetDir(nd)))
860 } else {
861 func(self, None)
862 }
863 }
864 #[cfg(feature = "bridge-client")]
865 UniverseType::BridgeSet => func(
866 self,
867 self.latest_bridge_set()
868 .map(UniverseRef::BridgeSet)
869 .as_ref(),
870 ),
871 }
872 }
873
874 /// Update the status of all guards in the active set, based on the passage
875 /// of time, our configuration, and the relevant Universe for our active
876 /// set.
877 #[instrument(skip_all, level = "trace")]
878 fn update(&mut self, wallclock: SystemTime, now: Instant) {
879 self.with_opt_netdir(|this, netdir| {
880 // Here we update our parameters from the latest NetDir, and check
881 // whether we need to change to a (non)-restrictive GuardSet based
882 // on those parameters and our configured filter.
883 //
884 // This uses a NetDir unconditionally, since we always want to take
885 // the network parameters our parameters from the consensus even if
886 // the guards themselves are from a BridgeSet.
887 this.update_active_set_params_and_filter(netdir);
888 });
889 self.with_opt_universe(|this, univ| {
890 // Now we update the set of guards themselves based on the
891 // Universe, which is either the latest NetDir, or the latest
892 // BridgeSet—depending on what the GuardSet wants.
893 Self::update_guardset_internal(
894 &this.params,
895 wallclock,
896 this.guards.active_set.universe_type(),
897 this.guards.active_guards_mut(),
898 univ,
899 );
900 #[cfg(feature = "bridge-client")]
901 this.update_desired_descriptors(now);
902 #[cfg(not(feature = "bridge-client"))]
903 let _ = now;
904 });
905 }
906
    /// Replace our bridge configuration with the one from `new_config`.
    ///
    /// Returns `Ok(RetireCircuits::None)` when nothing had to change: either
    /// bridges were and remain disabled, or bridges are enabled and the
    /// configured list is equal to the one we already have.  In every other
    /// successful case the bridge list and/or the active guard set is
    /// changed, `update()` is run, and `Ok(RetireCircuits::All)` is returned.
    ///
    /// # Errors
    ///
    /// Returns [`GuardMgrConfigError::NoLock`] if `new_config` enables
    /// bridges but `self.storage.can_store()` reports false.
    ///
    /// # Panics
    ///
    /// The two "nothing to do" cases assert that the active guard set's
    /// universe type is consistent with the existing bridge configuration.
    #[cfg(feature = "bridge-client")]
    #[instrument(level = "trace", skip_all)]
    fn replace_bridge_config(
        &mut self,
        new_config: &impl GuardMgrConfig,
        wallclock: SystemTime,
        now: Instant,
    ) -> Result<RetireCircuits, GuardMgrConfigError> {
        // NOTE: the order of these arms matters, since several of them use
        // wildcard patterns that are only reached when earlier arms (and
        // their guards) did not match.
        match (&self.configured_bridges, new_config.bridges_enabled()) {
            // Bridges were off, and they stay off.
            (None, false) => {
                assert_ne!(
                    self.guards.active_set.universe_type(),
                    UniverseType::BridgeSet
                );
                return Ok(RetireCircuits::None); // nothing to do
            }
            // Bridges are requested, but we are unable to store state.
            (_, true) if !self.storage.can_store() => {
                // TODO: Ideally we would try to upgrade, obtaining an exclusive lock,
                // but `StorageHandle` currently lacks a method for that.
                return Err(GuardMgrConfigError::NoLock("bridges configured".into()));
            }
            // Bridges were on, they stay on, and the list is unchanged.
            (Some(current_bridges), true) if new_config.bridges() == current_bridges.as_ref() => {
                assert_eq!(
                    self.guards.active_set.universe_type(),
                    UniverseType::BridgeSet
                );
                return Ok(RetireCircuits::None); // nothing to do.
            }
            // Bridges are now on, with a list that we did not have before.
            (_, true) => {
                self.configured_bridges = Some(new_config.bridges().into());
                self.guards.active_set = GuardSetSelector::Bridges;
            }
            // Bridges were on, and are now turned off.
            (_, false) => {
                self.configured_bridges = None;
                self.guards.active_set = GuardSetSelector::Default;
            }
        }

        // If we have gotten here, we have changed the set of bridges, changed
        // which set is active, or changed them both.  We need to make sure that
        // our `GuardSet` object is up-to-date with our configuration.
        self.update(wallclock, now);

        // We also need to tell the caller that its circuits are no good any
        // more.
        //
        // TODO(nickm): Someday we can do this more judiciously by returning
        // "Some" in the case where we're still using bridges but our new bridge
        // set contains different elements; see comment on RetireCircuits.
        //
        // TODO(nickm): We could also safely return RetireCircuits::None if we
        // are using bridges, and our new bridge list is a superset of the older
        // one.
        Ok(RetireCircuits::All)
    }
963
964 /// Update our parameters, our selection (based on network parameters and
965 /// configuration), and make sure the active GuardSet has the right
966 /// configuration itself.
967 ///
968 /// We should call this whenever the NetDir's parameters change, or whenever
969 /// our filter changes. We do not need to call it for new elements arriving
970 /// in our Universe, since those do not affect anything here.
971 ///
972 /// We should also call this whenever a new GuardSet becomes active for any
973 /// reason _other_ than just having called this function.
974 ///
975 /// (This function is only invoked from `update`, which should be called
976 /// under the above circumstances.)
977 fn update_active_set_params_and_filter(&mut self, netdir: Option<&NetDir>) {
978 // Set the parameters. These always come from the NetDir, even if this
979 // is a bridge set.
980 if let Some(netdir) = netdir {
981 match GuardParams::try_from(netdir.params()) {
982 Ok(params) => self.params = params,
983 Err(e) => warn!("Unusable guard parameters from consensus: {}", e),
984 }
985
986 self.select_guard_set_based_on_filter(netdir);
987 }
988
989 // Change the filter, if it doesn't match what the guards have.
990 //
991 // TODO(nickm): We could use a "dirty" flag or something to decide
992 // whether we need to call set_filter, if this comparison starts to show
993 // up in profiles.
994 if self.guards.active_guards().filter() != &self.filter {
995 let restrictive = self.guards.active_set == GuardSetSelector::Restricted;
996 self.guards
997 .active_guards_mut()
998 .set_filter(self.filter.clone(), restrictive);
999 }
1000 }
1001
1002 /// Update the status of every guard in `active_guards`, and expand it as
1003 /// needed.
1004 ///
1005 /// This function doesn't take `&self`, to make sure that we are only
1006 /// affecting a single `GuardSet`, and to avoid confusing the borrow
1007 /// checker.
1008 ///
1009 /// We should call this whenever the contents of the universe have changed.
1010 ///
1011 /// We should also call this whenever a new GuardSet becomes active.
1012 fn update_guardset_internal<U: Universe>(
1013 params: &GuardParams,
1014 now: SystemTime,
1015 universe_type: UniverseType,
1016 active_guards: &mut GuardSet,
1017 universe: Option<&U>,
1018 ) -> ExtendedStatus {
1019 // Expire guards. Do that early, in case doing so makes it clear that
1020 // we need to grab more guards or mark others as primary.
1021 active_guards.expire_old_guards(params, now);
1022
1023 let extended = if let Some(universe) = universe {
1024 // TODO: This check here may be completely unnecessary. I inserted
1025 // it back in 5ac0fcb7ef603e0d14 because I was originally concerned
1026 // it might be undesirable to list a primary guard as "missing dir
1027 // info" (and therefore unusable) if we were expecting to get its
1028 // microdescriptor "very soon."
1029 //
1030 // But due to the other check in `netdir_is_sufficient`, we
1031 // shouldn't be installing a netdir until it has microdescs for all
1032 // of the (non-bridge) primary guards that it lists. - nickm
1033 let n = active_guards.n_primary_without_id_info_in(universe);
1034 if n > 0 && universe_type == UniverseType::NetDir {
1035 // We are missing the information from a NetDir needed to see
1036 // whether our primary guards are listed, so we shouldn't update
1037 // our guard status.
1038 //
1039 // We don't want to do this check if we are using bridges, since
1040 // a missing bridge descriptor is not guaranteed to temporary
1041 // problem in the same way that a missing microdescriptor is.
1042 // (When a bridge desc is missing, the bridge could be down or
1043 // unreachable, and nobody else can help us. But if a microdesc
1044 // is missing, we just need to find a cache that has it.)
1045 trace!(
1046 n_primary_without_id_info = n,
1047 "Not extending guardset, missing information."
1048 );
1049 return ExtendedStatus::No;
1050 }
1051 active_guards.update_status_from_dir(universe);
1052 active_guards.extend_sample_as_needed(now, params, universe)
1053 } else {
1054 trace!("Not extending guardset, no universe given.");
1055 ExtendedStatus::No
1056 };
1057
1058 active_guards.select_primary_guards(params);
1059
1060 extended
1061 }
1062
1063 /// If using bridges, tell the BridgeDescProvider which descriptors we want.
1064 /// We need to check this *after* we select our primary guards.
1065 #[cfg(feature = "bridge-client")]
1066 fn update_desired_descriptors(&mut self, now: Instant) {
1067 if self.guards.active_set.universe_type() != UniverseType::BridgeSet {
1068 return;
1069 }
1070
1071 let provider = self.bridge_desc_provider.as_ref().and_then(Weak::upgrade);
1072 let bridge_set = self.latest_bridge_set();
1073 if let (Some(provider), Some(bridge_set)) = (provider, bridge_set) {
1074 let desired: Vec<_> = self
1075 .guards
1076 .active_guards()
1077 .descriptors_to_request(now, &self.params)
1078 .into_iter()
1079 .flat_map(|guard| bridge_set.bridge_by_guard(guard))
1080 .cloned()
1081 .collect();
1082
1083 provider.set_bridges(&desired);
1084 }
1085 }
1086
1087 /// Replace the active guard state with `new_state`, preserving
1088 /// non-persistent state for any guards that are retained.
1089 #[instrument(level = "trace", skip_all)]
1090 fn replace_guards_with(
1091 &mut self,
1092 mut new_guards: GuardSets,
1093 wallclock: SystemTime,
1094 now: Instant,
1095 ) {
1096 std::mem::swap(&mut self.guards, &mut new_guards);
1097 self.guards.copy_status_from(new_guards);
1098 self.update(wallclock, now);
1099 }
1100
1101 /// Update which guard set is active based on the current filter and the
1102 /// provided netdir.
1103 ///
1104 /// After calling this function, the new guard set's filter may be
1105 /// out-of-date: be sure to call `set_filter` as appropriate.
1106 fn select_guard_set_based_on_filter(&mut self, netdir: &NetDir) {
1107 // In general, we'd like to use the restricted set if we're under the
1108 // threshold, and the default set if we're over the threshold. But if
1109 // we're sitting close to the threshold, we want to avoid flapping back
1110 // and forth, so we only change when we're more than 5% "off" from
1111 // whatever our current setting is.
1112 //
1113 // (See guard-spec section 2 for more information.)
1114 let offset = match self.guards.active_set {
1115 GuardSetSelector::Default => -0.05,
1116 GuardSetSelector::Restricted => 0.05,
1117 // If we're using bridges, then we don't switch between the other guard sets based on the filter at all.
1118 #[cfg(feature = "bridge-client")]
1119 GuardSetSelector::Bridges => return,
1120 };
1121 let frac_permitted = self.filter.frac_bw_permitted(netdir);
1122 let threshold = self.params.filter_threshold + offset;
1123 let new_choice = if frac_permitted < threshold {
1124 GuardSetSelector::Restricted
1125 } else {
1126 GuardSetSelector::Default
1127 };
1128
1129 if new_choice != self.guards.active_set {
1130 info!(
1131 "Guard selection changed; we are now using the {:?} guard set",
1132 &new_choice
1133 );
1134
1135 self.guards.active_set = new_choice;
1136
1137 if frac_permitted < self.params.extreme_threshold {
1138 warn!(
1139 "The number of guards permitted is smaller than the recommended minimum of {:.0}%.",
1140 self.params.extreme_threshold * 100.0,
1141 );
1142 }
1143 }
1144 }
1145
1146 /// Mark all of our primary guards as retriable, if we haven't done
1147 /// so since long enough before `now`.
1148 ///
1149 /// We want to call this function whenever a guard attempt succeeds,
1150 /// if the internet seemed to be down when the guard attempt was
1151 /// first launched.
1152 fn maybe_retry_primary_guards(&mut self, now: Instant) {
1153 // We don't actually want to mark our primary guards as
1154 // retriable more than once per internet_down_timeout: after
1155 // the first time, we would just be noticing the same "coming
1156 // back online" event more than once.
1157 let interval = self.params.internet_down_timeout;
1158 if self.last_primary_retry_time + interval <= now {
1159 debug!(
1160 "Successfully reached a guard after a while off the internet; marking all primary guards retriable."
1161 );
1162 self.guards
1163 .active_guards_mut()
1164 .mark_primary_guards_retriable();
1165 self.last_primary_retry_time = now;
1166 }
1167 }
1168
    /// Replace the current GuardFilter with `filter`.
    ///
    /// After storing the new filter, this runs `update()` with the supplied
    /// times so that the new filter takes effect.
    #[instrument(level = "trace", skip_all)]
    fn set_filter(&mut self, filter: GuardFilter, wallclock: SystemTime, now: Instant) {
        self.filter = filter;
        // `update()` is what actually applies `self.filter` to the guards.
        self.update(wallclock, now);
    }
1175
    /// Called when the circuit manager reports (via [`GuardMonitor`]) that
    /// a guard succeeded or failed.
    ///
    /// Changes the guard's status as appropriate, and updates the pending
    /// request as needed.
    ///
    /// If `request_id` does not correspond to a pending request, a warning is
    /// logged and no status is recorded.  In every case, the primary guards
    /// of the active set are re-selected afterwards, and waiting requests are
    /// expired or answered.
    ///
    /// `skew`, if present, is recorded as a clock skew observation for the
    /// guard or fallback that the request used, and the skew estimate is then
    /// recalculated.
    #[allow(clippy::cognitive_complexity)]
    pub(crate) fn handle_msg(
        &mut self,
        request_id: RequestId,
        status: GuardStatus,
        skew: Option<ClockSkew>,
        runtime: &impl tor_rtcompat::SleepProvider,
    ) {
        if let Some(mut pending) = self.pending.remove(&request_id) {
            // If there was a pending request matching this RequestId, great!
            let guard_id = pending.guard_id();
            trace!(?guard_id, ?status, "Received report of guard status");

            // First, handle the skew report (if any)
            if let Some(skew) = skew {
                let now = runtime.now();
                let observation = skew::SkewObservation { skew, when: now };

                // Store the observation with whichever kind of first hop
                // this request was using.
                match &guard_id.0 {
                    FirstHopIdInner::Guard(_, id) => {
                        self.guards.active_guards_mut().record_skew(id, observation);
                    }
                    FirstHopIdInner::Fallback(id) => {
                        self.fallbacks.note_skew(id, observation);
                    }
                }
                // TODO: We call this whenever we receive an observed clock
                // skew. That's not the perfect timing for two reasons.  First
                // off, it might be too frequent: it does an O(n) calculation,
                // which isn't ideal.  Second, it might be too infrequent: after
                // an hour has passed, a given observation won't be up-to-date
                // any more, and we might want to recalculate the skew
                // accordingly.
                self.update_skew(now);
            }

            // Next, record the reported status itself.
            match (status, &guard_id.0) {
                (GuardStatus::Failure, FirstHopIdInner::Fallback(id)) => {
                    // We used a fallback, and we weren't able to build a circuit through it.
                    self.fallbacks.note_failure(id, runtime.now());
                }
                (_, FirstHopIdInner::Fallback(_)) => {
                    // We don't record any other kind of circuit activity if we
                    // took the entry from the fallback list.
                }
                (GuardStatus::Success, FirstHopIdInner::Guard(sample, id)) => {
                    // If we had gone too long without any net activity when we
                    // gave out this guard, and now we're seeing a circuit
                    // succeed, tell the primary guards that they might be
                    // retriable.
                    if pending.net_has_been_down() {
                        self.maybe_retry_primary_guards(runtime.now());
                    }

                    // The guard succeeded.  Tell the GuardSet.
                    self.guards.guards_mut(sample).record_success(
                        id,
                        &self.params,
                        None,
                        runtime.wallclock(),
                    );
                    // Either tell the request whether the guard is
                    // usable, or schedule it as a "waiting" request.
                    if let Some(usable) = self.guard_usability_status(&pending, runtime.now()) {
                        trace!(?guard_id, usable, "Known usability status");
                        pending.reply(usable);
                    } else {
                        // This is the one case where we can't use the
                        // guard yet.
                        trace!(?guard_id, "Not able to answer right now");
                        pending.mark_waiting(runtime.now());
                        self.waiting.push(pending);
                    }
                }
                // For every remaining (non-success) report about a guard, we
                // record the outcome and tell the request that its circuit is
                // not usable.
                (GuardStatus::Failure, FirstHopIdInner::Guard(sample, id)) => {
                    self.guards
                        .guards_mut(sample)
                        .record_failure(id, None, runtime.now());
                    pending.reply(false);
                }
                (GuardStatus::AttemptAbandoned, FirstHopIdInner::Guard(sample, id)) => {
                    self.guards.guards_mut(sample).record_attempt_abandoned(id);
                    pending.reply(false);
                }
                (GuardStatus::Indeterminate, FirstHopIdInner::Guard(sample, id)) => {
                    self.guards
                        .guards_mut(sample)
                        .record_indeterminate_result(id);
                    pending.reply(false);
                }
            };
        } else {
            warn!(
                "Got a status {:?} for a request {:?} that wasn't pending",
                status, request_id
            );
        }

        // We might need to update the primary guards based on changes in the
        // status of guards above.
        self.guards
            .active_guards_mut()
            .select_primary_guards(&self.params);

        // Some waiting request may just have become ready (usable or
        // not); we need to give them the information they're waiting
        // for.
        self.expire_and_answer_pending_requests(runtime.now());
    }
1290
1291 /// Helper to implement `GuardMgr::note_external_success()`.
1292 ///
1293 /// (This has to be a separate function so that we can borrow params while
1294 /// we have `mut self` borrowed.)
1295 fn record_external_success<T>(
1296 &mut self,
1297 identity: &T,
1298 external_activity: ExternalActivity,
1299 now: SystemTime,
1300 ) where
1301 T: tor_linkspec::HasRelayIds + ?Sized,
1302 {
1303 for id in self.lookup_ids(identity) {
1304 match &id.0 {
1305 FirstHopIdInner::Guard(sample, id) => {
1306 self.guards.guards_mut(sample).record_success(
1307 id,
1308 &self.params,
1309 Some(external_activity),
1310 now,
1311 );
1312 }
1313 FirstHopIdInner::Fallback(id) => {
1314 if external_activity == ExternalActivity::DirCache {
1315 self.fallbacks.note_success(id);
1316 }
1317 }
1318 }
1319 }
1320 }
1321
1322 /// Return an iterator over all of the clock skew observations we've made
1323 /// for guards or fallbacks.
1324 fn skew_observations(&self) -> impl Iterator<Item = &skew::SkewObservation> {
1325 self.fallbacks
1326 .skew_observations()
1327 .chain(self.guards.active_guards().skew_observations())
1328 }
1329
1330 /// Recalculate our estimated clock skew, and publish it to anybody who
1331 /// cares.
1332 fn update_skew(&mut self, now: Instant) {
1333 let estimate = skew::SkewEstimate::estimate_skew(self.skew_observations(), now);
1334 // TODO: we might want to do this only conditionally, when the skew
1335 // estimate changes.
1336 *self.send_skew.borrow_mut() = estimate;
1337 }
1338
1339 /// If the circuit built because of a given [`PendingRequest`] may
1340 /// now be used (or discarded), return `Some(true)` or
1341 /// `Some(false)` respectively.
1342 ///
1343 /// Return None if we can't yet give an answer about whether such
1344 /// a circuit is usable.
1345 fn guard_usability_status(&self, pending: &PendingRequest, now: Instant) -> Option<bool> {
1346 match &pending.guard_id().0 {
1347 FirstHopIdInner::Guard(sample, id) => self.guards.guards(sample).circ_usability_status(
1348 id,
1349 pending.usage(),
1350 &self.params,
1351 now,
1352 ),
1353 // Fallback circuits are usable immediately, since we don't have to wait to
1354 // see whether any _other_ circuit succeeds or fails.
1355 FirstHopIdInner::Fallback(_) => Some(true),
1356 }
1357 }
1358
1359 /// For requests that have been "waiting" for an answer for too long,
1360 /// expire them and tell the circuit manager that their circuits
1361 /// are unusable.
1362 fn expire_and_answer_pending_requests(&mut self, now: Instant) {
1363 // A bit ugly: we use a separate Vec here to avoid borrowing issues,
1364 // and put it back when we're done.
1365 let mut waiting = Vec::new();
1366 std::mem::swap(&mut waiting, &mut self.waiting);
1367
1368 waiting.retain_mut(|pending| {
1369 let expired = pending
1370 .waiting_since()
1371 .and_then(|w| now.checked_duration_since(w))
1372 .map(|d| d >= self.params.np_idle_timeout)
1373 == Some(true);
1374 if expired {
1375 trace!(?pending, "Pending request expired");
1376 pending.reply(false);
1377 return false;
1378 }
1379
1380 // TODO-SPEC: guard_usability_status isn't what the spec says. It
1381 // says instead that we should look at _circuit_ status, saying:
1382 // " Definition: In the algorithm above, C2 "blocks" C1 if:
1383 // * C2 obeys all the restrictions that C1 had to obey, AND
1384 // * C2 has higher priority than C1, AND
1385 // * Either C2 is <complete>, or C2 is <waiting_for_better_guard>,
1386 // or C2 has been <usable_if_no_better_guard> for no more than
1387 // {NONPRIMARY_GUARD_CONNECT_TIMEOUT} seconds."
1388 //
1389 // See comments in sample::GuardSet::circ_usability_status.
1390
1391 if let Some(answer) = self.guard_usability_status(pending, now) {
1392 trace!(?pending, answer, "Pending request now ready");
1393 pending.reply(answer);
1394 return false;
1395 }
1396 true
1397 });
1398
1399 // Put the waiting list back.
1400 std::mem::swap(&mut waiting, &mut self.waiting);
1401 }
1402
1403 /// Return every currently extant FirstHopId for a guard or fallback
1404 /// directory matching (or possibly matching) the provided keys.
1405 ///
1406 /// An identity is _possibly matching_ if it contains some of the IDs in the
1407 /// provided identity, and it has no _contradictory_ identities, but it does
1408 /// not necessarily contain _all_ of those identities.
1409 ///
1410 /// # TODO
1411 ///
1412 /// This function should probably not exist; it's only used so that dirmgr
1413 /// can report successes or failures, since by the time it observes them it
1414 /// doesn't know whether its circuit came from a guard or a fallback. To
1415 /// solve that, we'll need CircMgr to record and report which one it was
1416 /// using, which will take some more plumbing.
1417 ///
1418 /// TODO relay: we will have to make the change above when we implement
1419 /// relays; otherwise, it would be possible for an attacker to exploit it to
1420 /// mislead us about our guard status.
1421 fn lookup_ids<T>(&self, identity: &T) -> Vec<FirstHopId>
1422 where
1423 T: tor_linkspec::HasRelayIds + ?Sized,
1424 {
1425 use strum::IntoEnumIterator;
1426 let mut vec = Vec::with_capacity(2);
1427
1428 let id = ids::GuardId::from_relay_ids(identity);
1429 for sample in GuardSetSelector::iter() {
1430 let guard_id = match self.guards.guards(&sample).contains(&id) {
1431 Ok(true) => &id,
1432 Err(other) => other,
1433 Ok(false) => continue,
1434 };
1435 vec.push(FirstHopId(FirstHopIdInner::Guard(sample, guard_id.clone())));
1436 }
1437
1438 let id = ids::FallbackId::from_relay_ids(identity);
1439 if self.fallbacks.contains(&id) {
1440 vec.push(id.into());
1441 }
1442
1443 vec
1444 }
1445
    /// Run any periodic events that update guard status, and return a
    /// duration after which periodic events should next be run.
    ///
    /// Currently this runs `update()` and then expires or answers waiting
    /// requests; the returned duration is always one second.
    #[instrument(skip_all, level = "trace")]
    pub(crate) fn run_periodic_events(&mut self, wallclock: SystemTime, now: Instant) -> Duration {
        self.update(wallclock, now);
        self.expire_and_answer_pending_requests(now);
        Duration::from_secs(1) // TODO: Too aggressive.
    }
1454
    /// Try to select a guard, expanding the sample if the first attempt fails.
    ///
    /// The steps, in order, are:
    ///  1. Try to pick a guard from the active set as it stands.
    ///  2. If that fails and a universe is available, refresh the active
    ///     guard set from it; if the sample was extended, try again.
    ///  3. If that fails too, and `usage` asks for a one-hop directory
    ///     circuit while the active set draws from the network directory,
    ///     pick a fallback directory instead.
    ///
    /// # Errors
    ///
    /// If none of the steps yields a guard, the error from the _first_
    /// attempt is returned, unless step 3 was reached: in that case the
    /// result of `select_fallback()` is returned as-is.
    #[instrument(skip_all, level = "trace")]
    fn select_guard_with_expand(
        &mut self,
        usage: &GuardUsage,
        now: Instant,
        wallclock: SystemTime,
    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
        // Try to find a guard.
        let first_error = match self.select_guard_once(usage, now) {
            Ok(res1) => return Ok(res1),
            Err(e) => {
                trace!("Couldn't select guard on first attempt: {}", e);
                e
            }
        };

        // That didn't work.  If we have a universe, expand the sample and try
        // again.  (The closure yields `None` whenever it has no guard to
        // offer.)
        let res = self.with_opt_universe(|this, univ| {
            let univ = univ?;
            trace!("No guards available, trying to extend the sample.");
            // Make sure that the status on all of our guards are accurate, and
            // expand the sample if we can.
            //
            // Our parameters and configuration did not change, so we do not
            // need to call update() or update_active_set_params_and_filter().
            // This call is sufficient to extend the sample and recompute
            // primary guards.
            let extended = Self::update_guardset_internal(
                &this.params,
                wallclock,
                this.guards.active_set.universe_type(),
                this.guards.active_guards_mut(),
                Some(univ),
            );
            // Retrying is only worthwhile if the sample actually grew.
            if extended == ExtendedStatus::Yes {
                match this.select_guard_once(usage, now) {
                    Ok(res) => return Some(res),
                    Err(e) => {
                        trace!("Couldn't select guard after update: {}", e);
                    }
                }
            }
            None
        });
        if let Some(res) = res {
            return Ok(res);
        }

        // Okay, that didn't work either.  If we were asked for a directory
        // guard, and we aren't using bridges, then we may be able to use a
        // fallback.
        if usage.kind == GuardUsageKind::OneHopDirectory
            && self.guards.active_set.universe_type() == UniverseType::NetDir
        {
            return self.select_fallback(now);
        }

        // Couldn't extend the sample or use a fallback; return the original error.
        Err(first_error)
    }
1516
    /// Helper: try to pick a single guard, without retrying on failure.
    ///
    /// The guard is picked from the active guard set.  When bridge support is
    /// compiled in and the active set is a bridge set, we additionally try to
    /// upgrade the result into something that a circuit can be built
    /// through.
    ///
    /// # Errors
    ///
    /// Propagates any error from picking the guard.  In the bridge case,
    /// returns `PickGuardError::Internal` if no bridge set is available, or
    /// if `usage` is for data but the chosen hop could not be upgraded to a
    /// circuit target.
    fn select_guard_once(
        &self,
        usage: &GuardUsage,
        now: Instant,
    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
        let active_set = &self.guards.active_set;
        // `first_hop` is only mutated in the bridge-client code below.
        #[cfg_attr(not(feature = "bridge-client"), allow(unused_mut))]
        let (list_kind, mut first_hop) =
            self.guards
                .guards(active_set)
                .pick_guard(active_set, usage, &self.params, now)?;
        #[cfg(feature = "bridge-client")]
        if self.guards.active_set.universe_type() == UniverseType::BridgeSet {
            // See if we can promote first_hop to a viable CircTarget.
            let bridges = self.latest_bridge_set().ok_or_else(|| {
                PickGuardError::Internal(internal!(
                    "No bridge set available, even though this is the Bridges sample"
                ))
            })?;
            first_hop.lookup_bridge_circ_target(&bridges);

            // A data circuit needs a full circuit target, so anything less
            // is treated as an internal error here.
            if usage.kind == GuardUsageKind::Data && !first_hop.contains_circ_target() {
                return Err(PickGuardError::Internal(internal!(
                    "Tried to return a non-circtarget guard with Data usage!"
                )));
            }
        }
        Ok((list_kind, first_hop))
    }
1547
1548 /// Helper: Select a fallback directory.
1549 ///
1550 /// Called when we have no guard information to use. Return values are as
1551 /// for [`GuardMgr::select_guard()`]
1552 fn select_fallback(
1553 &self,
1554 now: Instant,
1555 ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1556 let filt = self.guards.active_guards().filter();
1557
1558 let fallback = crate::FirstHop {
1559 sample: None,
1560 inner: crate::FirstHopInner::Chan(OwnedChanTarget::from_chan_target(
1561 self.fallbacks.choose(&mut rand::rng(), now, filt)?,
1562 )),
1563 };
1564 let fallback = filt.modify_hop(fallback)?;
1565 Ok((sample::ListKind::Fallback, fallback))
1566 }
1567}
1568
/// A possible outcome of trying to extend a guard sample.
///
/// Callers compare against [`ExtendedStatus::Yes`] to decide whether it is
/// worth retrying guard selection after an update.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum ExtendedStatus {
    /// The guard sample was extended. (At least one guard was added to it.)
    Yes,
    /// The guard sample was not extended.
    No,
}
1577
/// A set of parameters, derived from the consensus document, controlling
/// the behavior of a guard manager.
///
/// Built-in values are available through `Default`; values from the network
/// are obtained with `TryFrom<&NetParameters>`.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
struct GuardParams {
    /// How long should a sampled, un-confirmed guard be kept in the sample before it expires?
    lifetime_unconfirmed: Duration,
    /// How long should a confirmed guard be kept in the sample before
    /// it expires?
    lifetime_confirmed: Duration,
    /// How long may a guard be unlisted before we remove it from the sample?
    lifetime_unlisted: Duration,
    /// Largest number of guards we're willing to add to the sample.
    max_sample_size: usize,
    /// Largest fraction of the network's guard bandwidth that we're
    /// willing to add to the sample.
    max_sample_bw_fraction: f64,
    /// Smallest number of guards that we're willing to have in the
    /// sample, after applying a [`GuardFilter`].
    min_filtered_sample_size: usize,
    /// How many guards are considered "Primary"?
    n_primary: usize,
    /// When making a regular circuit, how many primary guards should we
    /// be willing to try?
    data_parallelism: usize,
    /// When making a one-hop directory circuit, how many primary
    /// guards should we be willing to try?
    dir_parallelism: usize,
    /// For how long does a pending attempt to connect to a guard
    /// block an attempt to use a less-favored non-primary guard?
    np_connect_timeout: Duration,
    /// How long do we allow a circuit to a successful but unfavored
    /// non-primary guard to sit around before deciding not to use it?
    np_idle_timeout: Duration,
    /// After how much time without successful activity does a
    /// successful circuit indicate that we should retry our primary
    /// guards?
    internet_down_timeout: Duration,
    /// What fraction of the guards can be filtered out before we
    /// decide that our filter is "very restrictive"?
    filter_threshold: f64,
    /// What fraction of the guards determine that our filter is "very
    /// restrictive"?
    ///
    /// When a change of guard set leaves us with less than this fraction
    /// permitted, a warning is logged.
    extreme_threshold: f64,
}
1623
1624impl Default for GuardParams {
1625 fn default() -> Self {
1626 let one_day = Duration::from_secs(86400);
1627 GuardParams {
1628 lifetime_unconfirmed: one_day * 120,
1629 lifetime_confirmed: one_day * 60,
1630 lifetime_unlisted: one_day * 20,
1631 max_sample_size: 60,
1632 max_sample_bw_fraction: 0.2,
1633 min_filtered_sample_size: 20,
1634 n_primary: 3,
1635 data_parallelism: 1,
1636 dir_parallelism: 3,
1637 np_connect_timeout: Duration::from_secs(15),
1638 np_idle_timeout: Duration::from_secs(600),
1639 internet_down_timeout: Duration::from_secs(600),
1640 filter_threshold: 0.2,
1641 extreme_threshold: 0.01,
1642 }
1643 }
1644}
1645
1646impl TryFrom<&NetParameters> for GuardParams {
1647 type Error = tor_units::Error;
1648 fn try_from(p: &NetParameters) -> Result<GuardParams, Self::Error> {
1649 Ok(GuardParams {
1650 lifetime_unconfirmed: p.guard_lifetime_unconfirmed.try_into()?,
1651 lifetime_confirmed: p.guard_lifetime_confirmed.try_into()?,
1652 lifetime_unlisted: p.guard_remove_unlisted_after.try_into()?,
1653 max_sample_size: p.guard_max_sample_size.try_into()?,
1654 max_sample_bw_fraction: p.guard_max_sample_threshold.as_fraction(),
1655 min_filtered_sample_size: p.guard_filtered_min_sample_size.try_into()?,
1656 n_primary: p.guard_n_primary.try_into()?,
1657 data_parallelism: p.guard_use_parallelism.try_into()?,
1658 dir_parallelism: p.guard_dir_use_parallelism.try_into()?,
1659 np_connect_timeout: p.guard_nonprimary_connect_timeout.try_into()?,
1660 np_idle_timeout: p.guard_nonprimary_idle_timeout.try_into()?,
1661 internet_down_timeout: p.guard_internet_likely_down.try_into()?,
1662 filter_threshold: p.guard_meaningful_restriction.as_fraction(),
1663 extreme_threshold: p.guard_extreme_restriction.as_fraction(),
1664 })
1665 }
1666}
1667
/// Representation of a guard or fallback, as returned by [`GuardMgr::select_guard()`].
///
/// A `FirstHop` always carries enough information to open a channel.  It may
/// additionally carry the information needed to build a circuit; see
/// [`FirstHop::as_circ_target`].
#[derive(Debug, Clone)]
pub struct FirstHop {
    /// The sample from which this guard was taken, or `None` if this is a fallback.
    sample: Option<GuardSetSelector>,
    /// Information about connecting to (or through) this guard.
    inner: FirstHopInner,
}
/// The enumeration inside a FirstHop that holds information about how to
/// connect to (and possibly through) a guard or fallback.
#[derive(Debug, Clone)]
enum FirstHopInner {
    /// We have enough information to connect to a guard.
    Chan(OwnedChanTarget),
    /// We have enough information to connect to a guard _and_ to build
    /// multihop circuits through it.
    //
    // Without the "bridge-client" feature this variant may never be
    // constructed, hence the conditional `dead_code` allowance.
    #[cfg_attr(not(feature = "bridge-client"), allow(dead_code))]
    Circ(OwnedCircTarget),
}
1687
1688impl FirstHop {
1689 /// Return a new [`FirstHopId`] for this `FirstHop`.
1690 fn first_hop_id(&self) -> FirstHopId {
1691 match &self.sample {
1692 Some(sample) => {
1693 let guard_id = GuardId::from_relay_ids(self);
1694 FirstHopId::in_sample(sample.clone(), guard_id)
1695 }
1696 None => {
1697 let fallback_id = crate::ids::FallbackId::from_relay_ids(self);
1698 FirstHopId::from(fallback_id)
1699 }
1700 }
1701 }
1702
1703 /// Look up this guard in `netdir`.
1704 pub fn get_relay<'a>(&self, netdir: &'a NetDir) -> Option<Relay<'a>> {
1705 match &self.sample {
1706 #[cfg(feature = "bridge-client")]
1707 // Always return "None" for anything that isn't in the netdir.
1708 Some(s) if s.universe_type() == UniverseType::BridgeSet => None,
1709 // Otherwise ask the netdir.
1710 _ => netdir.by_ids(self),
1711 }
1712 }
1713
1714 /// Return true if this guard is a bridge.
1715 pub fn is_bridge(&self) -> bool {
1716 match &self.sample {
1717 #[cfg(feature = "bridge-client")]
1718 Some(s) if s.universe_type() == UniverseType::BridgeSet => true,
1719 _ => false,
1720 }
1721 }
1722
1723 /// If possible, return a view of this object that can be used to build a circuit.
1724 pub fn as_circ_target(&self) -> Option<&OwnedCircTarget> {
1725 match &self.inner {
1726 FirstHopInner::Chan(_) => None,
1727 FirstHopInner::Circ(ct) => Some(ct),
1728 }
1729 }
1730
1731 /// Return a view of this as an OwnedChanTarget.
1732 fn chan_target_mut(&mut self) -> &mut OwnedChanTarget {
1733 match &mut self.inner {
1734 FirstHopInner::Chan(ct) => ct,
1735 FirstHopInner::Circ(ct) => ct.chan_target_mut(),
1736 }
1737 }
1738
1739 /// If possible and appropriate, find a circuit target in `bridges` for this
1740 /// `FirstHop`, and make this `FirstHop` a viable circuit target.
1741 ///
1742 /// (By default, any `FirstHop` that a `GuardSet` returns will have enough
1743 /// information to be a `ChanTarget`, but it will be lacking the additional
1744 /// network information in `CircTarget`[^1] necessary for us to build a
1745 /// multi-hop circuit through it. If this FirstHop is a regular non-bridge
1746 /// `Relay`, then the `CircMgr` will later look up that circuit information
1747 /// itself from the network directory. But if this `FirstHop` *is* a bridge,
1748 /// then we need to find that information in the `BridgeSet`, since the
1749 /// CircMgr does not keep track of the `BridgeSet`.)
1750 ///
1751 /// [^1]: For example, supported protocol versions and ntor keys.
1752 #[cfg(feature = "bridge-client")]
1753 fn lookup_bridge_circ_target(&mut self, bridges: &bridge::BridgeSet) {
1754 use crate::sample::CandidateStatus::Present;
1755 if self.sample.as_ref().map(|s| s.universe_type()) == Some(UniverseType::BridgeSet)
1756 && matches!(self.inner, FirstHopInner::Chan(_))
1757 {
1758 if let Present(bridge_relay) = bridges.bridge_relay_by_guard(self) {
1759 if let Some(circ_target) = bridge_relay.as_relay_with_desc() {
1760 self.inner =
1761 FirstHopInner::Circ(OwnedCircTarget::from_circ_target(&circ_target));
1762 }
1763 }
1764 }
1765 }
1766
1767 /// Return true if this `FirstHop` contains circuit target information.
1768 ///
1769 /// This is true if `lookup_bridge_circ_target()` has been called, and it
1770 /// successfully found the circuit target information.
1771 #[cfg(feature = "bridge-client")]
1772 fn contains_circ_target(&self) -> bool {
1773 matches!(self.inner, FirstHopInner::Circ(_))
1774 }
1775}
1776
1777// This is somewhat redundant with the implementations in crate::guard::Guard.
1778impl tor_linkspec::HasAddrs for FirstHop {
1779 fn addrs(&self) -> impl Iterator<Item = SocketAddr> {
1780 match &self.inner {
1781 FirstHopInner::Chan(ct) => Either::Left(ct.addrs()),
1782 FirstHopInner::Circ(ct) => Either::Right(ct.addrs()),
1783 }
1784 }
1785}
1786impl tor_linkspec::HasRelayIds for FirstHop {
1787 fn identity(
1788 &self,
1789 key_type: tor_linkspec::RelayIdType,
1790 ) -> Option<tor_linkspec::RelayIdRef<'_>> {
1791 match &self.inner {
1792 FirstHopInner::Chan(ct) => ct.identity(key_type),
1793 FirstHopInner::Circ(ct) => ct.identity(key_type),
1794 }
1795 }
1796}
1797impl tor_linkspec::HasChanMethod for FirstHop {
1798 fn chan_method(&self) -> tor_linkspec::ChannelMethod {
1799 match &self.inner {
1800 FirstHopInner::Chan(ct) => ct.chan_method(),
1801 FirstHopInner::Circ(ct) => ct.chan_method(),
1802 }
1803 }
1804}
1805impl tor_linkspec::ChanTarget for FirstHop {}
1806
/// What a requested guard is going to be used for.
///
/// The guard selection algorithm may behave differently depending on this.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[non_exhaustive]
pub enum GuardUsageKind {
    /// The guard will be used for a data circuit.
    ///
    /// (Everything other than the `OneHopDirectory` case falls under this.)
    #[default]
    Data,
    /// The guard will be used for a one-hop, non-anonymous
    /// directory request.
    ///
    /// (The algorithm permits more parallelism among the guards used for
    /// circuits of this kind.)
    OneHopDirectory,
}
1825
1826/// A set of parameters describing how a single guard should be selected.
1827///
1828/// Used as an argument to [`GuardMgr::select_guard`].
1829#[derive(Clone, Debug, derive_builder::Builder)]
1830#[builder(build_fn(error = "tor_config::ConfigBuildError"))]
1831pub struct GuardUsage {
1832 /// The purpose for which this guard will be used.
1833 #[builder(default)]
1834 kind: GuardUsageKind,
1835 /// A list of restrictions on which guard may be used.
1836 ///
1837 /// The default is the empty list.
1838 #[builder(sub_builder, setter(custom))]
1839 restrictions: GuardRestrictionList,
1840}
1841
1842impl_standard_builder! { GuardUsage: !Deserialize }
1843
1844/// List of socket restrictions, as configured
1845pub type GuardRestrictionList = Vec<GuardRestriction>;
1846
1847define_list_builder_helper! {
1848 pub struct GuardRestrictionListBuilder {
1849 restrictions: [GuardRestriction],
1850 }
1851 built: GuardRestrictionList = restrictions;
1852 default = vec![];
1853 item_build: |restriction| Ok(restriction.clone());
1854 item_apply_defaults: |_| Ok::<_, tor_config::ConfigBuildError>(());
1855}
1856
1857define_list_builder_accessors! {
1858 struct GuardUsageBuilder {
1859 pub restrictions: [GuardRestriction],
1860 }
1861}
1862
1863impl GuardUsageBuilder {
1864 /// Create a new empty [`GuardUsageBuilder`].
1865 pub fn new() -> Self {
1866 Self::default()
1867 }
1868}
1869
1870/// A restriction that applies to a single request for a guard.
1871///
1872/// Restrictions differ from filters (see [`GuardFilter`]) in that
1873/// they apply to single requests, not to our entire set of guards.
1874/// They're suitable for things like making sure that we don't start
1875/// and end a circuit at the same relay, or requiring a specific
1876/// subprotocol version for certain kinds of requests.
1877#[derive(Clone, Debug, Serialize, Deserialize)]
1878#[non_exhaustive]
1879pub enum GuardRestriction {
1880 /// Don't pick a guard with the provided identity.
1881 AvoidId(RelayId),
1882 /// Don't pick a guard with any of the provided Ed25519 identities.
1883 AvoidAllIds(RelayIdSet),
1884}
1885
/// Which kind of vanguards to use, if any.
///
/// The explicit discriminants match the values accepted by
/// `VanguardMode::from_net_parameter`.
#[cfg(feature = "vanguards")]
#[derive(
    Clone,
    Copy,
    Debug,
    Default,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Serialize,
    Deserialize,
    derive_more::Display,
)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum VanguardMode {
    /// "Lite" vanguards.
    ///
    /// This is the default mode.
    #[default]
    #[display("lite")]
    Lite = 1,
    /// "Full" vanguards.
    #[display("full")]
    Full = 2,
    /// No vanguards at all.
    #[display("disabled")]
    Disabled = 0,
}
1905
#[cfg(feature = "vanguards")]
impl VanguardMode {
    /// Convert the value of a [`NetParameters`] parameter into a `VanguardMode`.
    ///
    /// This is meant for turning
    /// [`vanguards_enabled`](NetParameters::vanguards_enabled)
    /// or [`vanguards_hs_service`](NetParameters::vanguards_hs_service)
    /// into the mode it denotes: 0 is disabled, 1 is lite, and 2 is full.
    pub(crate) fn from_net_parameter(val: BoundedInt32<0, 2>) -> Self {
        match val.get() {
            2 => Self::Full,
            1 => Self::Lite,
            0 => Self::Disabled,
            // The bounds in the parameter's type rule out every other value.
            _ => unreachable!("BoundedInt32 was not bounded?!"),
        }
    }
}
1922
1923impl_not_auto_value!(VanguardMode);
1924
1925/// Vanguards configuration.
1926#[derive(Deftly, Clone, Debug, PartialEq, Eq)]
1927#[derive_deftly(TorConfig)]
1928pub struct VanguardConfig {
1929 /// The kind of vanguards to use.
1930 #[deftly(tor_config(default))]
1931 mode: ExplicitOrAuto<VanguardMode>,
1932}
1933
1934impl VanguardConfig {
1935 /// Return the configured [`VanguardMode`].
1936 ///
1937 /// Returns the [`Default`] `VanguardMode`
1938 /// if the mode is [`Auto`](ExplicitOrAuto) or unspecified.
1939 pub fn mode(&self) -> VanguardMode {
1940 match self.mode {
1941 ExplicitOrAuto::Auto => Default::default(),
1942 ExplicitOrAuto::Explicit(mode) => mode,
1943 }
1944 }
1945}
1946
1947/// The kind of vanguards to use.
1948#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
1949#[derive(Serialize, Deserialize)] //
1950#[derive(derive_more::Display)] //
1951#[serde(rename_all = "lowercase")]
1952#[cfg(not(feature = "vanguards"))]
1953#[non_exhaustive]
1954pub enum VanguardMode {
1955 /// Vanguards are disabled.
1956 #[default]
1957 #[display("disabled")]
1958 Disabled = 0,
1959}
1960
1961#[cfg(test)]
1962mod test {
1963 // @@ begin test lint list maintained by maint/add_warning @@
1964 #![allow(clippy::bool_assert_comparison)]
1965 #![allow(clippy::clone_on_copy)]
1966 #![allow(clippy::dbg_macro)]
1967 #![allow(clippy::mixed_attributes_style)]
1968 #![allow(clippy::print_stderr)]
1969 #![allow(clippy::print_stdout)]
1970 #![allow(clippy::single_char_pattern)]
1971 #![allow(clippy::unwrap_used)]
1972 #![allow(clippy::unchecked_time_subtraction)]
1973 #![allow(clippy::useless_vec)]
1974 #![allow(clippy::needless_pass_by_value)]
1975 #![allow(clippy::string_slice)] // See arti#2571
1976 //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
1977 use super::*;
1978 use itertools::Itertools;
1979 use tor_linkspec::{HasAddrs, HasRelayIds};
1980 use tor_persist::TestingStateMgr;
1981 use tor_rtcompat::test_with_all_runtimes;
1982
1983 #[test]
1984 fn guard_param_defaults() {
1985 let p1 = GuardParams::default();
1986 let p2: GuardParams = (&NetParameters::default()).try_into().unwrap();
1987 assert_eq!(p1, p2);
1988 }
1989
1990 fn init<R: Runtime>(rt: R) -> (GuardMgr<R>, TestingStateMgr, NetDir) {
1991 use tor_netdir::{MdReceiver, PartialNetDir, testnet};
1992 let statemgr = TestingStateMgr::new();
1993 let have_lock = statemgr.try_lock().unwrap();
1994 assert!(have_lock.held());
1995 let guardmgr = GuardMgr::new(rt, statemgr.clone(), &TestConfig::default()).unwrap();
1996 let (con, mds) = testnet::construct_network().unwrap();
1997 let param_overrides = vec![
1998 // We make the sample size smaller than usual to compensate for the
1999 // small testing network. (Otherwise, we'd sample the whole network,
2000 // and not be able to observe guards in the tests.)
2001 "guard-min-filtered-sample-size=5",
2002 // We choose only two primary guards, to make the tests easier to write.
2003 "guard-n-primary-guards=2",
2004 // We define any restriction that allows 75% or fewer of relays as "meaningful",
2005 // so that we can test the "restrictive" guard sample behavior, and to avoid
2006 "guard-meaningful-restriction-percent=75",
2007 ];
2008 let param_overrides: String = param_overrides.into_iter().join(" ");
2009 let override_p = param_overrides.parse().unwrap();
2010 let mut netdir = PartialNetDir::new(con, Some(&override_p));
2011 for md in mds {
2012 netdir.add_microdesc(md);
2013 }
2014 let netdir = netdir.unwrap_if_sufficient().unwrap();
2015
2016 (guardmgr, statemgr, netdir)
2017 }
2018
2019 #[test]
2020 #[allow(clippy::clone_on_copy)]
2021 fn simple_case() {
2022 test_with_all_runtimes!(|rt| async move {
2023 let (guardmgr, statemgr, netdir) = init(rt.clone());
2024 let usage = GuardUsage::default();
2025 guardmgr.install_test_netdir(&netdir);
2026
2027 let (id, mon, usable) = guardmgr.select_guard(usage).unwrap();
2028 // Report that the circuit succeeded.
2029 mon.succeeded();
2030
2031 // May we use the circuit?
2032 let usable = usable.await.unwrap();
2033 assert!(usable);
2034
2035 // Save the state...
2036 guardmgr.flush_msg_queue().await;
2037 guardmgr.store_persistent_state().unwrap();
2038 drop(guardmgr);
2039
2040 // Try reloading from the state...
2041 let guardmgr2 =
2042 GuardMgr::new(rt.clone(), statemgr.clone(), &TestConfig::default()).unwrap();
2043 guardmgr2.install_test_netdir(&netdir);
2044
2045 // Since the guard was confirmed, we should get the same one this time!
2046 let usage = GuardUsage::default();
2047 let (id2, _mon, _usable) = guardmgr2.select_guard(usage).unwrap();
2048 assert!(id2.same_relay_ids(&id));
2049 });
2050 }
2051
2052 #[test]
2053 fn simple_waiting() {
2054 // TODO(nickm): This test fails in rare cases; I suspect a
2055 // race condition somewhere.
2056 //
2057 // I've doubled up on the queue flushing in order to try to make the
2058 // race less likely, but we should investigate.
2059 test_with_all_runtimes!(|rt| async move {
2060 let (guardmgr, _statemgr, netdir) = init(rt);
2061 let u = GuardUsage::default();
2062 guardmgr.install_test_netdir(&netdir);
2063
2064 // We'll have the first two guard fail, which should make us
2065 // try a non-primary guard.
2066 let (id1, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
2067 mon.failed();
2068 guardmgr.flush_msg_queue().await; // avoid race
2069 guardmgr.flush_msg_queue().await; // avoid race
2070 let (id2, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
2071 mon.failed();
2072 guardmgr.flush_msg_queue().await; // avoid race
2073 guardmgr.flush_msg_queue().await; // avoid race
2074
2075 assert!(!id1.same_relay_ids(&id2));
2076
2077 // Now we should get two sampled guards. They should be different.
2078 let (id3, mon3, usable3) = guardmgr.select_guard(u.clone()).unwrap();
2079 let (id4, mon4, usable4) = guardmgr.select_guard(u.clone()).unwrap();
2080 assert!(!id3.same_relay_ids(&id4));
2081
2082 let (u3, u4) = futures::join!(
2083 async {
2084 mon3.failed();
2085 guardmgr.flush_msg_queue().await; // avoid race
2086 usable3.await.unwrap()
2087 },
2088 async {
2089 mon4.succeeded();
2090 usable4.await.unwrap()
2091 }
2092 );
2093
2094 assert_eq!((u3, u4), (false, true));
2095 });
2096 }
2097
2098 #[test]
2099 fn filtering_basics() {
2100 test_with_all_runtimes!(|rt| async move {
2101 let (guardmgr, _statemgr, netdir) = init(rt);
2102 let u = GuardUsage::default();
2103 let filter = {
2104 let mut f = GuardFilter::default();
2105 // All the addresses in the test network are {0,1,2,3,4}.0.0.3:9001.
2106 // Limit to only 2.0.0.0/8
2107 f.push_reachable_addresses(vec!["2.0.0.0/8:9001".parse().unwrap()]);
2108 f
2109 };
2110 guardmgr.set_filter(filter);
2111 guardmgr.install_test_netdir(&netdir);
2112 let (guard, _mon, _usable) = guardmgr.select_guard(u).unwrap();
2113 // Make sure that the filter worked.
2114 let addr = guard.addrs().next().unwrap();
2115 assert_eq!(addr, "2.0.0.3:9001".parse().unwrap());
2116 });
2117 }
2118
2119 #[test]
2120 fn external_status() {
2121 test_with_all_runtimes!(|rt| async move {
2122 let (guardmgr, _statemgr, netdir) = init(rt);
2123 let data_usage = GuardUsage::default();
2124 let dir_usage = GuardUsageBuilder::new()
2125 .kind(GuardUsageKind::OneHopDirectory)
2126 .build()
2127 .unwrap();
2128 guardmgr.install_test_netdir(&netdir);
2129 {
2130 // Override this parameter, so that we can get deterministic results below.
2131 let mut inner = guardmgr.inner.lock().unwrap();
2132 inner.params.dir_parallelism = 1;
2133 }
2134
2135 let (guard, mon, _usable) = guardmgr.select_guard(data_usage.clone()).unwrap();
2136 mon.succeeded();
2137
2138 // Record that this guard gave us a bad directory object.
2139 guardmgr.note_external_failure(&guard, ExternalActivity::DirCache);
2140
2141 // We ask for another guard, for data usage. We should get the same
2142 // one as last time, since the director failure doesn't mean this
2143 // guard is useless as a primary guard.
2144 let (g2, mon, _usable) = guardmgr.select_guard(data_usage).unwrap();
2145 assert_eq!(g2.ed_identity(), guard.ed_identity());
2146 mon.succeeded();
2147
2148 // But if we ask for a guard for directory usage, we should get a
2149 // different one, since the last guard we gave out failed.
2150 let (g3, mon, _usable) = guardmgr.select_guard(dir_usage.clone()).unwrap();
2151 assert_ne!(g3.ed_identity(), guard.ed_identity());
2152 mon.succeeded();
2153
2154 // Now record a success for directory usage.
2155 guardmgr.note_external_success(&guard, ExternalActivity::DirCache);
2156
2157 // Now that the guard is working as a cache, asking for it should get us the same guard.
2158 let (g4, _mon, _usable) = guardmgr.select_guard(dir_usage).unwrap();
2159 assert_eq!(g4.ed_identity(), guard.ed_identity());
2160 });
2161 }
2162
2163 #[cfg(feature = "vanguards")]
2164 #[test]
2165 fn vanguard_mode_ord() {
2166 assert!(VanguardMode::Disabled < VanguardMode::Lite);
2167 assert!(VanguardMode::Disabled < VanguardMode::Full);
2168 assert!(VanguardMode::Lite < VanguardMode::Full);
2169 }
2170}