tor_guardmgr/lib.rs
1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![doc = include_str!("../README.md")]
3// @@ begin lint list maintained by maint/add_warning @@
4#![allow(renamed_and_removed_lints)] // @@REMOVE_WHEN(ci_arti_stable)
5#![allow(unknown_lints)] // @@REMOVE_WHEN(ci_arti_nightly)
6#![warn(missing_docs)]
7#![warn(noop_method_call)]
8#![warn(unreachable_pub)]
9#![warn(clippy::all)]
10#![deny(clippy::await_holding_lock)]
11#![deny(clippy::cargo_common_metadata)]
12#![deny(clippy::cast_lossless)]
13#![deny(clippy::checked_conversions)]
14#![warn(clippy::cognitive_complexity)]
15#![deny(clippy::debug_assert_with_mut_call)]
16#![deny(clippy::exhaustive_enums)]
17#![deny(clippy::exhaustive_structs)]
18#![deny(clippy::expl_impl_clone_on_copy)]
19#![deny(clippy::fallible_impl_from)]
20#![deny(clippy::implicit_clone)]
21#![deny(clippy::large_stack_arrays)]
22#![warn(clippy::manual_ok_or)]
23#![deny(clippy::missing_docs_in_private_items)]
24#![warn(clippy::needless_borrow)]
25#![warn(clippy::needless_pass_by_value)]
26#![warn(clippy::option_option)]
27#![deny(clippy::print_stderr)]
28#![deny(clippy::print_stdout)]
29#![warn(clippy::rc_buffer)]
30#![deny(clippy::ref_option_ref)]
31#![warn(clippy::semicolon_if_nothing_returned)]
32#![warn(clippy::trait_duplication_in_bounds)]
33#![deny(clippy::unchecked_time_subtraction)]
34#![deny(clippy::unnecessary_wraps)]
35#![warn(clippy::unseparated_literal_suffix)]
36#![deny(clippy::unwrap_used)]
37#![deny(clippy::mod_module_files)]
38#![allow(clippy::let_unit_value)] // This can reasonably be done for explicitness
39#![allow(clippy::uninlined_format_args)]
40#![allow(clippy::significant_drop_in_scrutinee)] // arti/-/merge_requests/588/#note_2812945
41#![allow(clippy::result_large_err)] // temporary workaround for arti#587
42#![allow(clippy::needless_raw_string_hashes)] // complained-about code is fine, often best
43#![allow(clippy::needless_lifetimes)] // See arti#1765
44#![allow(mismatched_lifetime_syntaxes)] // temporary workaround for arti#2060
45//! <!-- @@ end lint list maintained by maint/add_warning @@ -->
46
47// TODO #1645 (either remove this, or decide to have it everywhere)
48#![cfg_attr(not(all(feature = "full", feature = "experimental")), allow(unused))]
49
50// Glossary:
51// Primary guard
52// Sample
53// confirmed
54// filtered
55
56use futures::channel::mpsc;
57use itertools::Either;
58use serde::{Deserialize, Serialize};
59use std::collections::HashMap;
60use std::net::SocketAddr;
61use std::sync::{Arc, Mutex, Weak};
62use std::time::{Duration, Instant, SystemTime};
63#[cfg(feature = "bridge-client")]
64use tor_error::internal;
65use tor_linkspec::{OwnedChanTarget, OwnedCircTarget, RelayId, RelayIdSet};
66use tor_netdir::NetDirProvider;
67use tor_proto::ClockSkew;
68use tor_rtcompat::SpawnExt;
69use tor_units::BoundedInt32;
70use tracing::{debug, info, instrument, trace, warn};
71
72use tor_config::{ExplicitOrAuto, impl_standard_builder};
73use tor_config::{ReconfigureError, impl_not_auto_value};
74use tor_config::{define_list_builder_accessors, define_list_builder_helper};
75use tor_netdir::{NetDir, Relay, params::NetParameters};
76use tor_persist::{DynStorageHandle, StateMgr};
77use tor_rtcompat::Runtime;
78
79#[cfg(feature = "bridge-client")]
80pub mod bridge;
81mod config;
82mod daemon;
83mod dirstatus;
84mod err;
85mod events;
86pub mod fallback;
87mod filter;
88mod guard;
89mod ids;
90mod pending;
91mod sample;
92mod skew;
93mod util;
94#[cfg(feature = "vanguards")]
95pub mod vanguards;
96
97#[cfg(not(feature = "bridge-client"))]
98#[path = "bridge_disabled.rs"]
99pub mod bridge;
100
101#[cfg(any(test, feature = "testing"))]
102pub use config::testing::TestConfig;
103
104#[cfg(test)]
105use oneshot_fused_workaround as oneshot;
106
107pub use config::GuardMgrConfig;
108pub use err::{GuardMgrConfigError, GuardMgrError, PickGuardError};
109pub use events::ClockSkewEvents;
110pub use filter::GuardFilter;
111pub use ids::FirstHopId;
112pub use pending::{GuardMonitor, GuardStatus, GuardUsable};
113pub use skew::SkewEstimate;
114
115#[cfg(feature = "vanguards")]
116#[cfg_attr(docsrs, doc(cfg(feature = "vanguards")))]
117pub use vanguards::VanguardMgrError;
118
119use pending::{PendingRequest, RequestId};
120use sample::{GuardSet, Universe, UniverseRef};
121
122use crate::ids::{FirstHopIdInner, GuardId};
123
124use tor_config::ConfigBuildError;
125
/// A "guard manager" that selects and remembers a persistent set of
/// guard nodes.
///
/// This is a "handle"; clones of it share state.
#[derive(Clone)]
pub struct GuardMgr<R: Runtime> {
    /// An asynchronous runtime object.
    ///
    /// GuardMgr uses this runtime for timing, timeouts, and spawning
    /// tasks.
    runtime: R,

    /// Internal state for the guard manager.
    ///
    /// Every clone of this handle points at the same `Arc`; the background
    /// tasks spawned by the manager are only given `Weak` references to it.
    inner: Arc<Mutex<GuardMgrInner>>,
}
141
/// Helper type that holds the data used by a [`GuardMgr`].
///
/// This would just be a [`GuardMgr`], except that it needs to sit inside
/// a `Mutex` and get accessed by daemon tasks.
struct GuardMgrInner {
    /// The last time at which we marked all of our primary guards as
    /// retriable.
    ///
    /// We keep track of this time so that we can rate-limit
    /// these attempts.
    last_primary_retry_time: Instant,

    /// Persistent guard manager state.
    ///
    /// This object remembers one or more persistent sets of guards that we
    /// can use, along with their relative priorities and statuses.
    guards: GuardSets,

    /// The current filter that we're using to decide which guards are
    /// supported.
    //
    // TODO: This field is duplicated in the current active [`GuardSet`]; we
    // should fix that.
    filter: GuardFilter,

    /// Configuration values derived from the consensus parameters.
    ///
    /// This is updated whenever the consensus parameters change.
    params: GuardParams,

    /// A mpsc channel, used to tell the task running in
    /// [`daemon::report_status_events`] about a new event to monitor.
    ///
    /// This uses an `UnboundedSender` so that we don't have to await
    /// while sending the message, which in turn allows the GuardMgr
    /// API to be simpler.  The risk, however, is that there's no
    /// backpressure in the event that the task running
    /// [`daemon::report_status_events`] fails to read from this
    /// channel.
    ctrl: mpsc::UnboundedSender<daemon::Msg>,

    /// Information about guards that we've given out, but where we have
    /// not yet heard whether the guard was successful.
    ///
    /// Upon learning whether the guard was successful, the pending
    /// requests in this map may be either moved to `waiting`, or
    /// discarded.
    ///
    /// There can be multiple pending requests corresponding to the
    /// same guard.
    pending: HashMap<RequestId, PendingRequest>,

    /// A list of pending requests for which we have heard that the
    /// guard was successful, but we have not yet decided whether the
    /// circuit may be used.
    ///
    /// There can be multiple waiting requests corresponding to the
    /// same guard.
    waiting: Vec<PendingRequest>,

    /// A list of fallback directories used to access the directory system
    /// when no other directory information is yet known.
    fallbacks: fallback::FallbackState,

    /// Location in which to store persistent state.
    storage: DynStorageHandle<GuardSets>,

    /// A sender object to publish changes in our estimated clock skew.
    send_skew: postage::watch::Sender<Option<SkewEstimate>>,

    /// A receiver object to hand out to observers who want to know about
    /// changes in our estimated clock skew.
    recv_skew: events::ClockSkewEvents,

    /// A netdir provider that we can use for adding new guards when
    /// insufficient guards are available.
    ///
    /// This has to be an Option so it can be initialized from None: at the
    /// time a GuardMgr is created, there is no NetDirProvider for it to use.
    ///
    /// Only a `Weak` reference is kept, so this field does not keep the
    /// provider alive.
    netdir_provider: Option<Weak<dyn NetDirProvider>>,

    /// A bridge descriptor provider that we can use for discovering bridge
    /// descriptors.
    ///
    /// This has to be an Option so it can be initialized from None: at the time
    /// a GuardMgr is created, there is no BridgeDescProvider for it to use.
    ///
    /// Only a `Weak` reference is kept, so this field does not keep the
    /// provider alive.
    #[cfg(feature = "bridge-client")]
    bridge_desc_provider: Option<Weak<dyn bridge::BridgeDescProvider>>,

    /// A list of the bridges that we are configured to use, or "None" if we are
    /// not configured to use bridges.
    #[cfg(feature = "bridge-client")]
    configured_bridges: Option<Arc<[bridge::BridgeConfig]>>,
}
234
/// A selector that tells us which [`GuardSet`] of several is currently in use.
///
/// The derived `Ord`/`PartialOrd` and `strum::EnumIter` implementations
/// follow the declaration order of the variants below, so reordering them
/// changes both comparison results and iteration order.
#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, strum::EnumIter)]
enum GuardSetSelector {
    /// The default guard set is currently in use: that's the one that we use
    /// when we have no filter installed, or the filter permits most of the
    /// guards on the network.
    #[default]
    Default,
    /// A "restrictive" guard set is currently in use: that's the one that we
    /// use when we have a filter that excludes a large fraction of the guards
    /// on the network.
    Restricted,
    /// The "bridges" guard set is currently in use: we are selecting our guards
    /// from among the universe of configured bridges.
    ///
    /// Only present when the `bridge-client` feature is enabled.
    #[cfg(feature = "bridge-client")]
    Bridges,
}
252
/// Describes the [`Universe`] that a guard sample should take its guards from.
///
/// Values of this type are produced by [`GuardSetSelector::universe_type`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum UniverseType {
    /// Take information from the network directory.
    NetDir,
    /// Take information from the configured bridges.
    ///
    /// Only present when the `bridge-client` feature is enabled.
    #[cfg(feature = "bridge-client")]
    BridgeSet,
}
262
263impl GuardSetSelector {
264 /// Return a description of which [`Universe`] this guard sample should take
265 /// its guards from.
266 fn universe_type(&self) -> UniverseType {
267 match self {
268 GuardSetSelector::Default | GuardSetSelector::Restricted => UniverseType::NetDir,
269 #[cfg(feature = "bridge-client")]
270 GuardSetSelector::Bridges => UniverseType::BridgeSet,
271 }
272 }
273}
274
/// Persistent state for a guard manager, as serialized to disk.
///
/// The serialized form is defined by the `serde` attributes on the fields
/// below; changing a field name or attribute changes the on-disk format.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct GuardSets {
    /// Which set of guards is currently in use?
    ///
    /// This is marked `#[serde(skip)]`: it is not stored, and takes its
    /// `Default` value whenever a `GuardSets` is deserialized.
    #[serde(skip)]
    active_set: GuardSetSelector,

    /// The default set of guards to use.
    ///
    /// We use this one when there is no filter, or the filter permits most of the
    /// guards on the network.
    default: GuardSet,

    /// A guard set to use when we have a restrictive filter.
    ///
    /// May be absent from stored state, in which case it starts out as the
    /// `Default` guard set.
    #[serde(default)]
    restricted: GuardSet,

    /// A guard set sampled from our configured bridges.
    ///
    /// May be absent from stored state, in which case it starts out as the
    /// `Default` guard set.
    #[serde(default)]
    #[cfg(feature = "bridge-client")]
    bridges: GuardSet,

    /// Unrecognized fields, including (possibly) other guard sets.
    ///
    /// These are collected via `#[serde(flatten)]`; presumably this is so
    /// that state written by other versions survives a load/store round trip.
    #[serde(flatten)]
    remaining: HashMap<String, tor_persist::JsonValue>,
}
301
/// The key (filename) we use for storing our persistent guard state in the
/// `StateMgr`.
///
/// [`GuardMgr::new`] passes this key to `StateMgr::create_handle` to obtain
/// the storage handle for our [`GuardSets`].
///
/// We used to store this in a different format in a filename called
/// "default_guards" (before Arti 0.1.0).
const STORAGE_KEY: &str = "guards";
308
/// A description of which circuits to retire because of a configuration change.
///
/// This is the success value of [`GuardMgr::reconfigure`]. The type is
/// `#[must_use]`, so callers have to look at (or explicitly discard) the
/// answer.
///
/// TODO(nickm): Eventually we will want to add a "Some" here, to support
/// removing only those circuits that correspond to no-longer-usable guards.
#[derive(Clone, Debug, Eq, PartialEq)]
#[must_use]
#[non_exhaustive]
pub enum RetireCircuits {
    /// There's no need to retire any circuits.
    None,
    /// All circuits should be retired.
    All,
}
322
323impl<R: Runtime> GuardMgr<R> {
324 /// Create a new "empty" guard manager and launch its background tasks.
325 ///
326 /// It won't be able to hand out any guards until a [`NetDirProvider`] has
327 /// been installed.
328 #[instrument(skip_all, level = "trace")]
329 pub fn new<S>(
330 runtime: R,
331 state_mgr: S,
332 config: &impl GuardMgrConfig,
333 ) -> Result<Self, GuardMgrError>
334 where
335 S: StateMgr + Send + Sync + 'static,
336 {
337 let (ctrl, rcv) = mpsc::unbounded();
338 let storage: DynStorageHandle<GuardSets> = state_mgr.create_handle(STORAGE_KEY);
339 // TODO(nickm): We should do something about the old state in
340 // `default_guards`. Probably it would be best to delete it. We could
341 // try to migrate it instead, but that's beyond the stability guarantee
342 // that we're getting at this stage of our (pre-0.1) development.
343 let state = storage.load()?.unwrap_or_default();
344
345 let (send_skew, recv_skew) = postage::watch::channel();
346 let recv_skew = ClockSkewEvents { inner: recv_skew };
347
348 let inner = Arc::new(Mutex::new(GuardMgrInner {
349 guards: state,
350 filter: GuardFilter::unfiltered(),
351 last_primary_retry_time: runtime.now(),
352 params: GuardParams::default(),
353 ctrl,
354 pending: HashMap::new(),
355 waiting: Vec::new(),
356 fallbacks: config.fallbacks().into(),
357 storage,
358 send_skew,
359 recv_skew,
360 netdir_provider: None,
361 #[cfg(feature = "bridge-client")]
362 bridge_desc_provider: None,
363 #[cfg(feature = "bridge-client")]
364 configured_bridges: None,
365 }));
366 #[cfg(feature = "bridge-client")]
367 {
368 let mut inner = inner.lock().expect("lock poisoned");
369 // TODO(nickm): This calls `GuardMgrInner::update`. Will we mind doing so before any
370 // providers are configured? I think not, but we should make sure.
371 let _: RetireCircuits =
372 inner.replace_bridge_config(config, runtime.wallclock(), runtime.now())?;
373 }
374 {
375 let weak_inner = Arc::downgrade(&inner);
376 let rt_clone = runtime.clone();
377 runtime
378 .spawn(daemon::report_status_events(rt_clone, weak_inner, rcv))
379 .map_err(|e| GuardMgrError::from_spawn("guard status event reporter", e))?;
380 }
381 {
382 let rt_clone = runtime.clone();
383 let weak_inner = Arc::downgrade(&inner);
384 runtime
385 .spawn(daemon::run_periodic(rt_clone, weak_inner))
386 .map_err(|e| GuardMgrError::from_spawn("periodic guard updater", e))?;
387 }
388 Ok(GuardMgr { runtime, inner })
389 }
390
391 /// Install a [`NetDirProvider`] for use by this guard manager.
392 ///
393 /// It will be used to keep the guards up-to-date with changes from the
394 /// network directory, and to find new guards when no NetDir is provided to
395 /// select_guard().
396 ///
397 /// TODO: we should eventually return some kind of a task handle from this
398 /// task, even though it is not strictly speaking periodic.
399 ///
400 /// The guardmgr retains only a `Weak` reference to `provider`,
401 /// `install_netdir_provider` downgrades it on entry,
402 // TODO add ref to document when https://gitlab.torproject.org/tpo/core/arti/-/issues/624
403 // is fixed. Also, maybe take an owned `Weak` to start with.
404 //
405 /// # Panics
406 ///
407 /// Panics if a [`NetDirProvider`] is already installed.
408 pub fn install_netdir_provider(
409 &self,
410 provider: &Arc<dyn NetDirProvider>,
411 ) -> Result<(), GuardMgrError> {
412 let weak_provider = Arc::downgrade(provider);
413 {
414 let mut inner = self.inner.lock().expect("Poisoned lock");
415 assert!(inner.netdir_provider.is_none());
416 inner.netdir_provider = Some(weak_provider.clone());
417 }
418 let weak_inner = Arc::downgrade(&self.inner);
419 let rt_clone = self.runtime.clone();
420 self.runtime
421 .spawn(daemon::keep_netdir_updated(
422 rt_clone,
423 weak_inner,
424 weak_provider,
425 ))
426 .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
427 Ok(())
428 }
429
430 /// Configure a new [`bridge::BridgeDescProvider`] for this [`GuardMgr`].
431 ///
432 /// It will be used to learn about changes in the set of available bridge
433 /// descriptors; we'll inform it whenever our desired set of bridge
434 /// descriptors changes.
435 ///
436 /// TODO: Same todo as in `install_netdir_provider` about task handles.
437 ///
438 /// # Panics
439 ///
440 /// Panics if a [`bridge::BridgeDescProvider`] is already installed.
441 #[cfg(feature = "bridge-client")]
442 pub fn install_bridge_desc_provider(
443 &self,
444 provider: &Arc<dyn bridge::BridgeDescProvider>,
445 ) -> Result<(), GuardMgrError> {
446 let weak_provider = Arc::downgrade(provider);
447 {
448 let mut inner = self.inner.lock().expect("Poisoned lock");
449 assert!(inner.bridge_desc_provider.is_none());
450 inner.bridge_desc_provider = Some(weak_provider.clone());
451 }
452
453 let weak_inner = Arc::downgrade(&self.inner);
454 let rt_clone = self.runtime.clone();
455 self.runtime
456 .spawn(daemon::keep_bridge_descs_updated(
457 rt_clone,
458 weak_inner,
459 weak_provider,
460 ))
461 .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
462
463 Ok(())
464 }
465
466 /// Flush our current guard state to the state manager, if there
467 /// is any unsaved state.
468 pub fn store_persistent_state(&self) -> Result<(), GuardMgrError> {
469 let inner = self.inner.lock().expect("Poisoned lock");
470 trace!("Flushing guard state to disk.");
471 inner.storage.store(&inner.guards)?;
472 Ok(())
473 }
474
475 /// Reload state from the state manager.
476 ///
477 /// We only call this method if we _don't_ have the lock on the state
478 /// files. If we have the lock, we only want to save.
479 #[instrument(level = "trace", skip_all)]
480 pub fn reload_persistent_state(&self) -> Result<(), GuardMgrError> {
481 let mut inner = self.inner.lock().expect("Poisoned lock");
482 if let Some(new_guards) = inner.storage.load()? {
483 inner.replace_guards_with(new_guards, self.runtime.wallclock(), self.runtime.now());
484 }
485 Ok(())
486 }
487
488 /// Switch from having an unowned persistent state to having an owned one.
489 ///
490 /// Requires that we hold the lock on the state files.
491 #[instrument(level = "trace", skip_all)]
492 pub fn upgrade_to_owned_persistent_state(&self) -> Result<(), GuardMgrError> {
493 let mut inner = self.inner.lock().expect("Poisoned lock");
494 debug_assert!(inner.storage.can_store());
495 let new_guards = inner.storage.load()?.unwrap_or_default();
496 let wallclock = self.runtime.wallclock();
497 let now = self.runtime.now();
498 inner.replace_guards_with(new_guards, wallclock, now);
499 Ok(())
500 }
501
502 /// Return true if `netdir` has enough information to safely become our new netdir.
503 pub fn netdir_is_sufficient(&self, netdir: &NetDir) -> bool {
504 let mut inner = self.inner.lock().expect("Poisoned lock");
505 if inner.guards.active_set.universe_type() != UniverseType::NetDir {
506 // If we aren't using the netdir, this isn't something we want to look at.
507 return true;
508 }
509 inner
510 .guards
511 .active_guards_mut()
512 .n_primary_without_id_info_in(netdir)
513 == 0
514 }
515
516 /// Mark every guard as potentially retriable, regardless of how recently we
517 /// failed to connect to it.
518 pub fn mark_all_guards_retriable(&self) {
519 let mut inner = self.inner.lock().expect("Poisoned lock");
520 inner.guards.active_guards_mut().mark_all_guards_retriable();
521 }
522
523 /// Configure this guardmgr to use a fixed [`NetDir`] instead of a provider.
524 ///
525 /// This function is for testing only, and is exclusive with
526 /// `install_netdir_provider`.
527 ///
528 /// # Panics
529 ///
530 /// Panics if any [`NetDirProvider`] has already been installed.
531 #[cfg(any(test, feature = "testing"))]
532 pub fn install_test_netdir(&self, netdir: &NetDir) {
533 use tor_netdir::testprovider::TestNetDirProvider;
534 let wallclock = self.runtime.wallclock();
535 let now = self.runtime.now();
536 let netdir_provider: Arc<dyn NetDirProvider> =
537 Arc::new(TestNetDirProvider::from(netdir.clone()));
538 self.install_netdir_provider(&netdir_provider)
539 .expect("Couldn't install testing network provider");
540
541 let mut inner = self.inner.lock().expect("Poisoned lock");
542 inner.update(wallclock, now);
543 }
544
545 /// Replace the configuration in this `GuardMgr` with `config`.
546 #[instrument(level = "trace", skip_all)]
547 pub fn reconfigure(
548 &self,
549 config: &impl GuardMgrConfig,
550 ) -> Result<RetireCircuits, ReconfigureError> {
551 let mut inner = self.inner.lock().expect("Poisoned lock");
552 // Change the set of configured fallbacks.
553 {
554 let mut fallbacks: fallback::FallbackState = config.fallbacks().into();
555 std::mem::swap(&mut inner.fallbacks, &mut fallbacks);
556 inner.fallbacks.take_status_from(fallbacks);
557 }
558 // If we are built to use bridges, change the bridge configuration.
559 #[cfg(feature = "bridge-client")]
560 {
561 let wallclock = self.runtime.wallclock();
562 let now = self.runtime.now();
563 Ok(inner.replace_bridge_config(config, wallclock, now)?)
564 }
565 // If we are built to use bridges, change the bridge configuration.
566 #[cfg(not(feature = "bridge-client"))]
567 {
568 Ok(RetireCircuits::None)
569 }
570 }
571
572 /// Replace the current [`GuardFilter`] used by this `GuardMgr`.
573 // TODO should this be part of the config?
574 pub fn set_filter(&self, filter: GuardFilter) {
575 let wallclock = self.runtime.wallclock();
576 let now = self.runtime.now();
577 let mut inner = self.inner.lock().expect("Poisoned lock");
578 inner.set_filter(filter, wallclock, now);
579 }
580
581 /// Select a guard for a given [`GuardUsage`].
582 ///
583 /// On success, we return a [`FirstHop`] object to identify which
584 /// guard we have picked, a [`GuardMonitor`] object that the
585 /// caller can use to report whether its attempt to use the guard
586 /// succeeded or failed, and a [`GuardUsable`] future that the
587 /// caller can use to decide whether a circuit built through the
588 /// guard is actually safe to use.
589 ///
590 /// That last point is important: It's okay to build a circuit
591 /// through the guard returned by this function, but you can't
592 /// actually use it for traffic unless the [`GuardUsable`] future
593 /// yields "true".
594 #[instrument(skip_all, level = "trace")]
595 pub fn select_guard(
596 &self,
597 usage: GuardUsage,
598 ) -> Result<(FirstHop, GuardMonitor, GuardUsable), PickGuardError> {
599 let now = self.runtime.now();
600 let wallclock = self.runtime.wallclock();
601
602 let mut inner = self.inner.lock().expect("Poisoned lock");
603
604 // (I am not 100% sure that we need to consider_all_retries here, but
605 // it should _probably_ not hurt.)
606 inner.guards.active_guards_mut().consider_all_retries(now);
607
608 let (origin, guard) = inner.select_guard_with_expand(&usage, now, wallclock)?;
609 trace!(?guard, ?usage, "Guard selected");
610
611 let (usable, usable_sender) = if origin.usable_immediately() {
612 (GuardUsable::new_usable_immediately(), None)
613 } else {
614 let (u, snd) = GuardUsable::new_uncertain();
615 (u, Some(snd))
616 };
617 let request_id = pending::RequestId::next();
618 let ctrl = inner.ctrl.clone();
619 let monitor = GuardMonitor::new(request_id, ctrl);
620
621 // Note that the network can be down even if all the primary guards
622 // are not yet marked as unreachable. But according to guard-spec we
623 // don't want to acknowledge the net as down before that point, since
624 // we don't mark all the primary guards as retriable unless
625 // we've been forced to non-primary guards.
626 let net_has_been_down =
627 if let Some(duration) = tor_proto::time_since_last_incoming_traffic() {
628 inner
629 .guards
630 .active_guards_mut()
631 .all_primary_guards_are_unreachable()
632 && duration >= inner.params.internet_down_timeout
633 } else {
634 // TODO: Is this the correct behavior in this case?
635 false
636 };
637
638 let pending_request = pending::PendingRequest::new(
639 guard.first_hop_id(),
640 usage,
641 usable_sender,
642 net_has_been_down,
643 );
644 inner.pending.insert(request_id, pending_request);
645
646 match &guard.sample {
647 Some(sample) => {
648 let guard_id = GuardId::from_relay_ids(&guard);
649 inner
650 .guards
651 .guards_mut(sample)
652 .record_attempt(&guard_id, now);
653 }
654 None => {
655 // We don't record attempts for fallbacks; we only care when
656 // they have failed.
657 }
658 }
659
660 Ok((guard, monitor, usable))
661 }
662
663 /// Record that _after_ we built a circuit with a guard, something described
664 /// in `external_failure` went wrong with it.
665 pub fn note_external_failure<T>(&self, identity: &T, external_failure: ExternalActivity)
666 where
667 T: tor_linkspec::HasRelayIds + ?Sized,
668 {
669 let now = self.runtime.now();
670 let mut inner = self.inner.lock().expect("Poisoned lock");
671 let ids = inner.lookup_ids(identity);
672 for id in ids {
673 match &id.0 {
674 FirstHopIdInner::Guard(sample, id) => {
675 inner
676 .guards
677 .guards_mut(sample)
678 .record_failure(id, Some(external_failure), now);
679 }
680 FirstHopIdInner::Fallback(id) => {
681 if external_failure == ExternalActivity::DirCache {
682 inner.fallbacks.note_failure(id, now);
683 }
684 }
685 }
686 }
687 }
688
689 /// Record that _after_ we built a circuit with a guard, some activity
690 /// described in `external_activity` was successful with it.
691 pub fn note_external_success<T>(&self, identity: &T, external_activity: ExternalActivity)
692 where
693 T: tor_linkspec::HasRelayIds + ?Sized,
694 {
695 let mut inner = self.inner.lock().expect("Poisoned lock");
696
697 inner.record_external_success(identity, external_activity, self.runtime.wallclock());
698 }
699
700 /// Return a stream of events about our estimated clock skew; these events
701 /// are `None` when we don't have enough information to make an estimate,
702 /// and `Some(`[`SkewEstimate`]`)` otherwise.
703 ///
704 /// Note that this stream can be lossy: if the estimate changes more than
705 /// one before you read from the stream, you might only get the most recent
706 /// update.
707 pub fn skew_events(&self) -> ClockSkewEvents {
708 let inner = self.inner.lock().expect("Poisoned lock");
709 inner.recv_skew.clone()
710 }
711
712 /// Ensure that the message queue is flushed before proceeding to
713 /// the next step. Used for testing.
714 #[cfg(test)]
715 async fn flush_msg_queue(&self) {
716 let (snd, rcv) = oneshot::channel();
717 let pingmsg = daemon::Msg::Ping(snd);
718 {
719 let inner = self.inner.lock().expect("Poisoned lock");
720 inner
721 .ctrl
722 .unbounded_send(pingmsg)
723 .expect("Guard observer task exited prematurely.");
724 }
725 let _ = rcv.await;
726 }
727}
728
/// An activity that can succeed or fail, and whose success or failure can be
/// attributed to a guard.
///
/// Values of this type are passed to [`GuardMgr::note_external_failure`] and
/// [`GuardMgr::note_external_success`].
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum ExternalActivity {
    /// The activity of using the guard as a directory cache.
    DirCache,
}
737
738impl GuardSets {
739 /// Return a reference to the currently active set of guards.
740 ///
741 /// (That's easy enough for now, since there is never more than one set of
742 /// guards. But eventually that will change, as we add support for more
743 /// complex filter types, and for bridge relays. Those will use separate
744 /// `GuardSet` instances, and this accessor will choose the right one.)
745 fn active_guards(&self) -> &GuardSet {
746 self.guards(&self.active_set)
747 }
748
749 /// Return the set of guards corresponding to the provided selector.
750 fn guards(&self, selector: &GuardSetSelector) -> &GuardSet {
751 match selector {
752 GuardSetSelector::Default => &self.default,
753 GuardSetSelector::Restricted => &self.restricted,
754 #[cfg(feature = "bridge-client")]
755 GuardSetSelector::Bridges => &self.bridges,
756 }
757 }
758
759 /// Return a mutable reference to the currently active set of guards.
760 fn active_guards_mut(&mut self) -> &mut GuardSet {
761 self.guards_mut(&self.active_set.clone())
762 }
763
764 /// Return a mutable reference to the set of guards corresponding to the
765 /// provided selector.
766 fn guards_mut(&mut self, selector: &GuardSetSelector) -> &mut GuardSet {
767 match selector {
768 GuardSetSelector::Default => &mut self.default,
769 GuardSetSelector::Restricted => &mut self.restricted,
770 #[cfg(feature = "bridge-client")]
771 GuardSetSelector::Bridges => &mut self.bridges,
772 }
773 }
774
775 /// Update all non-persistent state for the guards in this object with the
776 /// state in `other`.
777 fn copy_status_from(&mut self, mut other: GuardSets) {
778 use strum::IntoEnumIterator;
779 for sample in GuardSetSelector::iter() {
780 self.guards_mut(&sample)
781 .copy_ephemeral_status_into_newly_loaded_state(std::mem::take(
782 other.guards_mut(&sample),
783 ));
784 }
785 self.active_set = other.active_set;
786 }
787}
788
789impl GuardMgrInner {
790 /// Look up the latest [`NetDir`] (if there is one) from our
791 /// [`NetDirProvider`] (if we have one).
792 fn timely_netdir(&self) -> Option<Arc<NetDir>> {
793 self.netdir_provider
794 .as_ref()
795 .and_then(Weak::upgrade)
796 .and_then(|np| np.timely_netdir().ok())
797 }
798
799 /// Look up the latest [`BridgeDescList`](bridge::BridgeDescList) (if there
800 /// is one) from our [`BridgeDescProvider`](bridge::BridgeDescProvider) (if
801 /// we have one).
802 #[cfg(feature = "bridge-client")]
803 fn latest_bridge_desc_list(&self) -> Option<Arc<bridge::BridgeDescList>> {
804 self.bridge_desc_provider
805 .as_ref()
806 .and_then(Weak::upgrade)
807 .map(|bp| bp.bridges())
808 }
809
810 /// Run a function that takes `&mut self` and an optional NetDir.
811 ///
812 /// We try to use the netdir from our [`NetDirProvider`] (if we have one).
813 /// Therefore, although its _parameters_ are suitable for every
814 /// [`GuardSet`], its _contents_ might not be. For those, call
815 /// [`with_opt_universe`](Self::with_opt_universe) instead.
816 //
817 // This function exists to handle the lifetime mess where sometimes the
818 // resulting NetDir will borrow from `netdir`, and sometimes it will borrow
819 // from an Arc returned by `self.latest_netdir()`.
820 fn with_opt_netdir<F, T>(&mut self, func: F) -> T
821 where
822 F: FnOnce(&mut Self, Option<&NetDir>) -> T,
823 {
824 if let Some(nd) = self.timely_netdir() {
825 func(self, Some(nd.as_ref()))
826 } else {
827 func(self, None)
828 }
829 }
830
831 /// Return the latest `BridgeSet` based on our `BridgeDescProvider` and our
832 /// configured bridges.
833 ///
834 /// Returns `None` if we are not configured to use bridges.
835 #[cfg(feature = "bridge-client")]
836 fn latest_bridge_set(&self) -> Option<bridge::BridgeSet> {
837 let bridge_config = self.configured_bridges.as_ref()?.clone();
838 let bridge_descs = self.latest_bridge_desc_list();
839 Some(bridge::BridgeSet::new(bridge_config, bridge_descs))
840 }
841
842 /// Run a function that takes `&mut self` and an optional [`UniverseRef`].
843 ///
844 /// We try to get a universe from the appropriate source for the current
845 /// active guard set.
846 fn with_opt_universe<F, T>(&mut self, func: F) -> T
847 where
848 F: FnOnce(&mut Self, Option<&UniverseRef>) -> T,
849 {
850 // TODO: it might be nice to make `func` take an GuardSet and a set of
851 // parameters, so we can't get the active set wrong. Doing that will
852 // require a fair amount of refactoring so that the borrow checker is
853 // happy, however.
854 match self.guards.active_set.universe_type() {
855 UniverseType::NetDir => {
856 if let Some(nd) = self.timely_netdir() {
857 func(self, Some(&UniverseRef::NetDir(nd)))
858 } else {
859 func(self, None)
860 }
861 }
862 #[cfg(feature = "bridge-client")]
863 UniverseType::BridgeSet => func(
864 self,
865 self.latest_bridge_set()
866 .map(UniverseRef::BridgeSet)
867 .as_ref(),
868 ),
869 }
870 }
871
872 /// Update the status of all guards in the active set, based on the passage
873 /// of time, our configuration, and the relevant Universe for our active
874 /// set.
875 #[instrument(skip_all, level = "trace")]
876 fn update(&mut self, wallclock: SystemTime, now: Instant) {
877 self.with_opt_netdir(|this, netdir| {
878 // Here we update our parameters from the latest NetDir, and check
879 // whether we need to change to a (non)-restrictive GuardSet based
880 // on those parameters and our configured filter.
881 //
882 // This uses a NetDir unconditionally, since we always want to take
883 // the network parameters our parameters from the consensus even if
884 // the guards themselves are from a BridgeSet.
885 this.update_active_set_params_and_filter(netdir);
886 });
887 self.with_opt_universe(|this, univ| {
888 // Now we update the set of guards themselves based on the
889 // Universe, which is either the latest NetDir, or the latest
890 // BridgeSet—depending on what the GuardSet wants.
891 Self::update_guardset_internal(
892 &this.params,
893 wallclock,
894 this.guards.active_set.universe_type(),
895 this.guards.active_guards_mut(),
896 univ,
897 );
898 #[cfg(feature = "bridge-client")]
899 this.update_desired_descriptors(now);
900 #[cfg(not(feature = "bridge-client"))]
901 let _ = now;
902 });
903 }
904
/// Switch our bridge configuration to the one described by `new_config`.
///
/// Returns which circuits (if any) the caller needs to retire, or an error
/// if bridges are requested but our storage cannot be written.
#[cfg(feature = "bridge-client")]
#[instrument(level = "trace", skip_all)]
fn replace_bridge_config(
    &mut self,
    new_config: &impl GuardMgrConfig,
    wallclock: SystemTime,
    now: Instant,
) -> Result<RetireCircuits, GuardMgrConfigError> {
    if new_config.bridges_enabled() {
        if !self.storage.can_store() {
            // TODO: Ideally we would try to upgrade, obtaining an exclusive lock,
            // but `StorageHandle` currently lacks a method for that.
            return Err(GuardMgrConfigError::NoLock("bridges configured".into()));
        }

        let unchanged = matches!(
            &self.configured_bridges,
            Some(current) if new_config.bridges() == current.as_ref()
        );
        if unchanged {
            // Same bridges as before: nothing to do.
            assert_eq!(
                self.guards.active_set.universe_type(),
                UniverseType::BridgeSet
            );
            return Ok(RetireCircuits::None);
        }

        self.configured_bridges = Some(new_config.bridges().into());
        self.guards.active_set = GuardSetSelector::Bridges;
    } else {
        if self.configured_bridges.is_none() {
            // Bridges were off and stay off: nothing to do.
            assert_ne!(
                self.guards.active_set.universe_type(),
                UniverseType::BridgeSet
            );
            return Ok(RetireCircuits::None);
        }

        self.configured_bridges = None;
        self.guards.active_set = GuardSetSelector::Default;
    }

    // If we have gotten here, we have changed the set of bridges, changed
    // which set is active, or changed them both. We need to make sure that
    // our `GuardSet` object is up-to-date with our configuration.
    self.update(wallclock, now);

    // We also need to tell the caller that its circuits are no good any
    // more.
    //
    // TODO(nickm): Someday we can do this more judiciously by returning
    // "Some" in the case where we're still using bridges but our new bridge
    // set contains different elements; see comment on RetireCircuits.
    //
    // TODO(nickm): We could also safely return RetireCircuits::None if we
    // are using bridges, and our new bridge list is a superset of the older
    // one.
    Ok(RetireCircuits::All)
}
961
962 /// Update our parameters, our selection (based on network parameters and
963 /// configuration), and make sure the active GuardSet has the right
964 /// configuration itself.
965 ///
966 /// We should call this whenever the NetDir's parameters change, or whenever
967 /// our filter changes. We do not need to call it for new elements arriving
968 /// in our Universe, since those do not affect anything here.
969 ///
970 /// We should also call this whenever a new GuardSet becomes active for any
971 /// reason _other_ than just having called this function.
972 ///
973 /// (This function is only invoked from `update`, which should be called
974 /// under the above circumstances.)
975 fn update_active_set_params_and_filter(&mut self, netdir: Option<&NetDir>) {
976 // Set the parameters. These always come from the NetDir, even if this
977 // is a bridge set.
978 if let Some(netdir) = netdir {
979 match GuardParams::try_from(netdir.params()) {
980 Ok(params) => self.params = params,
981 Err(e) => warn!("Unusable guard parameters from consensus: {}", e),
982 }
983
984 self.select_guard_set_based_on_filter(netdir);
985 }
986
987 // Change the filter, if it doesn't match what the guards have.
988 //
989 // TODO(nickm): We could use a "dirty" flag or something to decide
990 // whether we need to call set_filter, if this comparison starts to show
991 // up in profiles.
992 if self.guards.active_guards().filter() != &self.filter {
993 let restrictive = self.guards.active_set == GuardSetSelector::Restricted;
994 self.guards
995 .active_guards_mut()
996 .set_filter(self.filter.clone(), restrictive);
997 }
998 }
999
1000 /// Update the status of every guard in `active_guards`, and expand it as
1001 /// needed.
1002 ///
1003 /// This function doesn't take `&self`, to make sure that we are only
1004 /// affecting a single `GuardSet`, and to avoid confusing the borrow
1005 /// checker.
1006 ///
1007 /// We should call this whenever the contents of the universe have changed.
1008 ///
1009 /// We should also call this whenever a new GuardSet becomes active.
1010 fn update_guardset_internal<U: Universe>(
1011 params: &GuardParams,
1012 now: SystemTime,
1013 universe_type: UniverseType,
1014 active_guards: &mut GuardSet,
1015 universe: Option<&U>,
1016 ) -> ExtendedStatus {
1017 // Expire guards. Do that early, in case doing so makes it clear that
1018 // we need to grab more guards or mark others as primary.
1019 active_guards.expire_old_guards(params, now);
1020
1021 let extended = if let Some(universe) = universe {
1022 // TODO: This check here may be completely unnecessary. I inserted
1023 // it back in 5ac0fcb7ef603e0d14 because I was originally concerned
1024 // it might be undesirable to list a primary guard as "missing dir
1025 // info" (and therefore unusable) if we were expecting to get its
1026 // microdescriptor "very soon."
1027 //
1028 // But due to the other check in `netdir_is_sufficient`, we
1029 // shouldn't be installing a netdir until it has microdescs for all
1030 // of the (non-bridge) primary guards that it lists. - nickm
1031 let n = active_guards.n_primary_without_id_info_in(universe);
1032 if n > 0 && universe_type == UniverseType::NetDir {
1033 // We are missing the information from a NetDir needed to see
1034 // whether our primary guards are listed, so we shouldn't update
1035 // our guard status.
1036 //
1037 // We don't want to do this check if we are using bridges, since
1038 // a missing bridge descriptor is not guaranteed to temporary
1039 // problem in the same way that a missing microdescriptor is.
1040 // (When a bridge desc is missing, the bridge could be down or
1041 // unreachable, and nobody else can help us. But if a microdesc
1042 // is missing, we just need to find a cache that has it.)
1043 trace!(
1044 n_primary_without_id_info = n,
1045 "Not extending guardset, missing information."
1046 );
1047 return ExtendedStatus::No;
1048 }
1049 active_guards.update_status_from_dir(universe);
1050 active_guards.extend_sample_as_needed(now, params, universe)
1051 } else {
1052 trace!("Not extending guardset, no universe given.");
1053 ExtendedStatus::No
1054 };
1055
1056 active_guards.select_primary_guards(params);
1057
1058 extended
1059 }
1060
/// When bridges are in use, tell the BridgeDescProvider which bridge
/// descriptors we would like it to fetch.
///
/// This has to happen *after* our primary guards have been selected.
#[cfg(feature = "bridge-client")]
fn update_desired_descriptors(&mut self, now: Instant) {
    if self.guards.active_set.universe_type() != UniverseType::BridgeSet {
        return;
    }

    // We need both a live provider and a bridge set; otherwise there is
    // nothing to request.
    let provider = self.bridge_desc_provider.as_ref().and_then(Weak::upgrade);
    let bridge_set = self.latest_bridge_set();
    let (provider, bridge_set) = match (provider, bridge_set) {
        (Some(provider), Some(bridge_set)) => (provider, bridge_set),
        _ => return,
    };

    // Map each guard whose descriptor we want back to its bridge
    // configuration, dropping guards the bridge set does not know.
    let wanted: Vec<_> = self
        .guards
        .active_guards()
        .descriptors_to_request(now, &self.params)
        .into_iter()
        .filter_map(|guard| bridge_set.bridge_by_guard(guard).cloned())
        .collect();

    provider.set_bridges(&wanted);
}
1084
1085 /// Replace the active guard state with `new_state`, preserving
1086 /// non-persistent state for any guards that are retained.
1087 #[instrument(level = "trace", skip_all)]
1088 fn replace_guards_with(
1089 &mut self,
1090 mut new_guards: GuardSets,
1091 wallclock: SystemTime,
1092 now: Instant,
1093 ) {
1094 std::mem::swap(&mut self.guards, &mut new_guards);
1095 self.guards.copy_status_from(new_guards);
1096 self.update(wallclock, now);
1097 }
1098
1099 /// Update which guard set is active based on the current filter and the
1100 /// provided netdir.
1101 ///
1102 /// After calling this function, the new guard set's filter may be
1103 /// out-of-date: be sure to call `set_filter` as appropriate.
1104 fn select_guard_set_based_on_filter(&mut self, netdir: &NetDir) {
1105 // In general, we'd like to use the restricted set if we're under the
1106 // threshold, and the default set if we're over the threshold. But if
1107 // we're sitting close to the threshold, we want to avoid flapping back
1108 // and forth, so we only change when we're more than 5% "off" from
1109 // whatever our current setting is.
1110 //
1111 // (See guard-spec section 2 for more information.)
1112 let offset = match self.guards.active_set {
1113 GuardSetSelector::Default => -0.05,
1114 GuardSetSelector::Restricted => 0.05,
1115 // If we're using bridges, then we don't switch between the other guard sets based on on the filter at all.
1116 #[cfg(feature = "bridge-client")]
1117 GuardSetSelector::Bridges => return,
1118 };
1119 let frac_permitted = self.filter.frac_bw_permitted(netdir);
1120 let threshold = self.params.filter_threshold + offset;
1121 let new_choice = if frac_permitted < threshold {
1122 GuardSetSelector::Restricted
1123 } else {
1124 GuardSetSelector::Default
1125 };
1126
1127 if new_choice != self.guards.active_set {
1128 info!(
1129 "Guard selection changed; we are now using the {:?} guard set",
1130 &new_choice
1131 );
1132
1133 self.guards.active_set = new_choice;
1134
1135 if frac_permitted < self.params.extreme_threshold {
1136 warn!(
1137 "The number of guards permitted is smaller than the recommended minimum of {:.0}%.",
1138 self.params.extreme_threshold * 100.0,
1139 );
1140 }
1141 }
1142 }
1143
1144 /// Mark all of our primary guards as retriable, if we haven't done
1145 /// so since long enough before `now`.
1146 ///
1147 /// We want to call this function whenever a guard attempt succeeds,
1148 /// if the internet seemed to be down when the guard attempt was
1149 /// first launched.
1150 fn maybe_retry_primary_guards(&mut self, now: Instant) {
1151 // We don't actually want to mark our primary guards as
1152 // retriable more than once per internet_down_timeout: after
1153 // the first time, we would just be noticing the same "coming
1154 // back online" event more than once.
1155 let interval = self.params.internet_down_timeout;
1156 if self.last_primary_retry_time + interval <= now {
1157 debug!(
1158 "Successfully reached a guard after a while off the internet; marking all primary guards retriable."
1159 );
1160 self.guards
1161 .active_guards_mut()
1162 .mark_primary_guards_retriable();
1163 self.last_primary_retry_time = now;
1164 }
1165 }
1166
1167 /// Replace the current GuardFilter with `filter`.
1168 #[instrument(level = "trace", skip_all)]
1169 fn set_filter(&mut self, filter: GuardFilter, wallclock: SystemTime, now: Instant) {
1170 self.filter = filter;
1171 self.update(wallclock, now);
1172 }
1173
1174 /// Called when the circuit manager reports (via [`GuardMonitor`]) that
1175 /// a guard succeeded or failed.
1176 ///
1177 /// Changes the guard's status as appropriate, and updates the pending
1178 /// request as needed.
1179 #[allow(clippy::cognitive_complexity)]
1180 pub(crate) fn handle_msg(
1181 &mut self,
1182 request_id: RequestId,
1183 status: GuardStatus,
1184 skew: Option<ClockSkew>,
1185 runtime: &impl tor_rtcompat::SleepProvider,
1186 ) {
1187 if let Some(mut pending) = self.pending.remove(&request_id) {
1188 // If there was a pending request matching this RequestId, great!
1189 let guard_id = pending.guard_id();
1190 trace!(?guard_id, ?status, "Received report of guard status");
1191
1192 // First, handle the skew report (if any)
1193 if let Some(skew) = skew {
1194 let now = runtime.now();
1195 let observation = skew::SkewObservation { skew, when: now };
1196
1197 match &guard_id.0 {
1198 FirstHopIdInner::Guard(_, id) => {
1199 self.guards.active_guards_mut().record_skew(id, observation);
1200 }
1201 FirstHopIdInner::Fallback(id) => {
1202 self.fallbacks.note_skew(id, observation);
1203 }
1204 }
1205 // TODO: We call this whenever we receive an observed clock
1206 // skew. That's not the perfect timing for two reasons. First
1207 // off, it might be too frequent: it does an O(n) calculation,
1208 // which isn't ideal. Second, it might be too infrequent: after
1209 // an hour has passed, a given observation won't be up-to-date
1210 // any more, and we might want to recalculate the skew
1211 // accordingly.
1212 self.update_skew(now);
1213 }
1214
1215 match (status, &guard_id.0) {
1216 (GuardStatus::Failure, FirstHopIdInner::Fallback(id)) => {
1217 // We used a fallback, and we weren't able to build a circuit through it.
1218 self.fallbacks.note_failure(id, runtime.now());
1219 }
1220 (_, FirstHopIdInner::Fallback(_)) => {
1221 // We don't record any other kind of circuit activity if we
1222 // took the entry from the fallback list.
1223 }
1224 (GuardStatus::Success, FirstHopIdInner::Guard(sample, id)) => {
1225 // If we had gone too long without any net activity when we
1226 // gave out this guard, and now we're seeing a circuit
1227 // succeed, tell the primary guards that they might be
1228 // retriable.
1229 if pending.net_has_been_down() {
1230 self.maybe_retry_primary_guards(runtime.now());
1231 }
1232
1233 // The guard succeeded. Tell the GuardSet.
1234 self.guards.guards_mut(sample).record_success(
1235 id,
1236 &self.params,
1237 None,
1238 runtime.wallclock(),
1239 );
1240 // Either tell the request whether the guard is
1241 // usable, or schedule it as a "waiting" request.
1242 if let Some(usable) = self.guard_usability_status(&pending, runtime.now()) {
1243 trace!(?guard_id, usable, "Known usability status");
1244 pending.reply(usable);
1245 } else {
1246 // This is the one case where we can't use the
1247 // guard yet.
1248 trace!(?guard_id, "Not able to answer right now");
1249 pending.mark_waiting(runtime.now());
1250 self.waiting.push(pending);
1251 }
1252 }
1253 (GuardStatus::Failure, FirstHopIdInner::Guard(sample, id)) => {
1254 self.guards
1255 .guards_mut(sample)
1256 .record_failure(id, None, runtime.now());
1257 pending.reply(false);
1258 }
1259 (GuardStatus::AttemptAbandoned, FirstHopIdInner::Guard(sample, id)) => {
1260 self.guards.guards_mut(sample).record_attempt_abandoned(id);
1261 pending.reply(false);
1262 }
1263 (GuardStatus::Indeterminate, FirstHopIdInner::Guard(sample, id)) => {
1264 self.guards
1265 .guards_mut(sample)
1266 .record_indeterminate_result(id);
1267 pending.reply(false);
1268 }
1269 };
1270 } else {
1271 warn!(
1272 "Got a status {:?} for a request {:?} that wasn't pending",
1273 status, request_id
1274 );
1275 }
1276
1277 // We might need to update the primary guards based on changes in the
1278 // status of guards above.
1279 self.guards
1280 .active_guards_mut()
1281 .select_primary_guards(&self.params);
1282
1283 // Some waiting request may just have become ready (usable or
1284 // not); we need to give them the information they're waiting
1285 // for.
1286 self.expire_and_answer_pending_requests(runtime.now());
1287 }
1288
1289 /// Helper to implement `GuardMgr::note_external_success()`.
1290 ///
1291 /// (This has to be a separate function so that we can borrow params while
1292 /// we have `mut self` borrowed.)
1293 fn record_external_success<T>(
1294 &mut self,
1295 identity: &T,
1296 external_activity: ExternalActivity,
1297 now: SystemTime,
1298 ) where
1299 T: tor_linkspec::HasRelayIds + ?Sized,
1300 {
1301 for id in self.lookup_ids(identity) {
1302 match &id.0 {
1303 FirstHopIdInner::Guard(sample, id) => {
1304 self.guards.guards_mut(sample).record_success(
1305 id,
1306 &self.params,
1307 Some(external_activity),
1308 now,
1309 );
1310 }
1311 FirstHopIdInner::Fallback(id) => {
1312 if external_activity == ExternalActivity::DirCache {
1313 self.fallbacks.note_success(id);
1314 }
1315 }
1316 }
1317 }
1318 }
1319
1320 /// Return an iterator over all of the clock skew observations we've made
1321 /// for guards or fallbacks.
1322 fn skew_observations(&self) -> impl Iterator<Item = &skew::SkewObservation> {
1323 self.fallbacks
1324 .skew_observations()
1325 .chain(self.guards.active_guards().skew_observations())
1326 }
1327
1328 /// Recalculate our estimated clock skew, and publish it to anybody who
1329 /// cares.
1330 fn update_skew(&mut self, now: Instant) {
1331 let estimate = skew::SkewEstimate::estimate_skew(self.skew_observations(), now);
1332 // TODO: we might want to do this only conditionally, when the skew
1333 // estimate changes.
1334 *self.send_skew.borrow_mut() = estimate;
1335 }
1336
1337 /// If the circuit built because of a given [`PendingRequest`] may
1338 /// now be used (or discarded), return `Some(true)` or
1339 /// `Some(false)` respectively.
1340 ///
1341 /// Return None if we can't yet give an answer about whether such
1342 /// a circuit is usable.
1343 fn guard_usability_status(&self, pending: &PendingRequest, now: Instant) -> Option<bool> {
1344 match &pending.guard_id().0 {
1345 FirstHopIdInner::Guard(sample, id) => self.guards.guards(sample).circ_usability_status(
1346 id,
1347 pending.usage(),
1348 &self.params,
1349 now,
1350 ),
1351 // Fallback circuits are usable immediately, since we don't have to wait to
1352 // see whether any _other_ circuit succeeds or fails.
1353 FirstHopIdInner::Fallback(_) => Some(true),
1354 }
1355 }
1356
1357 /// For requests that have been "waiting" for an answer for too long,
1358 /// expire them and tell the circuit manager that their circuits
1359 /// are unusable.
1360 fn expire_and_answer_pending_requests(&mut self, now: Instant) {
1361 // A bit ugly: we use a separate Vec here to avoid borrowing issues,
1362 // and put it back when we're done.
1363 let mut waiting = Vec::new();
1364 std::mem::swap(&mut waiting, &mut self.waiting);
1365
1366 waiting.retain_mut(|pending| {
1367 let expired = pending
1368 .waiting_since()
1369 .and_then(|w| now.checked_duration_since(w))
1370 .map(|d| d >= self.params.np_idle_timeout)
1371 == Some(true);
1372 if expired {
1373 trace!(?pending, "Pending request expired");
1374 pending.reply(false);
1375 return false;
1376 }
1377
1378 // TODO-SPEC: guard_usability_status isn't what the spec says. It
1379 // says instead that we should look at _circuit_ status, saying:
1380 // " Definition: In the algorithm above, C2 "blocks" C1 if:
1381 // * C2 obeys all the restrictions that C1 had to obey, AND
1382 // * C2 has higher priority than C1, AND
1383 // * Either C2 is <complete>, or C2 is <waiting_for_better_guard>,
1384 // or C2 has been <usable_if_no_better_guard> for no more than
1385 // {NONPRIMARY_GUARD_CONNECT_TIMEOUT} seconds."
1386 //
1387 // See comments in sample::GuardSet::circ_usability_status.
1388
1389 if let Some(answer) = self.guard_usability_status(pending, now) {
1390 trace!(?pending, answer, "Pending request now ready");
1391 pending.reply(answer);
1392 return false;
1393 }
1394 true
1395 });
1396
1397 // Put the waiting list back.
1398 std::mem::swap(&mut waiting, &mut self.waiting);
1399 }
1400
1401 /// Return every currently extant FirstHopId for a guard or fallback
1402 /// directory matching (or possibly matching) the provided keys.
1403 ///
1404 /// An identity is _possibly matching_ if it contains some of the IDs in the
1405 /// provided identity, and it has no _contradictory_ identities, but it does
1406 /// not necessarily contain _all_ of those identities.
1407 ///
1408 /// # TODO
1409 ///
1410 /// This function should probably not exist; it's only used so that dirmgr
1411 /// can report successes or failures, since by the time it observes them it
1412 /// doesn't know whether its circuit came from a guard or a fallback. To
1413 /// solve that, we'll need CircMgr to record and report which one it was
1414 /// using, which will take some more plumbing.
1415 ///
1416 /// TODO relay: we will have to make the change above when we implement
1417 /// relays; otherwise, it would be possible for an attacker to exploit it to
1418 /// mislead us about our guard status.
1419 fn lookup_ids<T>(&self, identity: &T) -> Vec<FirstHopId>
1420 where
1421 T: tor_linkspec::HasRelayIds + ?Sized,
1422 {
1423 use strum::IntoEnumIterator;
1424 let mut vec = Vec::with_capacity(2);
1425
1426 let id = ids::GuardId::from_relay_ids(identity);
1427 for sample in GuardSetSelector::iter() {
1428 let guard_id = match self.guards.guards(&sample).contains(&id) {
1429 Ok(true) => &id,
1430 Err(other) => other,
1431 Ok(false) => continue,
1432 };
1433 vec.push(FirstHopId(FirstHopIdInner::Guard(sample, guard_id.clone())));
1434 }
1435
1436 let id = ids::FallbackId::from_relay_ids(identity);
1437 if self.fallbacks.contains(&id) {
1438 vec.push(id.into());
1439 }
1440
1441 vec
1442 }
1443
1444 /// Run any periodic events that update guard status, and return a
1445 /// duration after which periodic events should next be run.
1446 #[instrument(skip_all, level = "trace")]
1447 pub(crate) fn run_periodic_events(&mut self, wallclock: SystemTime, now: Instant) -> Duration {
1448 self.update(wallclock, now);
1449 self.expire_and_answer_pending_requests(now);
1450 Duration::from_secs(1) // TODO: Too aggressive.
1451 }
1452
1453 /// Try to select a guard, expanding the sample if the first attempt fails.
1454 #[instrument(skip_all, level = "trace")]
1455 fn select_guard_with_expand(
1456 &mut self,
1457 usage: &GuardUsage,
1458 now: Instant,
1459 wallclock: SystemTime,
1460 ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1461 // Try to find a guard.
1462 let first_error = match self.select_guard_once(usage, now) {
1463 Ok(res1) => return Ok(res1),
1464 Err(e) => {
1465 trace!("Couldn't select guard on first attempt: {}", e);
1466 e
1467 }
1468 };
1469
1470 // That didn't work. If we have a netdir, expand the sample and try again.
1471 let res = self.with_opt_universe(|this, univ| {
1472 let univ = univ?;
1473 trace!("No guards available, trying to extend the sample.");
1474 // Make sure that the status on all of our guards are accurate, and
1475 // expand the sample if we can.
1476 //
1477 // Our parameters and configuration did not change, so we do not
1478 // need to call update() or update_active_set_and_filter(). This
1479 // call is sufficient to extend the sample and recompute primary
1480 // guards.
1481 let extended = Self::update_guardset_internal(
1482 &this.params,
1483 wallclock,
1484 this.guards.active_set.universe_type(),
1485 this.guards.active_guards_mut(),
1486 Some(univ),
1487 );
1488 if extended == ExtendedStatus::Yes {
1489 match this.select_guard_once(usage, now) {
1490 Ok(res) => return Some(res),
1491 Err(e) => {
1492 trace!("Couldn't select guard after update: {}", e);
1493 }
1494 }
1495 }
1496 None
1497 });
1498 if let Some(res) = res {
1499 return Ok(res);
1500 }
1501
1502 // Okay, that didn't work either. If we were asked for a directory
1503 // guard, and we aren't using bridges, then we may be able to use a
1504 // fallback.
1505 if usage.kind == GuardUsageKind::OneHopDirectory
1506 && self.guards.active_set.universe_type() == UniverseType::NetDir
1507 {
1508 return self.select_fallback(now);
1509 }
1510
1511 // Couldn't extend the sample or use a fallback; return the original error.
1512 Err(first_error)
1513 }
1514
1515 /// Helper: try to pick a single guard, without retrying on failure.
1516 fn select_guard_once(
1517 &self,
1518 usage: &GuardUsage,
1519 now: Instant,
1520 ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1521 let active_set = &self.guards.active_set;
1522 #[cfg_attr(not(feature = "bridge-client"), allow(unused_mut))]
1523 let (list_kind, mut first_hop) =
1524 self.guards
1525 .guards(active_set)
1526 .pick_guard(active_set, usage, &self.params, now)?;
1527 #[cfg(feature = "bridge-client")]
1528 if self.guards.active_set.universe_type() == UniverseType::BridgeSet {
1529 // See if we can promote first_hop to a viable CircTarget.
1530 let bridges = self.latest_bridge_set().ok_or_else(|| {
1531 PickGuardError::Internal(internal!(
1532 "No bridge set available, even though this is the Bridges sample"
1533 ))
1534 })?;
1535 first_hop.lookup_bridge_circ_target(&bridges);
1536
1537 if usage.kind == GuardUsageKind::Data && !first_hop.contains_circ_target() {
1538 return Err(PickGuardError::Internal(internal!(
1539 "Tried to return a non-circtarget guard with Data usage!"
1540 )));
1541 }
1542 }
1543 Ok((list_kind, first_hop))
1544 }
1545
1546 /// Helper: Select a fallback directory.
1547 ///
1548 /// Called when we have no guard information to use. Return values are as
1549 /// for [`GuardMgr::select_guard()`]
1550 fn select_fallback(
1551 &self,
1552 now: Instant,
1553 ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1554 let filt = self.guards.active_guards().filter();
1555
1556 let fallback = crate::FirstHop {
1557 sample: None,
1558 inner: crate::FirstHopInner::Chan(OwnedChanTarget::from_chan_target(
1559 self.fallbacks.choose(&mut rand::rng(), now, filt)?,
1560 )),
1561 };
1562 let fallback = filt.modify_hop(fallback)?;
1563 Ok((sample::ListKind::Fallback, fallback))
1564 }
1565}
1566
/// The result of an attempt to extend a guard sample.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ExtendedStatus {
    /// At least one guard was added to the sample.
    Yes,
    /// Nothing was added to the sample.
    No,
}
1575
/// Parameters that control how a guard manager behaves, as derived from
/// the consensus document.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
struct GuardParams {
    /// Time before a sampled guard that was never confirmed expires from
    /// the sample.
    lifetime_unconfirmed: Duration,
    /// Time before a confirmed guard expires from the sample.
    lifetime_confirmed: Duration,
    /// Time that a guard may stay unlisted before we drop it from the
    /// sample.
    lifetime_unlisted: Duration,
    /// Upper bound on the number of guards we will add to the sample.
    max_sample_size: usize,
    /// Upper bound on the fraction of the network's guard bandwidth that
    /// we will add to the sample.
    max_sample_bw_fraction: f64,
    /// Lower bound on the number of guards we want in the sample once a
    /// [`GuardFilter`] has been applied.
    min_filtered_sample_size: usize,
    /// Number of guards that count as "Primary".
    n_primary: usize,
    /// Number of primary guards we are willing to try when making a
    /// regular circuit.
    data_parallelism: usize,
    /// Number of primary guards we are willing to try when making a
    /// one-hop directory circuit.
    dir_parallelism: usize,
    /// Length of time for which a pending connection attempt to a guard
    /// blocks an attempt to use a less-favored non-primary guard.
    np_connect_timeout: Duration,
    /// Length of time that a circuit to a successful but unfavored
    /// non-primary guard may sit around before we decide not to use it.
    np_idle_timeout: Duration,
    /// Length of time without successful activity after which a
    /// successful circuit means we should retry our primary guards.
    internet_down_timeout: Duration,
    /// Fraction of the guards that can be filtered out before we decide
    /// that our filter is "very restrictive".
    filter_threshold: f64,
    /// Fraction of the guards that determines whether our filter is "very
    /// restrictive".
    extreme_threshold: f64,
}
1621
1622impl Default for GuardParams {
1623 fn default() -> Self {
1624 let one_day = Duration::from_secs(86400);
1625 GuardParams {
1626 lifetime_unconfirmed: one_day * 120,
1627 lifetime_confirmed: one_day * 60,
1628 lifetime_unlisted: one_day * 20,
1629 max_sample_size: 60,
1630 max_sample_bw_fraction: 0.2,
1631 min_filtered_sample_size: 20,
1632 n_primary: 3,
1633 data_parallelism: 1,
1634 dir_parallelism: 3,
1635 np_connect_timeout: Duration::from_secs(15),
1636 np_idle_timeout: Duration::from_secs(600),
1637 internet_down_timeout: Duration::from_secs(600),
1638 filter_threshold: 0.2,
1639 extreme_threshold: 0.01,
1640 }
1641 }
1642}
1643
1644impl TryFrom<&NetParameters> for GuardParams {
1645 type Error = tor_units::Error;
1646 fn try_from(p: &NetParameters) -> Result<GuardParams, Self::Error> {
1647 Ok(GuardParams {
1648 lifetime_unconfirmed: p.guard_lifetime_unconfirmed.try_into()?,
1649 lifetime_confirmed: p.guard_lifetime_confirmed.try_into()?,
1650 lifetime_unlisted: p.guard_remove_unlisted_after.try_into()?,
1651 max_sample_size: p.guard_max_sample_size.try_into()?,
1652 max_sample_bw_fraction: p.guard_max_sample_threshold.as_fraction(),
1653 min_filtered_sample_size: p.guard_filtered_min_sample_size.try_into()?,
1654 n_primary: p.guard_n_primary.try_into()?,
1655 data_parallelism: p.guard_use_parallelism.try_into()?,
1656 dir_parallelism: p.guard_dir_use_parallelism.try_into()?,
1657 np_connect_timeout: p.guard_nonprimary_connect_timeout.try_into()?,
1658 np_idle_timeout: p.guard_nonprimary_idle_timeout.try_into()?,
1659 internet_down_timeout: p.guard_internet_likely_down.try_into()?,
1660 filter_threshold: p.guard_meaningful_restriction.as_fraction(),
1661 extreme_threshold: p.guard_extreme_restriction.as_fraction(),
1662 })
1663 }
1664}
1665
1666/// Representation of a guard or fallback, as returned by [`GuardMgr::select_guard()`].
1667#[derive(Debug, Clone)]
1668pub struct FirstHop {
1669 /// The sample from which this guard was taken, or `None` if this is a fallback.
1670 sample: Option<GuardSetSelector>,
1671 /// Information about connecting to (or through) this guard.
1672 inner: FirstHopInner,
1673}
1674/// The enumeration inside a FirstHop that holds information about how to
1675/// connect to (and possibly through) a guard or fallback.
1676#[derive(Debug, Clone)]
1677enum FirstHopInner {
1678 /// We have enough information to connect to a guard.
1679 Chan(OwnedChanTarget),
1680 /// We have enough information to connect to a guards _and_ to build
1681 /// multihop circuits through it.
1682 #[cfg_attr(not(feature = "bridge-client"), allow(dead_code))]
1683 Circ(OwnedCircTarget),
1684}
1685
1686impl FirstHop {
1687 /// Return a new [`FirstHopId`] for this `FirstHop`.
1688 fn first_hop_id(&self) -> FirstHopId {
1689 match &self.sample {
1690 Some(sample) => {
1691 let guard_id = GuardId::from_relay_ids(self);
1692 FirstHopId::in_sample(sample.clone(), guard_id)
1693 }
1694 None => {
1695 let fallback_id = crate::ids::FallbackId::from_relay_ids(self);
1696 FirstHopId::from(fallback_id)
1697 }
1698 }
1699 }
1700
1701 /// Look up this guard in `netdir`.
1702 pub fn get_relay<'a>(&self, netdir: &'a NetDir) -> Option<Relay<'a>> {
1703 match &self.sample {
1704 #[cfg(feature = "bridge-client")]
1705 // Always return "None" for anything that isn't in the netdir.
1706 Some(s) if s.universe_type() == UniverseType::BridgeSet => None,
1707 // Otherwise ask the netdir.
1708 _ => netdir.by_ids(self),
1709 }
1710 }
1711
1712 /// Return true if this guard is a bridge.
1713 pub fn is_bridge(&self) -> bool {
1714 match &self.sample {
1715 #[cfg(feature = "bridge-client")]
1716 Some(s) if s.universe_type() == UniverseType::BridgeSet => true,
1717 _ => false,
1718 }
1719 }
1720
1721 /// If possible, return a view of this object that can be used to build a circuit.
1722 pub fn as_circ_target(&self) -> Option<&OwnedCircTarget> {
1723 match &self.inner {
1724 FirstHopInner::Chan(_) => None,
1725 FirstHopInner::Circ(ct) => Some(ct),
1726 }
1727 }
1728
1729 /// Return a view of this as an OwnedChanTarget.
1730 fn chan_target_mut(&mut self) -> &mut OwnedChanTarget {
1731 match &mut self.inner {
1732 FirstHopInner::Chan(ct) => ct,
1733 FirstHopInner::Circ(ct) => ct.chan_target_mut(),
1734 }
1735 }
1736
1737 /// If possible and appropriate, find a circuit target in `bridges` for this
1738 /// `FirstHop`, and make this `FirstHop` a viable circuit target.
1739 ///
1740 /// (By default, any `FirstHop` that a `GuardSet` returns will have enough
1741 /// information to be a `ChanTarget`, but it will be lacking the additional
1742 /// network information in `CircTarget`[^1] necessary for us to build a
1743 /// multi-hop circuit through it. If this FirstHop is a regular non-bridge
1744 /// `Relay`, then the `CircMgr` will later look up that circuit information
1745 /// itself from the network directory. But if this `FirstHop` *is* a bridge,
1746 /// then we need to find that information in the `BridgeSet`, since the
1747 /// CircMgr does not keep track of the `BridgeSet`.)
1748 ///
1749 /// [^1]: For example, supported protocol versions and ntor keys.
1750 #[cfg(feature = "bridge-client")]
1751 fn lookup_bridge_circ_target(&mut self, bridges: &bridge::BridgeSet) {
1752 use crate::sample::CandidateStatus::Present;
1753 if self.sample.as_ref().map(|s| s.universe_type()) == Some(UniverseType::BridgeSet)
1754 && matches!(self.inner, FirstHopInner::Chan(_))
1755 {
1756 if let Present(bridge_relay) = bridges.bridge_relay_by_guard(self) {
1757 if let Some(circ_target) = bridge_relay.as_relay_with_desc() {
1758 self.inner =
1759 FirstHopInner::Circ(OwnedCircTarget::from_circ_target(&circ_target));
1760 }
1761 }
1762 }
1763 }
1764
1765 /// Return true if this `FirstHop` contains circuit target information.
1766 ///
1767 /// This is true if `lookup_bridge_circ_target()` has been called, and it
1768 /// successfully found the circuit target information.
1769 #[cfg(feature = "bridge-client")]
1770 fn contains_circ_target(&self) -> bool {
1771 matches!(self.inner, FirstHopInner::Circ(_))
1772 }
1773}
1774
1775// This is somewhat redundant with the implementations in crate::guard::Guard.
1776impl tor_linkspec::HasAddrs for FirstHop {
1777 fn addrs(&self) -> impl Iterator<Item = SocketAddr> {
1778 match &self.inner {
1779 FirstHopInner::Chan(ct) => Either::Left(ct.addrs()),
1780 FirstHopInner::Circ(ct) => Either::Right(ct.addrs()),
1781 }
1782 }
1783}
1784impl tor_linkspec::HasRelayIds for FirstHop {
1785 fn identity(
1786 &self,
1787 key_type: tor_linkspec::RelayIdType,
1788 ) -> Option<tor_linkspec::RelayIdRef<'_>> {
1789 match &self.inner {
1790 FirstHopInner::Chan(ct) => ct.identity(key_type),
1791 FirstHopInner::Circ(ct) => ct.identity(key_type),
1792 }
1793 }
1794}
1795impl tor_linkspec::HasChanMethod for FirstHop {
1796 fn chan_method(&self) -> tor_linkspec::ChannelMethod {
1797 match &self.inner {
1798 FirstHopInner::Chan(ct) => ct.chan_method(),
1799 FirstHopInner::Circ(ct) => ct.chan_method(),
1800 }
1801 }
1802}
1803impl tor_linkspec::ChanTarget for FirstHop {}
1804
/// What we intend to do with a guard once we have it.
///
/// The guard selection algorithm can behave differently depending on this.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[non_exhaustive]
pub enum GuardUsageKind {
    /// The guard will be used for a data circuit.
    ///
    /// (Every use other than the `OneHopDirectory` case falls in here.)
    #[default]
    Data,
    /// The guard will be used for a one-hop, non-anonymous
    /// directory request.
    ///
    /// (The algorithm permits more parallelism among the guards used
    /// for circuits of this kind.)
    OneHopDirectory,
}
1823
1824/// A set of parameters describing how a single guard should be selected.
1825///
1826/// Used as an argument to [`GuardMgr::select_guard`].
1827#[derive(Clone, Debug, derive_builder::Builder)]
1828#[builder(build_fn(error = "tor_config::ConfigBuildError"))]
1829pub struct GuardUsage {
1830 /// The purpose for which this guard will be used.
1831 #[builder(default)]
1832 kind: GuardUsageKind,
1833 /// A list of restrictions on which guard may be used.
1834 ///
1835 /// The default is the empty list.
1836 #[builder(sub_builder, setter(custom))]
1837 restrictions: GuardRestrictionList,
1838}
1839
1840impl_standard_builder! { GuardUsage: !Deserialize }
1841
1842/// List of socket restrictions, as configured
1843pub type GuardRestrictionList = Vec<GuardRestriction>;
1844
1845define_list_builder_helper! {
1846 pub struct GuardRestrictionListBuilder {
1847 restrictions: [GuardRestriction],
1848 }
1849 built: GuardRestrictionList = restrictions;
1850 default = vec![];
1851 item_build: |restriction| Ok(restriction.clone());
1852}
1853
1854define_list_builder_accessors! {
1855 struct GuardUsageBuilder {
1856 pub restrictions: [GuardRestriction],
1857 }
1858}
1859
1860impl GuardUsageBuilder {
1861 /// Create a new empty [`GuardUsageBuilder`].
1862 pub fn new() -> Self {
1863 Self::default()
1864 }
1865}
1866
1867/// A restriction that applies to a single request for a guard.
1868///
1869/// Restrictions differ from filters (see [`GuardFilter`]) in that
1870/// they apply to single requests, not to our entire set of guards.
1871/// They're suitable for things like making sure that we don't start
1872/// and end a circuit at the same relay, or requiring a specific
1873/// subprotocol version for certain kinds of requests.
1874#[derive(Clone, Debug, Serialize, Deserialize)]
1875#[non_exhaustive]
1876pub enum GuardRestriction {
1877 /// Don't pick a guard with the provided identity.
1878 AvoidId(RelayId),
1879 /// Don't pick a guard with any of the provided Ed25519 identities.
1880 AvoidAllIds(RelayIdSet),
1881}
1882
/// Which flavor of vanguards to use.
///
/// The explicit discriminants mean that the derived ordering is
/// `Disabled < Lite < Full`.
#[derive(
    Debug,
    Default,
    Clone,
    Copy,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Serialize,
    Deserialize,
    derive_more::Display,
)]
#[serde(rename_all = "lowercase")]
#[cfg(feature = "vanguards")]
#[non_exhaustive]
pub enum VanguardMode {
    /// Use "lite" vanguards.
    #[default]
    #[display("lite")]
    Lite = 1,
    /// Use "full" vanguards.
    #[display("full")]
    Full = 2,
    /// Don't use vanguards at all.
    #[display("disabled")]
    Disabled = 0,
}
1902
#[cfg(feature = "vanguards")]
impl VanguardMode {
    /// Convert a [`NetParameters`] parameter into a `VanguardMode`.
    ///
    /// This is how [`vanguards_enabled`](NetParameters::vanguards_enabled)
    /// and [`vanguards_hs_service`](NetParameters::vanguards_hs_service)
    /// get turned into the matching `VanguardMode`:
    /// 0 is `Disabled`, 1 is `Lite`, and 2 is `Full`.
    pub(crate) fn from_net_parameter(val: BoundedInt32<0, 2>) -> Self {
        let raw = val.get();
        match raw {
            0 => Self::Disabled,
            1 => Self::Lite,
            2 => Self::Full,
            // The type bounds `val` to 0..=2, so no other value can occur.
            _ => unreachable!("BoundedInt32 was not bounded?!"),
        }
    }
}
1919
1920impl_not_auto_value!(VanguardMode);
1921
1922/// Vanguards configuration.
1923#[derive(Debug, Default, Clone, Eq, PartialEq, derive_builder::Builder)]
1924#[builder(build_fn(error = "ConfigBuildError"))]
1925#[builder(derive(Debug, Serialize, Deserialize))]
1926pub struct VanguardConfig {
1927 /// The kind of vanguards to use.
1928 #[builder_field_attr(serde(default))]
1929 #[builder(default)]
1930 mode: ExplicitOrAuto<VanguardMode>,
1931}
1932
1933impl VanguardConfig {
1934 /// Return the configured [`VanguardMode`].
1935 ///
1936 /// Returns the [`Default`] `VanguardMode`
1937 /// if the mode is [`Auto`](ExplicitOrAuto) or unspecified.
1938 pub fn mode(&self) -> VanguardMode {
1939 match self.mode {
1940 ExplicitOrAuto::Auto => Default::default(),
1941 ExplicitOrAuto::Explicit(mode) => mode,
1942 }
1943 }
1944}
1945
1946/// The kind of vanguards to use.
1947#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
1948#[derive(Serialize, Deserialize)] //
1949#[derive(derive_more::Display)] //
1950#[serde(rename_all = "lowercase")]
1951#[cfg(not(feature = "vanguards"))]
1952#[non_exhaustive]
1953pub enum VanguardMode {
1954 /// Vanguards are disabled.
1955 #[default]
1956 #[display("disabled")]
1957 Disabled = 0,
1958}
1959
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_time_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
    use super::*;
    use tor_linkspec::{HasAddrs, HasRelayIds};
    use tor_persist::TestingStateMgr;
    use tor_rtcompat::test_with_all_runtimes;

    /// The built-in default `GuardParams` should agree with the ones derived
    /// from the default network parameters.
    #[test]
    fn guard_param_defaults() {
        let builtin = GuardParams::default();
        let from_net_params: GuardParams = (&NetParameters::default()).try_into().unwrap();
        assert_eq!(builtin, from_net_params);
    }

    /// Set up a `GuardMgr`, the state manager backing it, and a small testing
    /// `NetDir` whose parameters are tuned for these tests.
    fn init<R: Runtime>(rt: R) -> (GuardMgr<R>, TestingStateMgr, NetDir) {
        use tor_netdir::{MdReceiver, PartialNetDir, testnet};

        let statemgr = TestingStateMgr::new();
        let lock_status = statemgr.try_lock().unwrap();
        assert!(lock_status.held());
        let guardmgr = GuardMgr::new(rt, statemgr.clone(), &TestConfig::default()).unwrap();

        let (consensus, microdescs) = testnet::construct_network().unwrap();
        let overrides = [
            // Use a smaller sample size than usual, to make up for how small
            // the testing network is. (Otherwise we'd sample the entire
            // network, and the tests couldn't observe guards.)
            "guard-min-filtered-sample-size=5",
            // Pick only two primary guards; this makes the tests easier to write.
            "guard-n-primary-guards=2",
            // Treat any restriction that allows 75% or fewer of relays as
            // "meaningful", so that we can test the "restrictive" guard
            // sample behavior.
            "guard-meaningful-restriction-percent=75",
        ]
        .join(" ");
        let override_params = overrides.parse().unwrap();

        let mut partial = PartialNetDir::new(consensus, Some(&override_params));
        for md in microdescs {
            partial.add_microdesc(md);
        }
        let netdir = partial.unwrap_if_sufficient().unwrap();

        (guardmgr, statemgr, netdir)
    }

    /// A confirmed guard should survive a save-and-reload of the state.
    #[test]
    #[allow(clippy::clone_on_copy)]
    fn simple_case() {
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, statemgr, netdir) = init(rt.clone());
            guardmgr.install_test_netdir(&netdir);

            let (first_guard, monitor, usable) =
                guardmgr.select_guard(GuardUsage::default()).unwrap();
            // Tell the guard manager that the circuit worked.
            monitor.succeeded();

            // Are we allowed to use the circuit?
            let is_usable = usable.await.unwrap();
            assert!(is_usable);

            // Write out the state...
            guardmgr.flush_msg_queue().await;
            guardmgr.store_persistent_state().unwrap();
            drop(guardmgr);

            // ...and build a new manager from that state.
            let reloaded =
                GuardMgr::new(rt.clone(), statemgr.clone(), &TestConfig::default()).unwrap();
            reloaded.install_test_netdir(&netdir);

            // The guard was confirmed, so we should be given the same one again!
            let (second_guard, _mon, _usable) =
                reloaded.select_guard(GuardUsage::default()).unwrap();
            assert!(second_guard.same_relay_ids(&first_guard));
        });
    }

    /// When both primary guards fail, we should fall back to other sampled
    /// guards, and only the one whose circuit succeeds should be usable.
    #[test]
    fn simple_waiting() {
        // TODO(nickm): This test fails in rare cases; I suspect a
        // race condition somewhere.
        //
        // I've doubled up on the queue flushing in order to try to make the
        // race less likely, but we should investigate.
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, _statemgr, netdir) = init(rt);
            let usage = GuardUsage::default();
            guardmgr.install_test_netdir(&netdir);

            // Make the first two guards fail; that should push us on to
            // a non-primary guard.
            let (guard1, monitor, _usable1) = guardmgr.select_guard(usage.clone()).unwrap();
            monitor.failed();
            guardmgr.flush_msg_queue().await; // avoid race
            guardmgr.flush_msg_queue().await; // avoid race
            let (guard2, monitor, _usable2) = guardmgr.select_guard(usage.clone()).unwrap();
            monitor.failed();
            guardmgr.flush_msg_queue().await; // avoid race
            guardmgr.flush_msg_queue().await; // avoid race

            assert!(!guard1.same_relay_ids(&guard2));

            // The next two selections should be sampled guards, and they
            // should differ from one another.
            let (guard3, monitor3, usable3) = guardmgr.select_guard(usage.clone()).unwrap();
            let (guard4, monitor4, usable4) = guardmgr.select_guard(usage.clone()).unwrap();
            assert!(!guard3.same_relay_ids(&guard4));

            let (third_usable, fourth_usable) = futures::join!(
                async {
                    monitor3.failed();
                    guardmgr.flush_msg_queue().await; // avoid race
                    usable3.await.unwrap()
                },
                async {
                    monitor4.succeeded();
                    usable4.await.unwrap()
                }
            );

            assert_eq!((third_usable, fourth_usable), (false, true));
        });
    }

    /// A reachable-address filter should limit which guards we are given.
    #[test]
    fn filtering_basics() {
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, _statemgr, netdir) = init(rt);
            let usage = GuardUsage::default();

            // Every address in the test network is {0,1,2,3,4}.0.0.3:9001.
            // Restrict ourselves to 2.0.0.0/8 only.
            let mut filter = GuardFilter::default();
            filter.push_reachable_addresses(vec!["2.0.0.0/8:9001".parse().unwrap()]);
            guardmgr.set_filter(filter);
            guardmgr.install_test_netdir(&netdir);

            let (guard, _mon, _usable) = guardmgr.select_guard(usage).unwrap();
            // Check that the filter took effect.
            let first_addr = guard.addrs().next().unwrap();
            assert_eq!(first_addr, "2.0.0.3:9001".parse().unwrap());
        });
    }

    /// Externally reported directory failures and successes should affect
    /// directory guard selection, but not data guard selection.
    #[test]
    fn external_status() {
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, _statemgr, netdir) = init(rt);
            let data_usage = GuardUsage::default();
            let mut dir_usage_builder = GuardUsageBuilder::new();
            dir_usage_builder.kind(GuardUsageKind::OneHopDirectory);
            let dir_usage = dir_usage_builder.build().unwrap();
            guardmgr.install_test_netdir(&netdir);

            // Override this parameter, so that the results below are deterministic.
            guardmgr.inner.lock().unwrap().params.dir_parallelism = 1;

            let (guard, monitor, _usable) = guardmgr.select_guard(data_usage.clone()).unwrap();
            monitor.succeeded();

            // Note that this guard handed us a bad directory object.
            guardmgr.note_external_failure(&guard, ExternalActivity::DirCache);

            // Ask for a guard for data usage again. We should get the same
            // one as before, since the directory failure doesn't mean this
            // guard is useless as a primary guard.
            let (data_guard, monitor, _usable) = guardmgr.select_guard(data_usage).unwrap();
            assert_eq!(data_guard.ed_identity(), guard.ed_identity());
            monitor.succeeded();

            // For directory usage, though, we should get a different guard,
            // since the last guard we gave out failed.
            let (dir_guard, monitor, _usable) = guardmgr.select_guard(dir_usage.clone()).unwrap();
            assert_ne!(dir_guard.ed_identity(), guard.ed_identity());
            monitor.succeeded();

            // Now record a success for directory usage.
            guardmgr.note_external_success(&guard, ExternalActivity::DirCache);

            // The guard works as a cache again, so a directory request should
            // get us the original guard.
            let (dir_guard_again, _mon, _usable) = guardmgr.select_guard(dir_usage).unwrap();
            assert_eq!(dir_guard_again.ed_identity(), guard.ed_identity());
        });
    }

    /// `VanguardMode` should be ordered `Disabled < Lite < Full`.
    #[cfg(feature = "vanguards")]
    #[test]
    fn vanguard_mode_ord() {
        use VanguardMode::{Disabled, Full, Lite};
        assert!(Disabled < Lite);
        assert!(Disabled < Full);
        assert!(Lite < Full);
    }
}