tor_guardmgr/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![doc = include_str!("../README.md")]
3// @@ begin lint list maintained by maint/add_warning @@
4#![allow(renamed_and_removed_lints)] // @@REMOVE_WHEN(ci_arti_stable)
5#![allow(unknown_lints)] // @@REMOVE_WHEN(ci_arti_nightly)
6#![warn(missing_docs)]
7#![warn(noop_method_call)]
8#![warn(unreachable_pub)]
9#![warn(clippy::all)]
10#![deny(clippy::await_holding_lock)]
11#![deny(clippy::cargo_common_metadata)]
12#![deny(clippy::cast_lossless)]
13#![deny(clippy::checked_conversions)]
14#![warn(clippy::cognitive_complexity)]
15#![deny(clippy::debug_assert_with_mut_call)]
16#![deny(clippy::exhaustive_enums)]
17#![deny(clippy::exhaustive_structs)]
18#![deny(clippy::expl_impl_clone_on_copy)]
19#![deny(clippy::fallible_impl_from)]
20#![deny(clippy::implicit_clone)]
21#![deny(clippy::large_stack_arrays)]
22#![warn(clippy::manual_ok_or)]
23#![deny(clippy::missing_docs_in_private_items)]
24#![warn(clippy::needless_borrow)]
25#![warn(clippy::needless_pass_by_value)]
26#![warn(clippy::option_option)]
27#![deny(clippy::print_stderr)]
28#![deny(clippy::print_stdout)]
29#![warn(clippy::rc_buffer)]
30#![deny(clippy::ref_option_ref)]
31#![warn(clippy::semicolon_if_nothing_returned)]
32#![warn(clippy::trait_duplication_in_bounds)]
33#![deny(clippy::unchecked_time_subtraction)]
34#![deny(clippy::unnecessary_wraps)]
35#![warn(clippy::unseparated_literal_suffix)]
36#![deny(clippy::unwrap_used)]
37#![deny(clippy::mod_module_files)]
38#![allow(clippy::let_unit_value)] // This can reasonably be done for explicitness
39#![allow(clippy::uninlined_format_args)]
40#![allow(clippy::significant_drop_in_scrutinee)] // arti/-/merge_requests/588/#note_2812945
41#![allow(clippy::result_large_err)] // temporary workaround for arti#587
42#![allow(clippy::needless_raw_string_hashes)] // complained-about code is fine, often best
43#![allow(clippy::needless_lifetimes)] // See arti#1765
44#![allow(mismatched_lifetime_syntaxes)] // temporary workaround for arti#2060
45#![allow(clippy::collapsible_if)] // See arti#2342
46#![deny(clippy::unused_async)]
47//! <!-- @@ end lint list maintained by maint/add_warning @@ -->
48
49// TODO #1645 (either remove this, or decide to have it everywhere)
50#![cfg_attr(not(all(feature = "full", feature = "experimental")), allow(unused))]
51
52// Glossary:
53//     Primary guard
54//     Sample
55//     confirmed
56//     filtered
57
58use derive_deftly::Deftly;
59use futures::channel::mpsc;
60use itertools::Either;
61use serde::{Deserialize, Serialize};
62use std::collections::HashMap;
63use std::net::SocketAddr;
64use std::sync::{Arc, Mutex, Weak};
65use std::time::{Duration, Instant, SystemTime};
66#[cfg(feature = "bridge-client")]
67use tor_error::internal;
68use tor_linkspec::{OwnedChanTarget, OwnedCircTarget, RelayId, RelayIdSet};
69use tor_netdir::NetDirProvider;
70use tor_proto::ClockSkew;
71use tor_rtcompat::SpawnExt;
72use tor_units::BoundedInt32;
73use tracing::{debug, info, instrument, trace, warn};
74
75use tor_config::derive::prelude::*;
76use tor_config::{ExplicitOrAuto, impl_standard_builder};
77use tor_config::{ReconfigureError, impl_not_auto_value};
78use tor_config::{define_list_builder_accessors, define_list_builder_helper};
79use tor_netdir::{NetDir, Relay, params::NetParameters};
80use tor_persist::{DynStorageHandle, StateMgr};
81use tor_rtcompat::Runtime;
82
83#[cfg(feature = "bridge-client")]
84pub mod bridge;
85mod config;
86mod daemon;
87mod dirstatus;
88mod err;
89mod events;
90pub mod fallback;
91mod filter;
92mod guard;
93mod ids;
94mod pending;
95mod sample;
96mod skew;
97mod util;
98#[cfg(feature = "vanguards")]
99pub mod vanguards;
100
101#[cfg(not(feature = "bridge-client"))]
102#[path = "bridge_disabled.rs"]
103pub mod bridge;
104
105#[cfg(any(test, feature = "testing"))]
106pub use config::testing::TestConfig;
107
108#[cfg(test)]
109use oneshot_fused_workaround as oneshot;
110
111pub use config::GuardMgrConfig;
112pub use err::{GuardMgrConfigError, GuardMgrError, PickGuardError};
113pub use events::ClockSkewEvents;
114pub use filter::GuardFilter;
115pub use ids::FirstHopId;
116pub use pending::{GuardMonitor, GuardStatus, GuardUsable};
117pub use skew::SkewEstimate;
118
119#[cfg(feature = "vanguards")]
120pub use vanguards::VanguardMgrError;
121
122use pending::{PendingRequest, RequestId};
123use sample::{GuardSet, Universe, UniverseRef};
124
125use crate::ids::{FirstHopIdInner, GuardId};
126
127/// A "guard manager" that selects and remembers a persistent set of
128/// guard nodes.
129///
130/// This is a "handle"; clones of it share state.
131#[derive(Clone)]
132pub struct GuardMgr<R: Runtime> {
133    /// An asynchronous runtime object.
134    ///
135    /// GuardMgr uses this runtime for timing, timeouts, and spawning
136    /// tasks.
137    runtime: R,
138
139    /// Internal state for the guard manager.
140    inner: Arc<Mutex<GuardMgrInner>>,
141}
142
143/// Helper type that holds the data used by a [`GuardMgr`].
144///
145/// This would just be a [`GuardMgr`], except that it needs to sit inside
146/// a `Mutex` and get accessed by daemon tasks.
147struct GuardMgrInner {
148    /// Last time when marked all of our primary guards as retriable.
149    ///
150    /// We keep track of this time so that we can rate-limit
151    /// these attempts.
152    last_primary_retry_time: Instant,
153
154    /// Persistent guard manager state.
155    ///
156    /// This object remembers one or more persistent set of guards that we can
157    /// use, along with their relative priorities and statuses.
158    guards: GuardSets,
159
160    /// The current filter that we're using to decide which guards are
161    /// supported.
162    //
163    // TODO: This field is duplicated in the current active [`GuardSet`]; we
164    // should fix that.
165    filter: GuardFilter,
166
167    /// Configuration values derived from the consensus parameters.
168    ///
169    /// This is updated whenever the consensus parameters change.
170    params: GuardParams,
171
172    /// A mpsc channel, used to tell the task running in
173    /// [`daemon::report_status_events`] about a new event to monitor.
174    ///
175    /// This uses an `UnboundedSender` so that we don't have to await
176    /// while sending the message, which in turn allows the GuardMgr
177    /// API to be simpler.  The risk, however, is that there's no
178    /// backpressure in the event that the task running
179    /// [`daemon::report_status_events`] fails to read from this
180    /// channel.
181    ctrl: mpsc::UnboundedSender<daemon::Msg>,
182
183    /// Information about guards that we've given out, but where we have
184    /// not yet heard whether the guard was successful.
185    ///
186    /// Upon leaning whether the guard was successful, the pending
187    /// requests in this map may be either moved to `waiting`, or
188    /// discarded.
189    ///
190    /// There can be multiple pending requests corresponding to the
191    /// same guard.
192    pending: HashMap<RequestId, PendingRequest>,
193
194    /// A list of pending requests for which we have heard that the
195    /// guard was successful, but we have not yet decided whether the
196    /// circuit may be used.
197    ///
198    /// There can be multiple waiting requests corresponding to the
199    /// same guard.
200    waiting: Vec<PendingRequest>,
201
202    /// A list of fallback directories used to access the directory system
203    /// when no other directory information is yet known.
204    fallbacks: fallback::FallbackState,
205
206    /// Location in which to store persistent state.
207    storage: DynStorageHandle<GuardSets>,
208
209    /// A sender object to publish changes in our estimated clock skew.
210    send_skew: postage::watch::Sender<Option<SkewEstimate>>,
211
212    /// A receiver object to hand out to observers who want to know about
213    /// changes in our estimated clock skew.
214    recv_skew: events::ClockSkewEvents,
215
216    /// A netdir provider that we can use for adding new guards when
217    /// insufficient guards are available.
218    ///
219    /// This has to be an Option so it can be initialized from None: at the
220    /// time a GuardMgr is created, there is no NetDirProvider for it to use.
221    netdir_provider: Option<Weak<dyn NetDirProvider>>,
222
223    /// A netdir provider that we can use for discovering bridge descriptors.
224    ///
225    /// This has to be an Option so it can be initialized from None: at the time
226    /// a GuardMgr is created, there is no BridgeDescProvider for it to use.
227    #[cfg(feature = "bridge-client")]
228    bridge_desc_provider: Option<Weak<dyn bridge::BridgeDescProvider>>,
229
230    /// A list of the bridges that we are configured to use, or "None" if we are
231    /// not configured to use bridges.
232    #[cfg(feature = "bridge-client")]
233    configured_bridges: Option<Arc<[bridge::BridgeConfig]>>,
234}
235
236/// A selector that tells us which [`GuardSet`] of several is currently in use.
237#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, strum::EnumIter)]
238enum GuardSetSelector {
239    /// The default guard set is currently in use: that's the one that we use
240    /// when we have no filter installed, or the filter permits most of the
241    /// guards on the network.
242    #[default]
243    Default,
244    /// A "restrictive" guard set is currently in use: that's the one that we
245    /// use when we have a filter that excludes a large fraction of the guards
246    /// on the network.
247    Restricted,
248    /// The "bridges" guard set is currently in use: we are selecting our guards
249    /// from among the universe of configured bridges.
250    #[cfg(feature = "bridge-client")]
251    Bridges,
252}
253
254/// Describes the [`Universe`] that a guard sample should take its guards from.
255#[derive(Clone, Copy, Debug, Eq, PartialEq)]
256enum UniverseType {
257    /// Take information from the network directory.
258    NetDir,
259    /// Take information from the configured bridges.
260    #[cfg(feature = "bridge-client")]
261    BridgeSet,
262}
263
264impl GuardSetSelector {
265    /// Return a description of which [`Universe`] this guard sample should take
266    /// its guards from.
267    fn universe_type(&self) -> UniverseType {
268        match self {
269            GuardSetSelector::Default | GuardSetSelector::Restricted => UniverseType::NetDir,
270            #[cfg(feature = "bridge-client")]
271            GuardSetSelector::Bridges => UniverseType::BridgeSet,
272        }
273    }
274}
275
276/// Persistent state for a guard manager, as serialized to disk.
277#[derive(Debug, Clone, Default, Serialize, Deserialize)]
278struct GuardSets {
279    /// Which set of guards is currently in use?
280    #[serde(skip)]
281    active_set: GuardSetSelector,
282
283    /// The default set of guards to use.
284    ///
285    /// We use this one when there is no filter, or the filter permits most of the
286    /// guards on the network.
287    default: GuardSet,
288
289    /// A guard set to use when we have a restrictive filter.
290    #[serde(default)]
291    restricted: GuardSet,
292
293    /// A guard set sampled from our configured bridges.
294    #[serde(default)]
295    #[cfg(feature = "bridge-client")]
296    bridges: GuardSet,
297
298    /// Unrecognized fields, including (possibly) other guard sets.
299    #[serde(flatten)]
300    remaining: HashMap<String, tor_persist::JsonValue>,
301}
302
303/// The key (filename) we use for storing our persistent guard state in the
304/// `StateMgr`.
305///
306/// We used to store this in a different format in a filename called
307/// "default_guards" (before Arti 0.1.0).
308const STORAGE_KEY: &str = "guards";
309
310/// A description of which circuits to retire because of a configuration change.
311///
312/// TODO(nickm): Eventually we will want to add a "Some" here, to support
313/// removing only those circuits that correspond to no-longer-usable guards.
314#[derive(Clone, Debug, Eq, PartialEq)]
315#[must_use]
316#[non_exhaustive]
317pub enum RetireCircuits {
318    /// There's no need to retire any circuits.
319    None,
320    /// All circuits should be retired.
321    All,
322}
323
324impl<R: Runtime> GuardMgr<R> {
325    /// Create a new "empty" guard manager and launch its background tasks.
326    ///
327    /// It won't be able to hand out any guards until a [`NetDirProvider`] has
328    /// been installed.
329    #[instrument(skip_all, level = "trace")]
330    pub fn new<S>(
331        runtime: R,
332        state_mgr: S,
333        config: &impl GuardMgrConfig,
334    ) -> Result<Self, GuardMgrError>
335    where
336        S: StateMgr + Send + Sync + 'static,
337    {
338        let (ctrl, rcv) = mpsc::unbounded();
339        let storage: DynStorageHandle<GuardSets> = state_mgr.create_handle(STORAGE_KEY);
340        // TODO(nickm): We should do something about the old state in
341        // `default_guards`.  Probably it would be best to delete it.  We could
342        // try to migrate it instead, but that's beyond the stability guarantee
343        // that we're getting at this stage of our (pre-0.1) development.
344        let state = storage.load()?.unwrap_or_default();
345
346        let (send_skew, recv_skew) = postage::watch::channel();
347        let recv_skew = ClockSkewEvents { inner: recv_skew };
348
349        let inner = Arc::new(Mutex::new(GuardMgrInner {
350            guards: state,
351            filter: GuardFilter::unfiltered(),
352            last_primary_retry_time: runtime.now(),
353            params: GuardParams::default(),
354            ctrl,
355            pending: HashMap::new(),
356            waiting: Vec::new(),
357            fallbacks: config.fallbacks().into(),
358            storage,
359            send_skew,
360            recv_skew,
361            netdir_provider: None,
362            #[cfg(feature = "bridge-client")]
363            bridge_desc_provider: None,
364            #[cfg(feature = "bridge-client")]
365            configured_bridges: None,
366        }));
367        #[cfg(feature = "bridge-client")]
368        {
369            let mut inner = inner.lock().expect("lock poisoned");
370            // TODO(nickm): This calls `GuardMgrInner::update`. Will we mind doing so before any
371            // providers are configured? I think not, but we should make sure.
372            let _: RetireCircuits =
373                inner.replace_bridge_config(config, runtime.wallclock(), runtime.now())?;
374        }
375        {
376            let weak_inner = Arc::downgrade(&inner);
377            let rt_clone = runtime.clone();
378            runtime
379                .spawn(daemon::report_status_events(rt_clone, weak_inner, rcv))
380                .map_err(|e| GuardMgrError::from_spawn("guard status event reporter", e))?;
381        }
382        {
383            let rt_clone = runtime.clone();
384            let weak_inner = Arc::downgrade(&inner);
385            runtime
386                .spawn(daemon::run_periodic(rt_clone, weak_inner))
387                .map_err(|e| GuardMgrError::from_spawn("periodic guard updater", e))?;
388        }
389        Ok(GuardMgr { runtime, inner })
390    }
391
392    /// Install a [`NetDirProvider`] for use by this guard manager.
393    ///
394    /// It will be used to keep the guards up-to-date with changes from the
395    /// network directory, and to find new guards when no NetDir is provided to
396    /// select_guard().
397    ///
398    /// TODO: we should eventually return some kind of a task handle from this
399    /// task, even though it is not strictly speaking periodic.
400    ///
401    /// The guardmgr retains only a `Weak` reference to `provider`,
402    /// `install_netdir_provider` downgrades it on entry,
403    // TODO add ref to document when https://gitlab.torproject.org/tpo/core/arti/-/issues/624
404    // is fixed.  Also, maybe take an owned `Weak` to start with.
405    //
406    /// # Panics
407    ///
408    /// Panics if a [`NetDirProvider`] is already installed.
409    pub fn install_netdir_provider(
410        &self,
411        provider: &Arc<dyn NetDirProvider>,
412    ) -> Result<(), GuardMgrError> {
413        let weak_provider = Arc::downgrade(provider);
414        {
415            let mut inner = self.inner.lock().expect("Poisoned lock");
416            assert!(inner.netdir_provider.is_none());
417            inner.netdir_provider = Some(weak_provider.clone());
418        }
419        let weak_inner = Arc::downgrade(&self.inner);
420        let rt_clone = self.runtime.clone();
421        self.runtime
422            .spawn(daemon::keep_netdir_updated(
423                rt_clone,
424                weak_inner,
425                weak_provider,
426            ))
427            .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
428        Ok(())
429    }
430
431    /// Configure a new [`bridge::BridgeDescProvider`] for this [`GuardMgr`].
432    ///
433    /// It will be used to learn about changes in the set of available bridge
434    /// descriptors; we'll inform it whenever our desired set of bridge
435    /// descriptors changes.
436    ///
437    /// TODO: Same todo as in `install_netdir_provider` about task handles.
438    ///
439    /// # Panics
440    ///
441    /// Panics if a [`bridge::BridgeDescProvider`] is already installed.
442    #[cfg(feature = "bridge-client")]
443    pub fn install_bridge_desc_provider(
444        &self,
445        provider: &Arc<dyn bridge::BridgeDescProvider>,
446    ) -> Result<(), GuardMgrError> {
447        let weak_provider = Arc::downgrade(provider);
448        {
449            let mut inner = self.inner.lock().expect("Poisoned lock");
450            assert!(inner.bridge_desc_provider.is_none());
451            inner.bridge_desc_provider = Some(weak_provider.clone());
452        }
453
454        let weak_inner = Arc::downgrade(&self.inner);
455        let rt_clone = self.runtime.clone();
456        self.runtime
457            .spawn(daemon::keep_bridge_descs_updated(
458                rt_clone,
459                weak_inner,
460                weak_provider,
461            ))
462            .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
463
464        Ok(())
465    }
466
467    /// Flush our current guard state to the state manager, if there
468    /// is any unsaved state.
469    pub fn store_persistent_state(&self) -> Result<(), GuardMgrError> {
470        let inner = self.inner.lock().expect("Poisoned lock");
471        trace!("Flushing guard state to disk.");
472        inner.storage.store(&inner.guards)?;
473        Ok(())
474    }
475
476    /// Reload state from the state manager.
477    ///
478    /// We only call this method if we _don't_ have the lock on the state
479    /// files.  If we have the lock, we only want to save.
480    #[instrument(level = "trace", skip_all)]
481    pub fn reload_persistent_state(&self) -> Result<(), GuardMgrError> {
482        let mut inner = self.inner.lock().expect("Poisoned lock");
483        if let Some(new_guards) = inner.storage.load()? {
484            inner.replace_guards_with(new_guards, self.runtime.wallclock(), self.runtime.now());
485        }
486        Ok(())
487    }
488
489    /// Switch from having an unowned persistent state to having an owned one.
490    ///
491    /// Requires that we hold the lock on the state files.
492    #[instrument(level = "trace", skip_all)]
493    pub fn upgrade_to_owned_persistent_state(&self) -> Result<(), GuardMgrError> {
494        let mut inner = self.inner.lock().expect("Poisoned lock");
495        debug_assert!(inner.storage.can_store());
496        let new_guards = inner.storage.load()?.unwrap_or_default();
497        let wallclock = self.runtime.wallclock();
498        let now = self.runtime.now();
499        inner.replace_guards_with(new_guards, wallclock, now);
500        Ok(())
501    }
502
503    /// Return true if `netdir` has enough information to safely become our new netdir.
504    pub fn netdir_is_sufficient(&self, netdir: &NetDir) -> bool {
505        let mut inner = self.inner.lock().expect("Poisoned lock");
506        if inner.guards.active_set.universe_type() != UniverseType::NetDir {
507            // If we aren't using the netdir, this isn't something we want to look at.
508            return true;
509        }
510        inner
511            .guards
512            .active_guards_mut()
513            .n_primary_without_id_info_in(netdir)
514            == 0
515    }
516
517    /// Mark every guard as potentially retriable, regardless of how recently we
518    /// failed to connect to it.
519    pub fn mark_all_guards_retriable(&self) {
520        let mut inner = self.inner.lock().expect("Poisoned lock");
521        inner.guards.active_guards_mut().mark_all_guards_retriable();
522    }
523
524    /// Configure this guardmgr to use a fixed [`NetDir`] instead of a provider.
525    ///
526    /// This function is for testing only, and is exclusive with
527    /// `install_netdir_provider`.
528    ///
529    /// # Panics
530    ///
531    /// Panics if any [`NetDirProvider`] has already been installed.
532    #[cfg(any(test, feature = "testing"))]
533    pub fn install_test_netdir(&self, netdir: &NetDir) {
534        use tor_netdir::testprovider::TestNetDirProvider;
535        let wallclock = self.runtime.wallclock();
536        let now = self.runtime.now();
537        let netdir_provider: Arc<dyn NetDirProvider> =
538            Arc::new(TestNetDirProvider::from(netdir.clone()));
539        self.install_netdir_provider(&netdir_provider)
540            .expect("Couldn't install testing network provider");
541
542        let mut inner = self.inner.lock().expect("Poisoned lock");
543        inner.update(wallclock, now);
544    }
545
546    /// Replace the configuration in this `GuardMgr` with `config`.
547    #[instrument(level = "trace", skip_all)]
548    pub fn reconfigure(
549        &self,
550        config: &impl GuardMgrConfig,
551    ) -> Result<RetireCircuits, ReconfigureError> {
552        let mut inner = self.inner.lock().expect("Poisoned lock");
553        // Change the set of configured fallbacks.
554        {
555            let mut fallbacks: fallback::FallbackState = config.fallbacks().into();
556            std::mem::swap(&mut inner.fallbacks, &mut fallbacks);
557            inner.fallbacks.take_status_from(fallbacks);
558        }
559        // If we are built to use bridges, change the bridge configuration.
560        #[cfg(feature = "bridge-client")]
561        {
562            let wallclock = self.runtime.wallclock();
563            let now = self.runtime.now();
564            Ok(inner.replace_bridge_config(config, wallclock, now)?)
565        }
566        // If we are built to use bridges, change the bridge configuration.
567        #[cfg(not(feature = "bridge-client"))]
568        {
569            Ok(RetireCircuits::None)
570        }
571    }
572
573    /// Replace the current [`GuardFilter`] used by this `GuardMgr`.
574    // TODO should this be part of the config?
575    pub fn set_filter(&self, filter: GuardFilter) {
576        let wallclock = self.runtime.wallclock();
577        let now = self.runtime.now();
578        let mut inner = self.inner.lock().expect("Poisoned lock");
579        inner.set_filter(filter, wallclock, now);
580    }
581
582    /// Select a guard for a given [`GuardUsage`].
583    ///
584    /// On success, we return a [`FirstHop`] object to identify which
585    /// guard we have picked, a [`GuardMonitor`] object that the
586    /// caller can use to report whether its attempt to use the guard
587    /// succeeded or failed, and a [`GuardUsable`] future that the
588    /// caller can use to decide whether a circuit built through the
589    /// guard is actually safe to use.
590    ///
591    /// That last point is important: It's okay to build a circuit
592    /// through the guard returned by this function, but you can't
593    /// actually use it for traffic unless the [`GuardUsable`] future
594    /// yields "true".
595    #[instrument(skip_all, level = "trace")]
596    pub fn select_guard(
597        &self,
598        usage: GuardUsage,
599    ) -> Result<(FirstHop, GuardMonitor, GuardUsable), PickGuardError> {
600        let now = self.runtime.now();
601        let wallclock = self.runtime.wallclock();
602
603        let mut inner = self.inner.lock().expect("Poisoned lock");
604
605        // (I am not 100% sure that we need to consider_all_retries here, but
606        // it should _probably_ not hurt.)
607        inner.guards.active_guards_mut().consider_all_retries(now);
608
609        let (origin, guard) = inner.select_guard_with_expand(&usage, now, wallclock)?;
610        trace!(?guard, ?usage, "Guard selected");
611
612        let (usable, usable_sender) = if origin.usable_immediately() {
613            (GuardUsable::new_usable_immediately(), None)
614        } else {
615            let (u, snd) = GuardUsable::new_uncertain();
616            (u, Some(snd))
617        };
618        let request_id = pending::RequestId::next();
619        let ctrl = inner.ctrl.clone();
620        let monitor = GuardMonitor::new(request_id, ctrl);
621
622        // Note that the network can be down even if all the primary guards
623        // are not yet marked as unreachable.  But according to guard-spec we
624        // don't want to acknowledge the net as down before that point, since
625        // we don't mark all the primary guards as retriable unless
626        // we've been forced to non-primary guards.
627        let net_has_been_down =
628            if let Some(duration) = tor_proto::time_since_last_incoming_traffic() {
629                inner
630                    .guards
631                    .active_guards_mut()
632                    .all_primary_guards_are_unreachable()
633                    && duration >= inner.params.internet_down_timeout
634            } else {
635                // TODO: Is this the correct behavior in this case?
636                false
637            };
638
639        let pending_request = pending::PendingRequest::new(
640            guard.first_hop_id(),
641            usage,
642            usable_sender,
643            net_has_been_down,
644        );
645        inner.pending.insert(request_id, pending_request);
646
647        match &guard.sample {
648            Some(sample) => {
649                let guard_id = GuardId::from_relay_ids(&guard);
650                inner
651                    .guards
652                    .guards_mut(sample)
653                    .record_attempt(&guard_id, now);
654            }
655            None => {
656                // We don't record attempts for fallbacks; we only care when
657                // they have failed.
658            }
659        }
660
661        Ok((guard, monitor, usable))
662    }
663
664    /// Record that _after_ we built a circuit with a guard, something described
665    /// in `external_failure` went wrong with it.
666    pub fn note_external_failure<T>(&self, identity: &T, external_failure: ExternalActivity)
667    where
668        T: tor_linkspec::HasRelayIds + ?Sized,
669    {
670        let now = self.runtime.now();
671        let mut inner = self.inner.lock().expect("Poisoned lock");
672        let ids = inner.lookup_ids(identity);
673        for id in ids {
674            match &id.0 {
675                FirstHopIdInner::Guard(sample, id) => {
676                    inner
677                        .guards
678                        .guards_mut(sample)
679                        .record_failure(id, Some(external_failure), now);
680                }
681                FirstHopIdInner::Fallback(id) => {
682                    if external_failure == ExternalActivity::DirCache {
683                        inner.fallbacks.note_failure(id, now);
684                    }
685                }
686            }
687        }
688    }
689
690    /// Record that _after_ we built a circuit with a guard, some activity
691    /// described in `external_activity` was successful with it.
692    pub fn note_external_success<T>(&self, identity: &T, external_activity: ExternalActivity)
693    where
694        T: tor_linkspec::HasRelayIds + ?Sized,
695    {
696        let mut inner = self.inner.lock().expect("Poisoned lock");
697
698        inner.record_external_success(identity, external_activity, self.runtime.wallclock());
699    }
700
701    /// Return a stream of events about our estimated clock skew; these events
702    /// are `None` when we don't have enough information to make an estimate,
703    /// and `Some(`[`SkewEstimate`]`)` otherwise.
704    ///
705    /// Note that this stream can be lossy: if the estimate changes more than
706    /// one before you read from the stream, you might only get the most recent
707    /// update.
708    pub fn skew_events(&self) -> ClockSkewEvents {
709        let inner = self.inner.lock().expect("Poisoned lock");
710        inner.recv_skew.clone()
711    }
712
713    /// Ensure that the message queue is flushed before proceeding to
714    /// the next step.  Used for testing.
715    #[cfg(test)]
716    async fn flush_msg_queue(&self) {
717        let (snd, rcv) = oneshot::channel();
718        let pingmsg = daemon::Msg::Ping(snd);
719        {
720            let inner = self.inner.lock().expect("Poisoned lock");
721            inner
722                .ctrl
723                .unbounded_send(pingmsg)
724                .expect("Guard observer task exited prematurely.");
725        }
726        let _ = rcv.await;
727    }
728}
729
730/// An activity that can succeed or fail, and whose success or failure can be
731/// attributed to a guard.
732#[derive(Copy, Clone, Debug, Eq, PartialEq)]
733#[non_exhaustive]
734pub enum ExternalActivity {
735    /// The activity of using the guard as a directory cache.
736    DirCache,
737}
738
739impl GuardSets {
740    /// Return a reference to the currently active set of guards.
741    ///
742    /// (That's easy enough for now, since there is never more than one set of
743    /// guards.  But eventually that will change, as we add support for more
744    /// complex filter types, and for bridge relays. Those will use separate
745    /// `GuardSet` instances, and this accessor will choose the right one.)
746    fn active_guards(&self) -> &GuardSet {
747        self.guards(&self.active_set)
748    }
749
750    /// Return the set of guards corresponding to the provided selector.
751    fn guards(&self, selector: &GuardSetSelector) -> &GuardSet {
752        match selector {
753            GuardSetSelector::Default => &self.default,
754            GuardSetSelector::Restricted => &self.restricted,
755            #[cfg(feature = "bridge-client")]
756            GuardSetSelector::Bridges => &self.bridges,
757        }
758    }
759
760    /// Return a mutable reference to the currently active set of guards.
761    fn active_guards_mut(&mut self) -> &mut GuardSet {
762        self.guards_mut(&self.active_set.clone())
763    }
764
765    /// Return a mutable reference to the set of guards corresponding to the
766    /// provided selector.
767    fn guards_mut(&mut self, selector: &GuardSetSelector) -> &mut GuardSet {
768        match selector {
769            GuardSetSelector::Default => &mut self.default,
770            GuardSetSelector::Restricted => &mut self.restricted,
771            #[cfg(feature = "bridge-client")]
772            GuardSetSelector::Bridges => &mut self.bridges,
773        }
774    }
775
776    /// Update all non-persistent state for the guards in this object with the
777    /// state in `other`.
778    fn copy_status_from(&mut self, mut other: GuardSets) {
779        use strum::IntoEnumIterator;
780        for sample in GuardSetSelector::iter() {
781            self.guards_mut(&sample)
782                .copy_ephemeral_status_into_newly_loaded_state(std::mem::take(
783                    other.guards_mut(&sample),
784                ));
785        }
786        self.active_set = other.active_set;
787    }
788}
789
790impl GuardMgrInner {
791    /// Look up the latest [`NetDir`] (if there is one) from our
792    /// [`NetDirProvider`] (if we have one).
793    fn timely_netdir(&self) -> Option<Arc<NetDir>> {
794        self.netdir_provider
795            .as_ref()
796            .and_then(Weak::upgrade)
797            .and_then(|np| np.timely_netdir().ok())
798    }
799
800    /// Look up the latest [`BridgeDescList`](bridge::BridgeDescList) (if there
801    /// is one) from our [`BridgeDescProvider`](bridge::BridgeDescProvider) (if
802    /// we have one).
803    #[cfg(feature = "bridge-client")]
804    fn latest_bridge_desc_list(&self) -> Option<Arc<bridge::BridgeDescList>> {
805        self.bridge_desc_provider
806            .as_ref()
807            .and_then(Weak::upgrade)
808            .map(|bp| bp.bridges())
809    }
810
811    /// Run a function that takes `&mut self` and an optional NetDir.
812    ///
813    /// We try to use the netdir from our [`NetDirProvider`] (if we have one).
814    /// Therefore, although its _parameters_ are suitable for every
815    /// [`GuardSet`], its _contents_ might not be. For those, call
816    /// [`with_opt_universe`](Self::with_opt_universe) instead.
817    //
818    // This function exists to handle the lifetime mess where sometimes the
819    // resulting NetDir will borrow from `netdir`, and sometimes it will borrow
820    // from an Arc returned by `self.latest_netdir()`.
821    fn with_opt_netdir<F, T>(&mut self, func: F) -> T
822    where
823        F: FnOnce(&mut Self, Option<&NetDir>) -> T,
824    {
825        if let Some(nd) = self.timely_netdir() {
826            func(self, Some(nd.as_ref()))
827        } else {
828            func(self, None)
829        }
830    }
831
832    /// Return the latest `BridgeSet` based on our `BridgeDescProvider` and our
833    /// configured bridges.
834    ///
835    /// Returns `None` if we are not configured to use bridges.
836    #[cfg(feature = "bridge-client")]
837    fn latest_bridge_set(&self) -> Option<bridge::BridgeSet> {
838        let bridge_config = self.configured_bridges.as_ref()?.clone();
839        let bridge_descs = self.latest_bridge_desc_list();
840        Some(bridge::BridgeSet::new(bridge_config, bridge_descs))
841    }
842
843    /// Run a function that takes `&mut self` and an optional [`UniverseRef`].
844    ///
845    /// We try to get a universe from the appropriate source for the current
846    /// active guard set.
847    fn with_opt_universe<F, T>(&mut self, func: F) -> T
848    where
849        F: FnOnce(&mut Self, Option<&UniverseRef>) -> T,
850    {
851        // TODO: it might be nice to make `func` take an GuardSet and a set of
852        // parameters, so we can't get the active set wrong. Doing that will
853        // require a fair amount of refactoring so that the borrow checker is
854        // happy, however.
855        match self.guards.active_set.universe_type() {
856            UniverseType::NetDir => {
857                if let Some(nd) = self.timely_netdir() {
858                    func(self, Some(&UniverseRef::NetDir(nd)))
859                } else {
860                    func(self, None)
861                }
862            }
863            #[cfg(feature = "bridge-client")]
864            UniverseType::BridgeSet => func(
865                self,
866                self.latest_bridge_set()
867                    .map(UniverseRef::BridgeSet)
868                    .as_ref(),
869            ),
870        }
871    }
872
873    /// Update the status of all guards in the active set, based on the passage
874    /// of time, our configuration, and the relevant Universe for our active
875    /// set.
876    #[instrument(skip_all, level = "trace")]
877    fn update(&mut self, wallclock: SystemTime, now: Instant) {
878        self.with_opt_netdir(|this, netdir| {
879            // Here we update our parameters from the latest NetDir, and check
880            // whether we need to change to a (non)-restrictive GuardSet based
881            // on those parameters and our configured filter.
882            //
883            // This uses a NetDir unconditionally, since we always want to take
884            // the network parameters our parameters from the consensus even if
885            // the guards themselves are from a BridgeSet.
886            this.update_active_set_params_and_filter(netdir);
887        });
888        self.with_opt_universe(|this, univ| {
889            // Now we update the set of guards themselves based on the
890            // Universe, which is either the latest NetDir, or the latest
891            // BridgeSet—depending on what the GuardSet wants.
892            Self::update_guardset_internal(
893                &this.params,
894                wallclock,
895                this.guards.active_set.universe_type(),
896                this.guards.active_guards_mut(),
897                univ,
898            );
899            #[cfg(feature = "bridge-client")]
900            this.update_desired_descriptors(now);
901            #[cfg(not(feature = "bridge-client"))]
902            let _ = now;
903        });
904    }
905
906    /// Replace our bridge configuration with the one from `new_config`.
907    #[cfg(feature = "bridge-client")]
908    #[instrument(level = "trace", skip_all)]
909    fn replace_bridge_config(
910        &mut self,
911        new_config: &impl GuardMgrConfig,
912        wallclock: SystemTime,
913        now: Instant,
914    ) -> Result<RetireCircuits, GuardMgrConfigError> {
915        match (&self.configured_bridges, new_config.bridges_enabled()) {
916            (None, false) => {
917                assert_ne!(
918                    self.guards.active_set.universe_type(),
919                    UniverseType::BridgeSet
920                );
921                return Ok(RetireCircuits::None); // nothing to do
922            }
923            (_, true) if !self.storage.can_store() => {
924                // TODO: Ideally we would try to upgrade, obtaining an exclusive lock,
925                // but `StorageHandle` currently lacks a method for that.
926                return Err(GuardMgrConfigError::NoLock("bridges configured".into()));
927            }
928            (Some(current_bridges), true) if new_config.bridges() == current_bridges.as_ref() => {
929                assert_eq!(
930                    self.guards.active_set.universe_type(),
931                    UniverseType::BridgeSet
932                );
933                return Ok(RetireCircuits::None); // nothing to do.
934            }
935            (_, true) => {
936                self.configured_bridges = Some(new_config.bridges().into());
937                self.guards.active_set = GuardSetSelector::Bridges;
938            }
939            (_, false) => {
940                self.configured_bridges = None;
941                self.guards.active_set = GuardSetSelector::Default;
942            }
943        }
944
945        // If we have gotten here, we have changed the set of bridges, changed
946        // which set is active, or changed them both.  We need to make sure that
947        // our `GuardSet` object is up-to-date with our configuration.
948        self.update(wallclock, now);
949
950        // We also need to tell the caller that its circuits are no good any
951        // more.
952        //
953        // TODO(nickm): Someday we can do this more judiciously by retuning
954        // "Some" in the case where we're still using bridges but our new bridge
955        // set contains different elements; see comment on RetireCircuits.
956        //
957        // TODO(nickm): We could also safely return RetireCircuits::None if we
958        // are using bridges, and our new bridge list is a superset of the older
959        // one.
960        Ok(RetireCircuits::All)
961    }
962
963    /// Update our parameters, our selection (based on network parameters and
964    /// configuration), and make sure the active GuardSet has the right
965    /// configuration itself.
966    ///
967    /// We should call this whenever the NetDir's parameters change, or whenever
968    /// our filter changes.  We do not need to call it for new elements arriving
969    /// in our Universe, since those do not affect anything here.
970    ///
971    /// We should also call this whenever a new GuardSet becomes active for any
972    /// reason _other_ than just having called this function.
973    ///
974    /// (This function is only invoked from `update`, which should be called
975    /// under the above circumstances.)
976    fn update_active_set_params_and_filter(&mut self, netdir: Option<&NetDir>) {
977        // Set the parameters.  These always come from the NetDir, even if this
978        // is a bridge set.
979        if let Some(netdir) = netdir {
980            match GuardParams::try_from(netdir.params()) {
981                Ok(params) => self.params = params,
982                Err(e) => warn!("Unusable guard parameters from consensus: {}", e),
983            }
984
985            self.select_guard_set_based_on_filter(netdir);
986        }
987
988        // Change the filter, if it doesn't match what the guards have.
989        //
990        // TODO(nickm): We could use a "dirty" flag or something to decide
991        // whether we need to call set_filter, if this comparison starts to show
992        // up in profiles.
993        if self.guards.active_guards().filter() != &self.filter {
994            let restrictive = self.guards.active_set == GuardSetSelector::Restricted;
995            self.guards
996                .active_guards_mut()
997                .set_filter(self.filter.clone(), restrictive);
998        }
999    }
1000
1001    /// Update the status of every guard in `active_guards`, and expand it as
1002    /// needed.
1003    ///
1004    /// This function doesn't take `&self`, to make sure that we are only
1005    /// affecting a single `GuardSet`, and to avoid confusing the borrow
1006    /// checker.
1007    ///
1008    /// We should call this whenever the contents of the universe have changed.
1009    ///
1010    /// We should also call this whenever a new GuardSet becomes active.
1011    fn update_guardset_internal<U: Universe>(
1012        params: &GuardParams,
1013        now: SystemTime,
1014        universe_type: UniverseType,
1015        active_guards: &mut GuardSet,
1016        universe: Option<&U>,
1017    ) -> ExtendedStatus {
1018        // Expire guards.  Do that early, in case doing so makes it clear that
1019        // we need to grab more guards or mark others as primary.
1020        active_guards.expire_old_guards(params, now);
1021
1022        let extended = if let Some(universe) = universe {
1023            // TODO: This check here may be completely unnecessary. I inserted
1024            // it back in 5ac0fcb7ef603e0d14 because I was originally concerned
1025            // it might be undesirable to list a primary guard as "missing dir
1026            // info" (and therefore unusable) if we were expecting to get its
1027            // microdescriptor "very soon."
1028            //
1029            // But due to the other check in `netdir_is_sufficient`, we
1030            // shouldn't be installing a netdir until it has microdescs for all
1031            // of the (non-bridge) primary guards that it lists. - nickm
1032            let n = active_guards.n_primary_without_id_info_in(universe);
1033            if n > 0 && universe_type == UniverseType::NetDir {
1034                // We are missing the information from a NetDir needed to see
1035                // whether our primary guards are listed, so we shouldn't update
1036                // our guard status.
1037                //
1038                // We don't want to do this check if we are using bridges, since
1039                // a missing bridge descriptor is not guaranteed to temporary
1040                // problem in the same way that a missing microdescriptor is.
1041                // (When a bridge desc is missing, the bridge could be down or
1042                // unreachable, and nobody else can help us. But if a microdesc
1043                // is missing, we just need to find a cache that has it.)
1044                trace!(
1045                    n_primary_without_id_info = n,
1046                    "Not extending guardset, missing information."
1047                );
1048                return ExtendedStatus::No;
1049            }
1050            active_guards.update_status_from_dir(universe);
1051            active_guards.extend_sample_as_needed(now, params, universe)
1052        } else {
1053            trace!("Not extending guardset, no universe given.");
1054            ExtendedStatus::No
1055        };
1056
1057        active_guards.select_primary_guards(params);
1058
1059        extended
1060    }
1061
1062    /// If using bridges, tell the BridgeDescProvider which descriptors we want.
1063    /// We need to check this *after* we select our primary guards.
1064    #[cfg(feature = "bridge-client")]
1065    fn update_desired_descriptors(&mut self, now: Instant) {
1066        if self.guards.active_set.universe_type() != UniverseType::BridgeSet {
1067            return;
1068        }
1069
1070        let provider = self.bridge_desc_provider.as_ref().and_then(Weak::upgrade);
1071        let bridge_set = self.latest_bridge_set();
1072        if let (Some(provider), Some(bridge_set)) = (provider, bridge_set) {
1073            let desired: Vec<_> = self
1074                .guards
1075                .active_guards()
1076                .descriptors_to_request(now, &self.params)
1077                .into_iter()
1078                .flat_map(|guard| bridge_set.bridge_by_guard(guard))
1079                .cloned()
1080                .collect();
1081
1082            provider.set_bridges(&desired);
1083        }
1084    }
1085
1086    /// Replace the active guard state with `new_state`, preserving
1087    /// non-persistent state for any guards that are retained.
1088    #[instrument(level = "trace", skip_all)]
1089    fn replace_guards_with(
1090        &mut self,
1091        mut new_guards: GuardSets,
1092        wallclock: SystemTime,
1093        now: Instant,
1094    ) {
1095        std::mem::swap(&mut self.guards, &mut new_guards);
1096        self.guards.copy_status_from(new_guards);
1097        self.update(wallclock, now);
1098    }
1099
1100    /// Update which guard set is active based on the current filter and the
1101    /// provided netdir.
1102    ///
1103    /// After calling this function, the new guard set's filter may be
1104    /// out-of-date: be sure to call `set_filter` as appropriate.
1105    fn select_guard_set_based_on_filter(&mut self, netdir: &NetDir) {
1106        // In general, we'd like to use the restricted set if we're under the
1107        // threshold, and the default set if we're over the threshold.  But if
1108        // we're sitting close to the threshold, we want to avoid flapping back
1109        // and forth, so we only change when we're more than 5% "off" from
1110        // whatever our current setting is.
1111        //
1112        // (See guard-spec section 2 for more information.)
1113        let offset = match self.guards.active_set {
1114            GuardSetSelector::Default => -0.05,
1115            GuardSetSelector::Restricted => 0.05,
1116            // If we're using bridges, then we don't switch between the other guard sets based on on the filter at all.
1117            #[cfg(feature = "bridge-client")]
1118            GuardSetSelector::Bridges => return,
1119        };
1120        let frac_permitted = self.filter.frac_bw_permitted(netdir);
1121        let threshold = self.params.filter_threshold + offset;
1122        let new_choice = if frac_permitted < threshold {
1123            GuardSetSelector::Restricted
1124        } else {
1125            GuardSetSelector::Default
1126        };
1127
1128        if new_choice != self.guards.active_set {
1129            info!(
1130                "Guard selection changed; we are now using the {:?} guard set",
1131                &new_choice
1132            );
1133
1134            self.guards.active_set = new_choice;
1135
1136            if frac_permitted < self.params.extreme_threshold {
1137                warn!(
1138                    "The number of guards permitted is smaller than the recommended minimum of {:.0}%.",
1139                    self.params.extreme_threshold * 100.0,
1140                );
1141            }
1142        }
1143    }
1144
1145    /// Mark all of our primary guards as retriable, if we haven't done
1146    /// so since long enough before `now`.
1147    ///
1148    /// We want to call this function whenever a guard attempt succeeds,
1149    /// if the internet seemed to be down when the guard attempt was
1150    /// first launched.
1151    fn maybe_retry_primary_guards(&mut self, now: Instant) {
1152        // We don't actually want to mark our primary guards as
1153        // retriable more than once per internet_down_timeout: after
1154        // the first time, we would just be noticing the same "coming
1155        // back online" event more than once.
1156        let interval = self.params.internet_down_timeout;
1157        if self.last_primary_retry_time + interval <= now {
1158            debug!(
1159                "Successfully reached a guard after a while off the internet; marking all primary guards retriable."
1160            );
1161            self.guards
1162                .active_guards_mut()
1163                .mark_primary_guards_retriable();
1164            self.last_primary_retry_time = now;
1165        }
1166    }
1167
1168    /// Replace the current GuardFilter with `filter`.
1169    #[instrument(level = "trace", skip_all)]
1170    fn set_filter(&mut self, filter: GuardFilter, wallclock: SystemTime, now: Instant) {
1171        self.filter = filter;
1172        self.update(wallclock, now);
1173    }
1174
1175    /// Called when the circuit manager reports (via [`GuardMonitor`]) that
1176    /// a guard succeeded or failed.
1177    ///
1178    /// Changes the guard's status as appropriate, and updates the pending
1179    /// request as needed.
1180    #[allow(clippy::cognitive_complexity)]
1181    pub(crate) fn handle_msg(
1182        &mut self,
1183        request_id: RequestId,
1184        status: GuardStatus,
1185        skew: Option<ClockSkew>,
1186        runtime: &impl tor_rtcompat::SleepProvider,
1187    ) {
1188        if let Some(mut pending) = self.pending.remove(&request_id) {
1189            // If there was a pending request matching this RequestId, great!
1190            let guard_id = pending.guard_id();
1191            trace!(?guard_id, ?status, "Received report of guard status");
1192
1193            // First, handle the skew report (if any)
1194            if let Some(skew) = skew {
1195                let now = runtime.now();
1196                let observation = skew::SkewObservation { skew, when: now };
1197
1198                match &guard_id.0 {
1199                    FirstHopIdInner::Guard(_, id) => {
1200                        self.guards.active_guards_mut().record_skew(id, observation);
1201                    }
1202                    FirstHopIdInner::Fallback(id) => {
1203                        self.fallbacks.note_skew(id, observation);
1204                    }
1205                }
1206                // TODO: We call this whenever we receive an observed clock
1207                // skew. That's not the perfect timing for two reasons.  First
1208                // off, it might be too frequent: it does an O(n) calculation,
1209                // which isn't ideal.  Second, it might be too infrequent: after
1210                // an hour has passed, a given observation won't be up-to-date
1211                // any more, and we might want to recalculate the skew
1212                // accordingly.
1213                self.update_skew(now);
1214            }
1215
1216            match (status, &guard_id.0) {
1217                (GuardStatus::Failure, FirstHopIdInner::Fallback(id)) => {
1218                    // We used a fallback, and we weren't able to build a circuit through it.
1219                    self.fallbacks.note_failure(id, runtime.now());
1220                }
1221                (_, FirstHopIdInner::Fallback(_)) => {
1222                    // We don't record any other kind of circuit activity if we
1223                    // took the entry from the fallback list.
1224                }
1225                (GuardStatus::Success, FirstHopIdInner::Guard(sample, id)) => {
1226                    // If we had gone too long without any net activity when we
1227                    // gave out this guard, and now we're seeing a circuit
1228                    // succeed, tell the primary guards that they might be
1229                    // retriable.
1230                    if pending.net_has_been_down() {
1231                        self.maybe_retry_primary_guards(runtime.now());
1232                    }
1233
1234                    // The guard succeeded.  Tell the GuardSet.
1235                    self.guards.guards_mut(sample).record_success(
1236                        id,
1237                        &self.params,
1238                        None,
1239                        runtime.wallclock(),
1240                    );
1241                    // Either tell the request whether the guard is
1242                    // usable, or schedule it as a "waiting" request.
1243                    if let Some(usable) = self.guard_usability_status(&pending, runtime.now()) {
1244                        trace!(?guard_id, usable, "Known usability status");
1245                        pending.reply(usable);
1246                    } else {
1247                        // This is the one case where we can't use the
1248                        // guard yet.
1249                        trace!(?guard_id, "Not able to answer right now");
1250                        pending.mark_waiting(runtime.now());
1251                        self.waiting.push(pending);
1252                    }
1253                }
1254                (GuardStatus::Failure, FirstHopIdInner::Guard(sample, id)) => {
1255                    self.guards
1256                        .guards_mut(sample)
1257                        .record_failure(id, None, runtime.now());
1258                    pending.reply(false);
1259                }
1260                (GuardStatus::AttemptAbandoned, FirstHopIdInner::Guard(sample, id)) => {
1261                    self.guards.guards_mut(sample).record_attempt_abandoned(id);
1262                    pending.reply(false);
1263                }
1264                (GuardStatus::Indeterminate, FirstHopIdInner::Guard(sample, id)) => {
1265                    self.guards
1266                        .guards_mut(sample)
1267                        .record_indeterminate_result(id);
1268                    pending.reply(false);
1269                }
1270            };
1271        } else {
1272            warn!(
1273                "Got a status {:?} for a request {:?} that wasn't pending",
1274                status, request_id
1275            );
1276        }
1277
1278        // We might need to update the primary guards based on changes in the
1279        // status of guards above.
1280        self.guards
1281            .active_guards_mut()
1282            .select_primary_guards(&self.params);
1283
1284        // Some waiting request may just have become ready (usable or
1285        // not); we need to give them the information they're waiting
1286        // for.
1287        self.expire_and_answer_pending_requests(runtime.now());
1288    }
1289
1290    /// Helper to implement `GuardMgr::note_external_success()`.
1291    ///
1292    /// (This has to be a separate function so that we can borrow params while
1293    /// we have `mut self` borrowed.)
1294    fn record_external_success<T>(
1295        &mut self,
1296        identity: &T,
1297        external_activity: ExternalActivity,
1298        now: SystemTime,
1299    ) where
1300        T: tor_linkspec::HasRelayIds + ?Sized,
1301    {
1302        for id in self.lookup_ids(identity) {
1303            match &id.0 {
1304                FirstHopIdInner::Guard(sample, id) => {
1305                    self.guards.guards_mut(sample).record_success(
1306                        id,
1307                        &self.params,
1308                        Some(external_activity),
1309                        now,
1310                    );
1311                }
1312                FirstHopIdInner::Fallback(id) => {
1313                    if external_activity == ExternalActivity::DirCache {
1314                        self.fallbacks.note_success(id);
1315                    }
1316                }
1317            }
1318        }
1319    }
1320
1321    /// Return an iterator over all of the clock skew observations we've made
1322    /// for guards or fallbacks.
1323    fn skew_observations(&self) -> impl Iterator<Item = &skew::SkewObservation> {
1324        self.fallbacks
1325            .skew_observations()
1326            .chain(self.guards.active_guards().skew_observations())
1327    }
1328
1329    /// Recalculate our estimated clock skew, and publish it to anybody who
1330    /// cares.
1331    fn update_skew(&mut self, now: Instant) {
1332        let estimate = skew::SkewEstimate::estimate_skew(self.skew_observations(), now);
1333        // TODO: we might want to do this only conditionally, when the skew
1334        // estimate changes.
1335        *self.send_skew.borrow_mut() = estimate;
1336    }
1337
1338    /// If the circuit built because of a given [`PendingRequest`] may
1339    /// now be used (or discarded), return `Some(true)` or
1340    /// `Some(false)` respectively.
1341    ///
1342    /// Return None if we can't yet give an answer about whether such
1343    /// a circuit is usable.
1344    fn guard_usability_status(&self, pending: &PendingRequest, now: Instant) -> Option<bool> {
1345        match &pending.guard_id().0 {
1346            FirstHopIdInner::Guard(sample, id) => self.guards.guards(sample).circ_usability_status(
1347                id,
1348                pending.usage(),
1349                &self.params,
1350                now,
1351            ),
1352            // Fallback circuits are usable immediately, since we don't have to wait to
1353            // see whether any _other_ circuit succeeds or fails.
1354            FirstHopIdInner::Fallback(_) => Some(true),
1355        }
1356    }
1357
1358    /// For requests that have been "waiting" for an answer for too long,
1359    /// expire them and tell the circuit manager that their circuits
1360    /// are unusable.
1361    fn expire_and_answer_pending_requests(&mut self, now: Instant) {
1362        // A bit ugly: we use a separate Vec here to avoid borrowing issues,
1363        // and put it back when we're done.
1364        let mut waiting = Vec::new();
1365        std::mem::swap(&mut waiting, &mut self.waiting);
1366
1367        waiting.retain_mut(|pending| {
1368            let expired = pending
1369                .waiting_since()
1370                .and_then(|w| now.checked_duration_since(w))
1371                .map(|d| d >= self.params.np_idle_timeout)
1372                == Some(true);
1373            if expired {
1374                trace!(?pending, "Pending request expired");
1375                pending.reply(false);
1376                return false;
1377            }
1378
1379            // TODO-SPEC: guard_usability_status isn't what the spec says.  It
1380            // says instead that we should look at _circuit_ status, saying:
1381            //  "   Definition: In the algorithm above, C2 "blocks" C1 if:
1382            // * C2 obeys all the restrictions that C1 had to obey, AND
1383            // * C2 has higher priority than C1, AND
1384            // * Either C2 is <complete>, or C2 is <waiting_for_better_guard>,
1385            // or C2 has been <usable_if_no_better_guard> for no more than
1386            // {NONPRIMARY_GUARD_CONNECT_TIMEOUT} seconds."
1387            //
1388            // See comments in sample::GuardSet::circ_usability_status.
1389
1390            if let Some(answer) = self.guard_usability_status(pending, now) {
1391                trace!(?pending, answer, "Pending request now ready");
1392                pending.reply(answer);
1393                return false;
1394            }
1395            true
1396        });
1397
1398        // Put the waiting list back.
1399        std::mem::swap(&mut waiting, &mut self.waiting);
1400    }
1401
1402    /// Return every currently extant FirstHopId for a guard or fallback
1403    /// directory matching (or possibly matching) the provided keys.
1404    ///
1405    /// An identity is _possibly matching_ if it contains some of the IDs in the
1406    /// provided identity, and it has no _contradictory_ identities, but it does
1407    /// not necessarily contain _all_ of those identities.
1408    ///
1409    /// # TODO
1410    ///
1411    /// This function should probably not exist; it's only used so that dirmgr
1412    /// can report successes or failures, since by the time it observes them it
1413    /// doesn't know whether its circuit came from a guard or a fallback.  To
1414    /// solve that, we'll need CircMgr to record and report which one it was
1415    /// using, which will take some more plumbing.
1416    ///
1417    /// TODO relay: we will have to make the change above when we implement
1418    /// relays; otherwise, it would be possible for an attacker to exploit it to
1419    /// mislead us about our guard status.
1420    fn lookup_ids<T>(&self, identity: &T) -> Vec<FirstHopId>
1421    where
1422        T: tor_linkspec::HasRelayIds + ?Sized,
1423    {
1424        use strum::IntoEnumIterator;
1425        let mut vec = Vec::with_capacity(2);
1426
1427        let id = ids::GuardId::from_relay_ids(identity);
1428        for sample in GuardSetSelector::iter() {
1429            let guard_id = match self.guards.guards(&sample).contains(&id) {
1430                Ok(true) => &id,
1431                Err(other) => other,
1432                Ok(false) => continue,
1433            };
1434            vec.push(FirstHopId(FirstHopIdInner::Guard(sample, guard_id.clone())));
1435        }
1436
1437        let id = ids::FallbackId::from_relay_ids(identity);
1438        if self.fallbacks.contains(&id) {
1439            vec.push(id.into());
1440        }
1441
1442        vec
1443    }
1444
1445    /// Run any periodic events that update guard status, and return a
1446    /// duration after which periodic events should next be run.
1447    #[instrument(skip_all, level = "trace")]
1448    pub(crate) fn run_periodic_events(&mut self, wallclock: SystemTime, now: Instant) -> Duration {
1449        self.update(wallclock, now);
1450        self.expire_and_answer_pending_requests(now);
1451        Duration::from_secs(1) // TODO: Too aggressive.
1452    }
1453
1454    /// Try to select a guard, expanding the sample if the first attempt fails.
1455    #[instrument(skip_all, level = "trace")]
1456    fn select_guard_with_expand(
1457        &mut self,
1458        usage: &GuardUsage,
1459        now: Instant,
1460        wallclock: SystemTime,
1461    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1462        // Try to find a guard.
1463        let first_error = match self.select_guard_once(usage, now) {
1464            Ok(res1) => return Ok(res1),
1465            Err(e) => {
1466                trace!("Couldn't select guard on first attempt: {}", e);
1467                e
1468            }
1469        };
1470
1471        // That didn't work. If we have a netdir, expand the sample and try again.
1472        let res = self.with_opt_universe(|this, univ| {
1473            let univ = univ?;
1474            trace!("No guards available, trying to extend the sample.");
1475            // Make sure that the status on all of our guards are accurate, and
1476            // expand the sample if we can.
1477            //
1478            // Our parameters and configuration did not change, so we do not
1479            // need to call update() or update_active_set_and_filter(). This
1480            // call is sufficient to  extend the sample and recompute primary
1481            // guards.
1482            let extended = Self::update_guardset_internal(
1483                &this.params,
1484                wallclock,
1485                this.guards.active_set.universe_type(),
1486                this.guards.active_guards_mut(),
1487                Some(univ),
1488            );
1489            if extended == ExtendedStatus::Yes {
1490                match this.select_guard_once(usage, now) {
1491                    Ok(res) => return Some(res),
1492                    Err(e) => {
1493                        trace!("Couldn't select guard after update: {}", e);
1494                    }
1495                }
1496            }
1497            None
1498        });
1499        if let Some(res) = res {
1500            return Ok(res);
1501        }
1502
1503        // Okay, that didn't work either.  If we were asked for a directory
1504        // guard, and we aren't using bridges, then we may be able to use a
1505        // fallback.
1506        if usage.kind == GuardUsageKind::OneHopDirectory
1507            && self.guards.active_set.universe_type() == UniverseType::NetDir
1508        {
1509            return self.select_fallback(now);
1510        }
1511
1512        // Couldn't extend the sample or use a fallback; return the original error.
1513        Err(first_error)
1514    }
1515
1516    /// Helper: try to pick a single guard, without retrying on failure.
1517    fn select_guard_once(
1518        &self,
1519        usage: &GuardUsage,
1520        now: Instant,
1521    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1522        let active_set = &self.guards.active_set;
1523        #[cfg_attr(not(feature = "bridge-client"), allow(unused_mut))]
1524        let (list_kind, mut first_hop) =
1525            self.guards
1526                .guards(active_set)
1527                .pick_guard(active_set, usage, &self.params, now)?;
1528        #[cfg(feature = "bridge-client")]
1529        if self.guards.active_set.universe_type() == UniverseType::BridgeSet {
1530            // See if we can promote first_hop to a viable CircTarget.
1531            let bridges = self.latest_bridge_set().ok_or_else(|| {
1532                PickGuardError::Internal(internal!(
1533                    "No bridge set available, even though this is the Bridges sample"
1534                ))
1535            })?;
1536            first_hop.lookup_bridge_circ_target(&bridges);
1537
1538            if usage.kind == GuardUsageKind::Data && !first_hop.contains_circ_target() {
1539                return Err(PickGuardError::Internal(internal!(
1540                    "Tried to return a non-circtarget guard with Data usage!"
1541                )));
1542            }
1543        }
1544        Ok((list_kind, first_hop))
1545    }
1546
1547    /// Helper: Select a fallback directory.
1548    ///
1549    /// Called when we have no guard information to use. Return values are as
1550    /// for [`GuardMgr::select_guard()`]
1551    fn select_fallback(
1552        &self,
1553        now: Instant,
1554    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1555        let filt = self.guards.active_guards().filter();
1556
1557        let fallback = crate::FirstHop {
1558            sample: None,
1559            inner: crate::FirstHopInner::Chan(OwnedChanTarget::from_chan_target(
1560                self.fallbacks.choose(&mut rand::rng(), now, filt)?,
1561            )),
1562        };
1563        let fallback = filt.modify_hop(fallback)?;
1564        Ok((sample::ListKind::Fallback, fallback))
1565    }
1566}
1567
1568/// A possible outcome of trying to extend a guard sample.
1569#[derive(Copy, Clone, Debug, Eq, PartialEq)]
1570enum ExtendedStatus {
1571    /// The guard sample was extended. (At least one guard was added to it.)
1572    Yes,
1573    /// The guard sample was not extended.
1574    No,
1575}
1576
1577/// A set of parameters, derived from the consensus document, controlling
1578/// the behavior of a guard manager.
1579#[derive(Debug, Clone)]
1580#[cfg_attr(test, derive(PartialEq))]
1581struct GuardParams {
1582    /// How long should a sampled, un-confirmed guard be kept in the sample before it expires?
1583    lifetime_unconfirmed: Duration,
1584    /// How long should a confirmed guard be kept in the sample before
1585    /// it expires?
1586    lifetime_confirmed: Duration,
1587    /// How long may  a guard be unlisted before we remove it from the sample?
1588    lifetime_unlisted: Duration,
1589    /// Largest number of guards we're willing to add to the sample.
1590    max_sample_size: usize,
1591    /// Largest fraction of the network's guard bandwidth that we're
1592    /// willing to add to the sample.
1593    max_sample_bw_fraction: f64,
1594    /// Smallest number of guards that we're willing to have in the
1595    /// sample, after applying a [`GuardFilter`].
1596    min_filtered_sample_size: usize,
1597    /// How many guards are considered "Primary"?
1598    n_primary: usize,
1599    /// When making a regular circuit, how many primary guards should we
1600    /// be willing to try?
1601    data_parallelism: usize,
1602    /// When making a one-hop directory circuit, how many primary
1603    /// guards should we be willing to try?
1604    dir_parallelism: usize,
1605    /// For how long does a pending attempt to connect to a guard
1606    /// block an attempt to use a less-favored non-primary guard?
1607    np_connect_timeout: Duration,
1608    /// How long do we allow a circuit to a successful but unfavored
1609    /// non-primary guard to sit around before deciding not to use it?
1610    np_idle_timeout: Duration,
1611    /// After how much time without successful activity does a
1612    /// successful circuit indicate that we should retry our primary
1613    /// guards?
1614    internet_down_timeout: Duration,
1615    /// What fraction of the guards can be can be filtered out before we
1616    /// decide that our filter is "very restrictive"?
1617    filter_threshold: f64,
1618    /// What fraction of the guards determine that our filter is "very
1619    /// restrictive"?
1620    extreme_threshold: f64,
1621}
1622
1623impl Default for GuardParams {
1624    fn default() -> Self {
1625        let one_day = Duration::from_secs(86400);
1626        GuardParams {
1627            lifetime_unconfirmed: one_day * 120,
1628            lifetime_confirmed: one_day * 60,
1629            lifetime_unlisted: one_day * 20,
1630            max_sample_size: 60,
1631            max_sample_bw_fraction: 0.2,
1632            min_filtered_sample_size: 20,
1633            n_primary: 3,
1634            data_parallelism: 1,
1635            dir_parallelism: 3,
1636            np_connect_timeout: Duration::from_secs(15),
1637            np_idle_timeout: Duration::from_secs(600),
1638            internet_down_timeout: Duration::from_secs(600),
1639            filter_threshold: 0.2,
1640            extreme_threshold: 0.01,
1641        }
1642    }
1643}
1644
1645impl TryFrom<&NetParameters> for GuardParams {
1646    type Error = tor_units::Error;
1647    fn try_from(p: &NetParameters) -> Result<GuardParams, Self::Error> {
1648        Ok(GuardParams {
1649            lifetime_unconfirmed: p.guard_lifetime_unconfirmed.try_into()?,
1650            lifetime_confirmed: p.guard_lifetime_confirmed.try_into()?,
1651            lifetime_unlisted: p.guard_remove_unlisted_after.try_into()?,
1652            max_sample_size: p.guard_max_sample_size.try_into()?,
1653            max_sample_bw_fraction: p.guard_max_sample_threshold.as_fraction(),
1654            min_filtered_sample_size: p.guard_filtered_min_sample_size.try_into()?,
1655            n_primary: p.guard_n_primary.try_into()?,
1656            data_parallelism: p.guard_use_parallelism.try_into()?,
1657            dir_parallelism: p.guard_dir_use_parallelism.try_into()?,
1658            np_connect_timeout: p.guard_nonprimary_connect_timeout.try_into()?,
1659            np_idle_timeout: p.guard_nonprimary_idle_timeout.try_into()?,
1660            internet_down_timeout: p.guard_internet_likely_down.try_into()?,
1661            filter_threshold: p.guard_meaningful_restriction.as_fraction(),
1662            extreme_threshold: p.guard_extreme_restriction.as_fraction(),
1663        })
1664    }
1665}
1666
1667/// Representation of a guard or fallback, as returned by [`GuardMgr::select_guard()`].
1668#[derive(Debug, Clone)]
1669pub struct FirstHop {
1670    /// The sample from which this guard was taken, or `None` if this is a fallback.
1671    sample: Option<GuardSetSelector>,
1672    /// Information about connecting to (or through) this guard.
1673    inner: FirstHopInner,
1674}
1675/// The enumeration inside a FirstHop that holds information about how to
1676/// connect to (and possibly through) a guard or fallback.
1677#[derive(Debug, Clone)]
1678enum FirstHopInner {
1679    /// We have enough information to connect to a guard.
1680    Chan(OwnedChanTarget),
1681    /// We have enough information to connect to a guards _and_ to build
1682    /// multihop circuits through it.
1683    #[cfg_attr(not(feature = "bridge-client"), allow(dead_code))]
1684    Circ(OwnedCircTarget),
1685}
1686
1687impl FirstHop {
1688    /// Return a new [`FirstHopId`] for this `FirstHop`.
1689    fn first_hop_id(&self) -> FirstHopId {
1690        match &self.sample {
1691            Some(sample) => {
1692                let guard_id = GuardId::from_relay_ids(self);
1693                FirstHopId::in_sample(sample.clone(), guard_id)
1694            }
1695            None => {
1696                let fallback_id = crate::ids::FallbackId::from_relay_ids(self);
1697                FirstHopId::from(fallback_id)
1698            }
1699        }
1700    }
1701
1702    /// Look up this guard in `netdir`.
1703    pub fn get_relay<'a>(&self, netdir: &'a NetDir) -> Option<Relay<'a>> {
1704        match &self.sample {
1705            #[cfg(feature = "bridge-client")]
1706            // Always return "None" for anything that isn't in the netdir.
1707            Some(s) if s.universe_type() == UniverseType::BridgeSet => None,
1708            // Otherwise ask the netdir.
1709            _ => netdir.by_ids(self),
1710        }
1711    }
1712
1713    /// Return true if this guard is a bridge.
1714    pub fn is_bridge(&self) -> bool {
1715        match &self.sample {
1716            #[cfg(feature = "bridge-client")]
1717            Some(s) if s.universe_type() == UniverseType::BridgeSet => true,
1718            _ => false,
1719        }
1720    }
1721
1722    /// If possible, return a view of this object that can be used to build a circuit.
1723    pub fn as_circ_target(&self) -> Option<&OwnedCircTarget> {
1724        match &self.inner {
1725            FirstHopInner::Chan(_) => None,
1726            FirstHopInner::Circ(ct) => Some(ct),
1727        }
1728    }
1729
1730    /// Return a view of this as an OwnedChanTarget.
1731    fn chan_target_mut(&mut self) -> &mut OwnedChanTarget {
1732        match &mut self.inner {
1733            FirstHopInner::Chan(ct) => ct,
1734            FirstHopInner::Circ(ct) => ct.chan_target_mut(),
1735        }
1736    }
1737
1738    /// If possible and appropriate, find a circuit target in `bridges` for this
1739    /// `FirstHop`, and make this `FirstHop` a viable circuit target.
1740    ///
1741    /// (By default, any `FirstHop` that a `GuardSet` returns will have enough
1742    /// information to be a `ChanTarget`, but it will be lacking the additional
1743    /// network information in `CircTarget`[^1] necessary for us to build a
1744    /// multi-hop circuit through it.  If this FirstHop is a regular non-bridge
1745    /// `Relay`, then the `CircMgr` will later look up that circuit information
1746    /// itself from the network directory. But if this `FirstHop` *is* a bridge,
1747    /// then we need to find that information in the `BridgeSet`, since the
1748    /// CircMgr does not keep track of the `BridgeSet`.)
1749    ///
1750    /// [^1]: For example, supported protocol versions and ntor keys.
1751    #[cfg(feature = "bridge-client")]
1752    fn lookup_bridge_circ_target(&mut self, bridges: &bridge::BridgeSet) {
1753        use crate::sample::CandidateStatus::Present;
1754        if self.sample.as_ref().map(|s| s.universe_type()) == Some(UniverseType::BridgeSet)
1755            && matches!(self.inner, FirstHopInner::Chan(_))
1756        {
1757            if let Present(bridge_relay) = bridges.bridge_relay_by_guard(self) {
1758                if let Some(circ_target) = bridge_relay.as_relay_with_desc() {
1759                    self.inner =
1760                        FirstHopInner::Circ(OwnedCircTarget::from_circ_target(&circ_target));
1761                }
1762            }
1763        }
1764    }
1765
1766    /// Return true if this `FirstHop` contains circuit target information.
1767    ///
1768    /// This is true if `lookup_bridge_circ_target()` has been called, and it
1769    /// successfully found the circuit target information.
1770    #[cfg(feature = "bridge-client")]
1771    fn contains_circ_target(&self) -> bool {
1772        matches!(self.inner, FirstHopInner::Circ(_))
1773    }
1774}
1775
1776// This is somewhat redundant with the implementations in crate::guard::Guard.
1777impl tor_linkspec::HasAddrs for FirstHop {
1778    fn addrs(&self) -> impl Iterator<Item = SocketAddr> {
1779        match &self.inner {
1780            FirstHopInner::Chan(ct) => Either::Left(ct.addrs()),
1781            FirstHopInner::Circ(ct) => Either::Right(ct.addrs()),
1782        }
1783    }
1784}
1785impl tor_linkspec::HasRelayIds for FirstHop {
1786    fn identity(
1787        &self,
1788        key_type: tor_linkspec::RelayIdType,
1789    ) -> Option<tor_linkspec::RelayIdRef<'_>> {
1790        match &self.inner {
1791            FirstHopInner::Chan(ct) => ct.identity(key_type),
1792            FirstHopInner::Circ(ct) => ct.identity(key_type),
1793        }
1794    }
1795}
1796impl tor_linkspec::HasChanMethod for FirstHop {
1797    fn chan_method(&self) -> tor_linkspec::ChannelMethod {
1798        match &self.inner {
1799            FirstHopInner::Chan(ct) => ct.chan_method(),
1800            FirstHopInner::Circ(ct) => ct.chan_method(),
1801        }
1802    }
1803}
1804impl tor_linkspec::ChanTarget for FirstHop {}
1805
1806/// The purpose for which we plan to use a guard.
1807///
1808/// This can affect the guard selection algorithm.
1809#[derive(Clone, Debug, Default, Eq, PartialEq)]
1810#[non_exhaustive]
1811pub enum GuardUsageKind {
1812    /// We want to use this guard for a data circuit.
1813    ///
1814    /// (This encompasses everything except the `OneHopDirectory` case.)
1815    #[default]
1816    Data,
1817    /// We want to use this guard for a one-hop, non-anonymous
1818    /// directory request.
1819    ///
1820    /// (Our algorithm allows more parallelism for the guards that we use
1821    /// for these circuits.)
1822    OneHopDirectory,
1823}
1824
1825/// A set of parameters describing how a single guard should be selected.
1826///
1827/// Used as an argument to [`GuardMgr::select_guard`].
1828#[derive(Clone, Debug, derive_builder::Builder)]
1829#[builder(build_fn(error = "tor_config::ConfigBuildError"))]
1830pub struct GuardUsage {
1831    /// The purpose for which this guard will be used.
1832    #[builder(default)]
1833    kind: GuardUsageKind,
1834    /// A list of restrictions on which guard may be used.
1835    ///
1836    /// The default is the empty list.
1837    #[builder(sub_builder, setter(custom))]
1838    restrictions: GuardRestrictionList,
1839}
1840
1841impl_standard_builder! { GuardUsage: !Deserialize }
1842
1843/// List of socket restrictions, as configured
1844pub type GuardRestrictionList = Vec<GuardRestriction>;
1845
1846define_list_builder_helper! {
1847    pub struct GuardRestrictionListBuilder {
1848        restrictions: [GuardRestriction],
1849    }
1850    built: GuardRestrictionList = restrictions;
1851    default = vec![];
1852    item_build: |restriction| Ok(restriction.clone());
1853}
1854
1855define_list_builder_accessors! {
1856    struct GuardUsageBuilder {
1857        pub restrictions: [GuardRestriction],
1858    }
1859}
1860
1861impl GuardUsageBuilder {
1862    /// Create a new empty [`GuardUsageBuilder`].
1863    pub fn new() -> Self {
1864        Self::default()
1865    }
1866}
1867
1868/// A restriction that applies to a single request for a guard.
1869///
1870/// Restrictions differ from filters (see [`GuardFilter`]) in that
1871/// they apply to single requests, not to our entire set of guards.
1872/// They're suitable for things like making sure that we don't start
1873/// and end a circuit at the same relay, or requiring a specific
1874/// subprotocol version for certain kinds of requests.
1875#[derive(Clone, Debug, Serialize, Deserialize)]
1876#[non_exhaustive]
1877pub enum GuardRestriction {
1878    /// Don't pick a guard with the provided identity.
1879    AvoidId(RelayId),
1880    /// Don't pick a guard with any of the provided Ed25519 identities.
1881    AvoidAllIds(RelayIdSet),
1882}
1883
1884/// The kind of vanguards to use.
1885#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
1886#[derive(Serialize, Deserialize)] //
1887#[derive(derive_more::Display)] //
1888#[serde(rename_all = "lowercase")]
1889#[cfg(feature = "vanguards")]
1890#[non_exhaustive]
1891pub enum VanguardMode {
1892    /// "Lite" vanguards.
1893    #[default]
1894    #[display("lite")]
1895    Lite = 1,
1896    /// "Full" vanguards.
1897    #[display("full")]
1898    Full = 2,
1899    /// Vanguards are disabled.
1900    #[display("disabled")]
1901    Disabled = 0,
1902}
1903
1904#[cfg(feature = "vanguards")]
1905impl VanguardMode {
1906    /// Build a `VanguardMode` from a [`NetParameters`] parameter.
1907    ///
1908    /// Used for converting [`vanguards_enabled`](NetParameters::vanguards_enabled)
1909    /// or [`vanguards_hs_service`](NetParameters::vanguards_hs_service)
1910    /// to the corresponding `VanguardMode`.
1911    pub(crate) fn from_net_parameter(val: BoundedInt32<0, 2>) -> Self {
1912        match val.get() {
1913            0 => VanguardMode::Disabled,
1914            1 => VanguardMode::Lite,
1915            2 => VanguardMode::Full,
1916            _ => unreachable!("BoundedInt32 was not bounded?!"),
1917        }
1918    }
1919}
1920
1921impl_not_auto_value!(VanguardMode);
1922
1923/// Vanguards configuration.
1924#[derive(Deftly, Clone, Debug, PartialEq, Eq)]
1925#[derive_deftly(TorConfig)]
1926pub struct VanguardConfig {
1927    /// The kind of vanguards to use.
1928    #[deftly(tor_config(default))]
1929    mode: ExplicitOrAuto<VanguardMode>,
1930}
1931
1932impl VanguardConfig {
1933    /// Return the configured [`VanguardMode`].
1934    ///
1935    /// Returns the [`Default`] `VanguardMode`
1936    /// if the mode is [`Auto`](ExplicitOrAuto) or unspecified.
1937    pub fn mode(&self) -> VanguardMode {
1938        match self.mode {
1939            ExplicitOrAuto::Auto => Default::default(),
1940            ExplicitOrAuto::Explicit(mode) => mode,
1941        }
1942    }
1943}
1944
1945/// The kind of vanguards to use.
1946#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
1947#[derive(Serialize, Deserialize)] //
1948#[derive(derive_more::Display)] //
1949#[serde(rename_all = "lowercase")]
1950#[cfg(not(feature = "vanguards"))]
1951#[non_exhaustive]
1952pub enum VanguardMode {
1953    /// Vanguards are disabled.
1954    #[default]
1955    #[display("disabled")]
1956    Disabled = 0,
1957}
1958
1959#[cfg(test)]
1960mod test {
1961    // @@ begin test lint list maintained by maint/add_warning @@
1962    #![allow(clippy::bool_assert_comparison)]
1963    #![allow(clippy::clone_on_copy)]
1964    #![allow(clippy::dbg_macro)]
1965    #![allow(clippy::mixed_attributes_style)]
1966    #![allow(clippy::print_stderr)]
1967    #![allow(clippy::print_stdout)]
1968    #![allow(clippy::single_char_pattern)]
1969    #![allow(clippy::unwrap_used)]
1970    #![allow(clippy::unchecked_time_subtraction)]
1971    #![allow(clippy::useless_vec)]
1972    #![allow(clippy::needless_pass_by_value)]
1973    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
1974    use super::*;
1975    use tor_linkspec::{HasAddrs, HasRelayIds};
1976    use tor_persist::TestingStateMgr;
1977    use tor_rtcompat::test_with_all_runtimes;
1978
1979    #[test]
1980    fn guard_param_defaults() {
1981        let p1 = GuardParams::default();
1982        let p2: GuardParams = (&NetParameters::default()).try_into().unwrap();
1983        assert_eq!(p1, p2);
1984    }
1985
1986    fn init<R: Runtime>(rt: R) -> (GuardMgr<R>, TestingStateMgr, NetDir) {
1987        use tor_netdir::{MdReceiver, PartialNetDir, testnet};
1988        let statemgr = TestingStateMgr::new();
1989        let have_lock = statemgr.try_lock().unwrap();
1990        assert!(have_lock.held());
1991        let guardmgr = GuardMgr::new(rt, statemgr.clone(), &TestConfig::default()).unwrap();
1992        let (con, mds) = testnet::construct_network().unwrap();
1993        let param_overrides = vec![
1994            // We make the sample size smaller than usual to compensate for the
1995            // small testing network.  (Otherwise, we'd sample the whole network,
1996            // and not be able to observe guards in the tests.)
1997            "guard-min-filtered-sample-size=5",
1998            // We choose only two primary guards, to make the tests easier to write.
1999            "guard-n-primary-guards=2",
2000            // We define any restriction that allows 75% or fewer of relays as "meaningful",
2001            // so that we can test the "restrictive" guard sample behavior, and to avoid
2002            "guard-meaningful-restriction-percent=75",
2003        ];
2004        let param_overrides: String =
2005            itertools::Itertools::intersperse(param_overrides.into_iter(), " ").collect();
2006        let override_p = param_overrides.parse().unwrap();
2007        let mut netdir = PartialNetDir::new(con, Some(&override_p));
2008        for md in mds {
2009            netdir.add_microdesc(md);
2010        }
2011        let netdir = netdir.unwrap_if_sufficient().unwrap();
2012
2013        (guardmgr, statemgr, netdir)
2014    }
2015
2016    #[test]
2017    #[allow(clippy::clone_on_copy)]
2018    fn simple_case() {
2019        test_with_all_runtimes!(|rt| async move {
2020            let (guardmgr, statemgr, netdir) = init(rt.clone());
2021            let usage = GuardUsage::default();
2022            guardmgr.install_test_netdir(&netdir);
2023
2024            let (id, mon, usable) = guardmgr.select_guard(usage).unwrap();
2025            // Report that the circuit succeeded.
2026            mon.succeeded();
2027
2028            // May we use the circuit?
2029            let usable = usable.await.unwrap();
2030            assert!(usable);
2031
2032            // Save the state...
2033            guardmgr.flush_msg_queue().await;
2034            guardmgr.store_persistent_state().unwrap();
2035            drop(guardmgr);
2036
2037            // Try reloading from the state...
2038            let guardmgr2 =
2039                GuardMgr::new(rt.clone(), statemgr.clone(), &TestConfig::default()).unwrap();
2040            guardmgr2.install_test_netdir(&netdir);
2041
2042            // Since the guard was confirmed, we should get the same one this time!
2043            let usage = GuardUsage::default();
2044            let (id2, _mon, _usable) = guardmgr2.select_guard(usage).unwrap();
2045            assert!(id2.same_relay_ids(&id));
2046        });
2047    }
2048
2049    #[test]
2050    fn simple_waiting() {
2051        // TODO(nickm): This test fails in rare cases; I suspect a
2052        // race condition somewhere.
2053        //
2054        // I've doubled up on the queue flushing in order to try to make the
2055        // race less likely, but we should investigate.
2056        test_with_all_runtimes!(|rt| async move {
2057            let (guardmgr, _statemgr, netdir) = init(rt);
2058            let u = GuardUsage::default();
2059            guardmgr.install_test_netdir(&netdir);
2060
2061            // We'll have the first two guard fail, which should make us
2062            // try a non-primary guard.
2063            let (id1, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
2064            mon.failed();
2065            guardmgr.flush_msg_queue().await; // avoid race
2066            guardmgr.flush_msg_queue().await; // avoid race
2067            let (id2, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
2068            mon.failed();
2069            guardmgr.flush_msg_queue().await; // avoid race
2070            guardmgr.flush_msg_queue().await; // avoid race
2071
2072            assert!(!id1.same_relay_ids(&id2));
2073
2074            // Now we should get two sampled guards. They should be different.
2075            let (id3, mon3, usable3) = guardmgr.select_guard(u.clone()).unwrap();
2076            let (id4, mon4, usable4) = guardmgr.select_guard(u.clone()).unwrap();
2077            assert!(!id3.same_relay_ids(&id4));
2078
2079            let (u3, u4) = futures::join!(
2080                async {
2081                    mon3.failed();
2082                    guardmgr.flush_msg_queue().await; // avoid race
2083                    usable3.await.unwrap()
2084                },
2085                async {
2086                    mon4.succeeded();
2087                    usable4.await.unwrap()
2088                }
2089            );
2090
2091            assert_eq!((u3, u4), (false, true));
2092        });
2093    }
2094
2095    #[test]
2096    fn filtering_basics() {
2097        test_with_all_runtimes!(|rt| async move {
2098            let (guardmgr, _statemgr, netdir) = init(rt);
2099            let u = GuardUsage::default();
2100            let filter = {
2101                let mut f = GuardFilter::default();
2102                // All the addresses in the test network are {0,1,2,3,4}.0.0.3:9001.
2103                // Limit to only 2.0.0.0/8
2104                f.push_reachable_addresses(vec!["2.0.0.0/8:9001".parse().unwrap()]);
2105                f
2106            };
2107            guardmgr.set_filter(filter);
2108            guardmgr.install_test_netdir(&netdir);
2109            let (guard, _mon, _usable) = guardmgr.select_guard(u).unwrap();
2110            // Make sure that the filter worked.
2111            let addr = guard.addrs().next().unwrap();
2112            assert_eq!(addr, "2.0.0.3:9001".parse().unwrap());
2113        });
2114    }
2115
2116    #[test]
2117    fn external_status() {
2118        test_with_all_runtimes!(|rt| async move {
2119            let (guardmgr, _statemgr, netdir) = init(rt);
2120            let data_usage = GuardUsage::default();
2121            let dir_usage = GuardUsageBuilder::new()
2122                .kind(GuardUsageKind::OneHopDirectory)
2123                .build()
2124                .unwrap();
2125            guardmgr.install_test_netdir(&netdir);
2126            {
2127                // Override this parameter, so that we can get deterministic results below.
2128                let mut inner = guardmgr.inner.lock().unwrap();
2129                inner.params.dir_parallelism = 1;
2130            }
2131
2132            let (guard, mon, _usable) = guardmgr.select_guard(data_usage.clone()).unwrap();
2133            mon.succeeded();
2134
2135            // Record that this guard gave us a bad directory object.
2136            guardmgr.note_external_failure(&guard, ExternalActivity::DirCache);
2137
2138            // We ask for another guard, for data usage.  We should get the same
2139            // one as last time, since the director failure doesn't mean this
2140            // guard is useless as a primary guard.
2141            let (g2, mon, _usable) = guardmgr.select_guard(data_usage).unwrap();
2142            assert_eq!(g2.ed_identity(), guard.ed_identity());
2143            mon.succeeded();
2144
2145            // But if we ask for a guard for directory usage, we should get a
2146            // different one, since the last guard we gave out failed.
2147            let (g3, mon, _usable) = guardmgr.select_guard(dir_usage.clone()).unwrap();
2148            assert_ne!(g3.ed_identity(), guard.ed_identity());
2149            mon.succeeded();
2150
2151            // Now record a success for for directory usage.
2152            guardmgr.note_external_success(&guard, ExternalActivity::DirCache);
2153
2154            // Now that the guard is working as a cache, asking for it should get us the same guard.
2155            let (g4, _mon, _usable) = guardmgr.select_guard(dir_usage).unwrap();
2156            assert_eq!(g4.ed_identity(), guard.ed_identity());
2157        });
2158    }
2159
2160    #[cfg(feature = "vanguards")]
2161    #[test]
2162    fn vanguard_mode_ord() {
2163        assert!(VanguardMode::Disabled < VanguardMode::Lite);
2164        assert!(VanguardMode::Disabled < VanguardMode::Full);
2165        assert!(VanguardMode::Lite < VanguardMode::Full);
2166    }
2167}
tor_guardmgr/lib.rs

tor_guardmgr/
lib.rs