tor_guardmgr/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![doc = include_str!("../README.md")]
3// @@ begin lint list maintained by maint/add_warning @@
4#![allow(renamed_and_removed_lints)] // @@REMOVE_WHEN(ci_arti_stable)
5#![allow(unknown_lints)] // @@REMOVE_WHEN(ci_arti_nightly)
6#![warn(missing_docs)]
7#![warn(noop_method_call)]
8#![warn(unreachable_pub)]
9#![warn(clippy::all)]
10#![deny(clippy::await_holding_lock)]
11#![deny(clippy::cargo_common_metadata)]
12#![deny(clippy::cast_lossless)]
13#![deny(clippy::checked_conversions)]
14#![warn(clippy::cognitive_complexity)]
15#![deny(clippy::debug_assert_with_mut_call)]
16#![deny(clippy::exhaustive_enums)]
17#![deny(clippy::exhaustive_structs)]
18#![deny(clippy::expl_impl_clone_on_copy)]
19#![deny(clippy::fallible_impl_from)]
20#![deny(clippy::implicit_clone)]
21#![deny(clippy::large_stack_arrays)]
22#![warn(clippy::manual_ok_or)]
23#![deny(clippy::missing_docs_in_private_items)]
24#![warn(clippy::needless_borrow)]
25#![warn(clippy::needless_pass_by_value)]
26#![warn(clippy::option_option)]
27#![deny(clippy::print_stderr)]
28#![deny(clippy::print_stdout)]
29#![warn(clippy::rc_buffer)]
30#![deny(clippy::ref_option_ref)]
31#![warn(clippy::semicolon_if_nothing_returned)]
32#![warn(clippy::trait_duplication_in_bounds)]
33#![deny(clippy::unchecked_time_subtraction)]
34#![deny(clippy::unnecessary_wraps)]
35#![warn(clippy::unseparated_literal_suffix)]
36#![deny(clippy::unwrap_used)]
37#![deny(clippy::mod_module_files)]
38#![allow(clippy::let_unit_value)] // This can reasonably be done for explicitness
39#![allow(clippy::uninlined_format_args)]
40#![allow(clippy::significant_drop_in_scrutinee)] // arti/-/merge_requests/588/#note_2812945
41#![allow(clippy::result_large_err)] // temporary workaround for arti#587
42#![allow(clippy::needless_raw_string_hashes)] // complained-about code is fine, often best
43#![allow(clippy::needless_lifetimes)] // See arti#1765
44#![allow(mismatched_lifetime_syntaxes)] // temporary workaround for arti#2060
45//! <!-- @@ end lint list maintained by maint/add_warning @@ -->
46
47// TODO #1645 (either remove this, or decide to have it everywhere)
48#![cfg_attr(not(all(feature = "full", feature = "experimental")), allow(unused))]
49
50// Glossary:
51//     Primary guard
52//     Sample
53//     confirmed
54//     filtered
55
56use futures::channel::mpsc;
57use itertools::Either;
58use serde::{Deserialize, Serialize};
59use std::collections::HashMap;
60use std::net::SocketAddr;
61use std::sync::{Arc, Mutex, Weak};
62use std::time::{Duration, Instant, SystemTime};
63#[cfg(feature = "bridge-client")]
64use tor_error::internal;
65use tor_linkspec::{OwnedChanTarget, OwnedCircTarget, RelayId, RelayIdSet};
66use tor_netdir::NetDirProvider;
67use tor_proto::ClockSkew;
68use tor_rtcompat::SpawnExt;
69use tor_units::BoundedInt32;
70use tracing::{debug, info, instrument, trace, warn};
71
72use tor_config::{ExplicitOrAuto, impl_standard_builder};
73use tor_config::{ReconfigureError, impl_not_auto_value};
74use tor_config::{define_list_builder_accessors, define_list_builder_helper};
75use tor_netdir::{NetDir, Relay, params::NetParameters};
76use tor_persist::{DynStorageHandle, StateMgr};
77use tor_rtcompat::Runtime;
78
79#[cfg(feature = "bridge-client")]
80pub mod bridge;
81mod config;
82mod daemon;
83mod dirstatus;
84mod err;
85mod events;
86pub mod fallback;
87mod filter;
88mod guard;
89mod ids;
90mod pending;
91mod sample;
92mod skew;
93mod util;
94#[cfg(feature = "vanguards")]
95pub mod vanguards;
96
97#[cfg(not(feature = "bridge-client"))]
98#[path = "bridge_disabled.rs"]
99pub mod bridge;
100
101#[cfg(any(test, feature = "testing"))]
102pub use config::testing::TestConfig;
103
104#[cfg(test)]
105use oneshot_fused_workaround as oneshot;
106
107pub use config::GuardMgrConfig;
108pub use err::{GuardMgrConfigError, GuardMgrError, PickGuardError};
109pub use events::ClockSkewEvents;
110pub use filter::GuardFilter;
111pub use ids::FirstHopId;
112pub use pending::{GuardMonitor, GuardStatus, GuardUsable};
113pub use skew::SkewEstimate;
114
115#[cfg(feature = "vanguards")]
116#[cfg_attr(docsrs, doc(cfg(feature = "vanguards")))]
117pub use vanguards::VanguardMgrError;
118
119use pending::{PendingRequest, RequestId};
120use sample::{GuardSet, Universe, UniverseRef};
121
122use crate::ids::{FirstHopIdInner, GuardId};
123
124use tor_config::ConfigBuildError;
125
/// A "guard manager" that selects and remembers a persistent set of
/// guard nodes.
///
/// This is a "handle"; clones of it share state.
#[derive(Clone)]
pub struct GuardMgr<R: Runtime> {
    /// An asynchronous runtime object.
    ///
    /// GuardMgr uses this runtime for timing, timeouts, and spawning
    /// tasks.
    runtime: R,

    /// Internal state for the guard manager.
    ///
    /// This is shared (via `Arc`) among all clones of this handle, and is
    /// protected by a `Mutex`.  The background tasks launched by the manager
    /// are given `Weak` references to the same state.
    inner: Arc<Mutex<GuardMgrInner>>,
}
141
/// Helper type that holds the data used by a [`GuardMgr`].
///
/// This would just be a [`GuardMgr`], except that it needs to sit inside
/// a `Mutex` and get accessed by daemon tasks.
struct GuardMgrInner {
    /// Last time when we marked all of our primary guards as retriable.
    ///
    /// We keep track of this time so that we can rate-limit
    /// these attempts.
    last_primary_retry_time: Instant,

    /// Persistent guard manager state.
    ///
    /// This object remembers one or more persistent sets of guards that we can
    /// use, along with their relative priorities and statuses.
    guards: GuardSets,

    /// The current filter that we're using to decide which guards are
    /// supported.
    //
    // TODO: This field is duplicated in the current active [`GuardSet`]; we
    // should fix that.
    filter: GuardFilter,

    /// Configuration values derived from the consensus parameters.
    ///
    /// This is updated whenever the consensus parameters change.
    params: GuardParams,

    /// An mpsc channel, used to tell the task running in
    /// [`daemon::report_status_events`] about a new event to monitor.
    ///
    /// This uses an `UnboundedSender` so that we don't have to await
    /// while sending the message, which in turn allows the GuardMgr
    /// API to be simpler.  The risk, however, is that there's no
    /// backpressure in the event that the task running
    /// [`daemon::report_status_events`] fails to read from this
    /// channel.
    ctrl: mpsc::UnboundedSender<daemon::Msg>,

    /// Information about guards that we've given out, but where we have
    /// not yet heard whether the guard was successful.
    ///
    /// Upon learning whether the guard was successful, the pending
    /// requests in this map may be either moved to `waiting`, or
    /// discarded.
    ///
    /// There can be multiple pending requests corresponding to the
    /// same guard.
    pending: HashMap<RequestId, PendingRequest>,

    /// A list of pending requests for which we have heard that the
    /// guard was successful, but we have not yet decided whether the
    /// circuit may be used.
    ///
    /// There can be multiple waiting requests corresponding to the
    /// same guard.
    waiting: Vec<PendingRequest>,

    /// A list of fallback directories used to access the directory system
    /// when no other directory information is yet known.
    fallbacks: fallback::FallbackState,

    /// Location in which to store persistent state.
    storage: DynStorageHandle<GuardSets>,

    /// A sender object to publish changes in our estimated clock skew.
    send_skew: postage::watch::Sender<Option<SkewEstimate>>,

    /// A receiver object to hand out to observers who want to know about
    /// changes in our estimated clock skew.
    recv_skew: events::ClockSkewEvents,

    /// A netdir provider that we can use for adding new guards when
    /// insufficient guards are available.
    ///
    /// This has to be an Option so it can be initialized from None: at the
    /// time a GuardMgr is created, there is no NetDirProvider for it to use.
    netdir_provider: Option<Weak<dyn NetDirProvider>>,

    /// A bridge descriptor provider that we can use for discovering bridge
    /// descriptors.
    ///
    /// This has to be an Option so it can be initialized from None: at the time
    /// a GuardMgr is created, there is no BridgeDescProvider for it to use.
    #[cfg(feature = "bridge-client")]
    bridge_desc_provider: Option<Weak<dyn bridge::BridgeDescProvider>>,

    /// A list of the bridges that we are configured to use, or "None" if we are
    /// not configured to use bridges.
    #[cfg(feature = "bridge-client")]
    configured_bridges: Option<Arc<[bridge::BridgeConfig]>>,
}
234
/// A selector that tells us which [`GuardSet`] of several is currently in use.
///
/// This type derives `strum::EnumIter` so that code which needs to visit
/// every guard set (such as `GuardSets::copy_status_from`) can iterate over
/// all of the selectors.
#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, strum::EnumIter)]
enum GuardSetSelector {
    /// The default guard set is currently in use: that's the one that we use
    /// when we have no filter installed, or the filter permits most of the
    /// guards on the network.
    #[default]
    Default,
    /// A "restrictive" guard set is currently in use: that's the one that we
    /// use when we have a filter that excludes a large fraction of the guards
    /// on the network.
    Restricted,
    /// The "bridges" guard set is currently in use: we are selecting our guards
    /// from among the universe of configured bridges.
    ///
    /// This variant only exists when the `bridge-client` feature is enabled.
    #[cfg(feature = "bridge-client")]
    Bridges,
}
252
/// Describes the [`Universe`] that a guard sample should take its guards from.
///
/// Each [`GuardSetSelector`] maps onto exactly one of these; see
/// `GuardSetSelector::universe_type`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum UniverseType {
    /// Take information from the network directory.
    NetDir,
    /// Take information from the configured bridges.
    ///
    /// This variant only exists when the `bridge-client` feature is enabled.
    #[cfg(feature = "bridge-client")]
    BridgeSet,
}
262
263impl GuardSetSelector {
264    /// Return a description of which [`Universe`] this guard sample should take
265    /// its guards from.
266    fn universe_type(&self) -> UniverseType {
267        match self {
268            GuardSetSelector::Default | GuardSetSelector::Restricted => UniverseType::NetDir,
269            #[cfg(feature = "bridge-client")]
270            GuardSetSelector::Bridges => UniverseType::BridgeSet,
271        }
272    }
273}
274
/// Persistent state for a guard manager, as serialized to disk.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct GuardSets {
    /// Which set of guards is currently in use?
    ///
    /// This field is skipped by serde: it is never written out, and takes its
    /// `Default` value when the state is deserialized.
    #[serde(skip)]
    active_set: GuardSetSelector,

    /// The default set of guards to use.
    ///
    /// We use this one when there is no filter, or the filter permits most of the
    /// guards on the network.
    default: GuardSet,

    /// A guard set to use when we have a restrictive filter.
    ///
    /// If this field is absent from the stored state, it deserializes to an
    /// empty (default) set.
    #[serde(default)]
    restricted: GuardSet,

    /// A guard set sampled from our configured bridges.
    ///
    /// If this field is absent from the stored state, it deserializes to an
    /// empty (default) set.
    #[serde(default)]
    #[cfg(feature = "bridge-client")]
    bridges: GuardSet,

    /// Unrecognized fields, including (possibly) other guard sets.
    ///
    /// These are captured via `serde(flatten)` so that they are kept when
    /// the state is written out again.
    #[serde(flatten)]
    remaining: HashMap<String, tor_persist::JsonValue>,
}
301
/// The key (filename) we use for storing our persistent guard state in the
/// `StateMgr`.
///
/// We used to store this in a different format, in a file called
/// "default_guards" (before Arti 0.1.0).
const STORAGE_KEY: &str = "guards";
308
/// A description of which circuits to retire because of a configuration change.
///
/// This type is `#[must_use]`: callers that receive one are expected to act
/// on it (or to discard it explicitly).
///
/// TODO(nickm): Eventually we will want to add a "Some" here, to support
/// removing only those circuits that correspond to no-longer-usable guards.
#[derive(Clone, Debug, Eq, PartialEq)]
#[must_use]
#[non_exhaustive]
pub enum RetireCircuits {
    /// There's no need to retire any circuits.
    None,
    /// All circuits should be retired.
    All,
}
322
323impl<R: Runtime> GuardMgr<R> {
324    /// Create a new "empty" guard manager and launch its background tasks.
325    ///
326    /// It won't be able to hand out any guards until a [`NetDirProvider`] has
327    /// been installed.
328    #[instrument(skip_all, level = "trace")]
329    pub fn new<S>(
330        runtime: R,
331        state_mgr: S,
332        config: &impl GuardMgrConfig,
333    ) -> Result<Self, GuardMgrError>
334    where
335        S: StateMgr + Send + Sync + 'static,
336    {
337        let (ctrl, rcv) = mpsc::unbounded();
338        let storage: DynStorageHandle<GuardSets> = state_mgr.create_handle(STORAGE_KEY);
339        // TODO(nickm): We should do something about the old state in
340        // `default_guards`.  Probably it would be best to delete it.  We could
341        // try to migrate it instead, but that's beyond the stability guarantee
342        // that we're getting at this stage of our (pre-0.1) development.
343        let state = storage.load()?.unwrap_or_default();
344
345        let (send_skew, recv_skew) = postage::watch::channel();
346        let recv_skew = ClockSkewEvents { inner: recv_skew };
347
348        let inner = Arc::new(Mutex::new(GuardMgrInner {
349            guards: state,
350            filter: GuardFilter::unfiltered(),
351            last_primary_retry_time: runtime.now(),
352            params: GuardParams::default(),
353            ctrl,
354            pending: HashMap::new(),
355            waiting: Vec::new(),
356            fallbacks: config.fallbacks().into(),
357            storage,
358            send_skew,
359            recv_skew,
360            netdir_provider: None,
361            #[cfg(feature = "bridge-client")]
362            bridge_desc_provider: None,
363            #[cfg(feature = "bridge-client")]
364            configured_bridges: None,
365        }));
366        #[cfg(feature = "bridge-client")]
367        {
368            let mut inner = inner.lock().expect("lock poisoned");
369            // TODO(nickm): This calls `GuardMgrInner::update`. Will we mind doing so before any
370            // providers are configured? I think not, but we should make sure.
371            let _: RetireCircuits =
372                inner.replace_bridge_config(config, runtime.wallclock(), runtime.now())?;
373        }
374        {
375            let weak_inner = Arc::downgrade(&inner);
376            let rt_clone = runtime.clone();
377            runtime
378                .spawn(daemon::report_status_events(rt_clone, weak_inner, rcv))
379                .map_err(|e| GuardMgrError::from_spawn("guard status event reporter", e))?;
380        }
381        {
382            let rt_clone = runtime.clone();
383            let weak_inner = Arc::downgrade(&inner);
384            runtime
385                .spawn(daemon::run_periodic(rt_clone, weak_inner))
386                .map_err(|e| GuardMgrError::from_spawn("periodic guard updater", e))?;
387        }
388        Ok(GuardMgr { runtime, inner })
389    }
390
391    /// Install a [`NetDirProvider`] for use by this guard manager.
392    ///
393    /// It will be used to keep the guards up-to-date with changes from the
394    /// network directory, and to find new guards when no NetDir is provided to
395    /// select_guard().
396    ///
397    /// TODO: we should eventually return some kind of a task handle from this
398    /// task, even though it is not strictly speaking periodic.
399    ///
400    /// The guardmgr retains only a `Weak` reference to `provider`,
401    /// `install_netdir_provider` downgrades it on entry,
402    // TODO add ref to document when https://gitlab.torproject.org/tpo/core/arti/-/issues/624
403    // is fixed.  Also, maybe take an owned `Weak` to start with.
404    //
405    /// # Panics
406    ///
407    /// Panics if a [`NetDirProvider`] is already installed.
408    pub fn install_netdir_provider(
409        &self,
410        provider: &Arc<dyn NetDirProvider>,
411    ) -> Result<(), GuardMgrError> {
412        let weak_provider = Arc::downgrade(provider);
413        {
414            let mut inner = self.inner.lock().expect("Poisoned lock");
415            assert!(inner.netdir_provider.is_none());
416            inner.netdir_provider = Some(weak_provider.clone());
417        }
418        let weak_inner = Arc::downgrade(&self.inner);
419        let rt_clone = self.runtime.clone();
420        self.runtime
421            .spawn(daemon::keep_netdir_updated(
422                rt_clone,
423                weak_inner,
424                weak_provider,
425            ))
426            .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
427        Ok(())
428    }
429
430    /// Configure a new [`bridge::BridgeDescProvider`] for this [`GuardMgr`].
431    ///
432    /// It will be used to learn about changes in the set of available bridge
433    /// descriptors; we'll inform it whenever our desired set of bridge
434    /// descriptors changes.
435    ///
436    /// TODO: Same todo as in `install_netdir_provider` about task handles.
437    ///
438    /// # Panics
439    ///
440    /// Panics if a [`bridge::BridgeDescProvider`] is already installed.
441    #[cfg(feature = "bridge-client")]
442    pub fn install_bridge_desc_provider(
443        &self,
444        provider: &Arc<dyn bridge::BridgeDescProvider>,
445    ) -> Result<(), GuardMgrError> {
446        let weak_provider = Arc::downgrade(provider);
447        {
448            let mut inner = self.inner.lock().expect("Poisoned lock");
449            assert!(inner.bridge_desc_provider.is_none());
450            inner.bridge_desc_provider = Some(weak_provider.clone());
451        }
452
453        let weak_inner = Arc::downgrade(&self.inner);
454        let rt_clone = self.runtime.clone();
455        self.runtime
456            .spawn(daemon::keep_bridge_descs_updated(
457                rt_clone,
458                weak_inner,
459                weak_provider,
460            ))
461            .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
462
463        Ok(())
464    }
465
466    /// Flush our current guard state to the state manager, if there
467    /// is any unsaved state.
468    pub fn store_persistent_state(&self) -> Result<(), GuardMgrError> {
469        let inner = self.inner.lock().expect("Poisoned lock");
470        trace!("Flushing guard state to disk.");
471        inner.storage.store(&inner.guards)?;
472        Ok(())
473    }
474
475    /// Reload state from the state manager.
476    ///
477    /// We only call this method if we _don't_ have the lock on the state
478    /// files.  If we have the lock, we only want to save.
479    #[instrument(level = "trace", skip_all)]
480    pub fn reload_persistent_state(&self) -> Result<(), GuardMgrError> {
481        let mut inner = self.inner.lock().expect("Poisoned lock");
482        if let Some(new_guards) = inner.storage.load()? {
483            inner.replace_guards_with(new_guards, self.runtime.wallclock(), self.runtime.now());
484        }
485        Ok(())
486    }
487
488    /// Switch from having an unowned persistent state to having an owned one.
489    ///
490    /// Requires that we hold the lock on the state files.
491    #[instrument(level = "trace", skip_all)]
492    pub fn upgrade_to_owned_persistent_state(&self) -> Result<(), GuardMgrError> {
493        let mut inner = self.inner.lock().expect("Poisoned lock");
494        debug_assert!(inner.storage.can_store());
495        let new_guards = inner.storage.load()?.unwrap_or_default();
496        let wallclock = self.runtime.wallclock();
497        let now = self.runtime.now();
498        inner.replace_guards_with(new_guards, wallclock, now);
499        Ok(())
500    }
501
502    /// Return true if `netdir` has enough information to safely become our new netdir.
503    pub fn netdir_is_sufficient(&self, netdir: &NetDir) -> bool {
504        let mut inner = self.inner.lock().expect("Poisoned lock");
505        if inner.guards.active_set.universe_type() != UniverseType::NetDir {
506            // If we aren't using the netdir, this isn't something we want to look at.
507            return true;
508        }
509        inner
510            .guards
511            .active_guards_mut()
512            .n_primary_without_id_info_in(netdir)
513            == 0
514    }
515
516    /// Mark every guard as potentially retriable, regardless of how recently we
517    /// failed to connect to it.
518    pub fn mark_all_guards_retriable(&self) {
519        let mut inner = self.inner.lock().expect("Poisoned lock");
520        inner.guards.active_guards_mut().mark_all_guards_retriable();
521    }
522
523    /// Configure this guardmgr to use a fixed [`NetDir`] instead of a provider.
524    ///
525    /// This function is for testing only, and is exclusive with
526    /// `install_netdir_provider`.
527    ///
528    /// # Panics
529    ///
530    /// Panics if any [`NetDirProvider`] has already been installed.
531    #[cfg(any(test, feature = "testing"))]
532    pub fn install_test_netdir(&self, netdir: &NetDir) {
533        use tor_netdir::testprovider::TestNetDirProvider;
534        let wallclock = self.runtime.wallclock();
535        let now = self.runtime.now();
536        let netdir_provider: Arc<dyn NetDirProvider> =
537            Arc::new(TestNetDirProvider::from(netdir.clone()));
538        self.install_netdir_provider(&netdir_provider)
539            .expect("Couldn't install testing network provider");
540
541        let mut inner = self.inner.lock().expect("Poisoned lock");
542        inner.update(wallclock, now);
543    }
544
545    /// Replace the configuration in this `GuardMgr` with `config`.
546    #[instrument(level = "trace", skip_all)]
547    pub fn reconfigure(
548        &self,
549        config: &impl GuardMgrConfig,
550    ) -> Result<RetireCircuits, ReconfigureError> {
551        let mut inner = self.inner.lock().expect("Poisoned lock");
552        // Change the set of configured fallbacks.
553        {
554            let mut fallbacks: fallback::FallbackState = config.fallbacks().into();
555            std::mem::swap(&mut inner.fallbacks, &mut fallbacks);
556            inner.fallbacks.take_status_from(fallbacks);
557        }
558        // If we are built to use bridges, change the bridge configuration.
559        #[cfg(feature = "bridge-client")]
560        {
561            let wallclock = self.runtime.wallclock();
562            let now = self.runtime.now();
563            Ok(inner.replace_bridge_config(config, wallclock, now)?)
564        }
565        // If we are built to use bridges, change the bridge configuration.
566        #[cfg(not(feature = "bridge-client"))]
567        {
568            Ok(RetireCircuits::None)
569        }
570    }
571
572    /// Replace the current [`GuardFilter`] used by this `GuardMgr`.
573    // TODO should this be part of the config?
574    pub fn set_filter(&self, filter: GuardFilter) {
575        let wallclock = self.runtime.wallclock();
576        let now = self.runtime.now();
577        let mut inner = self.inner.lock().expect("Poisoned lock");
578        inner.set_filter(filter, wallclock, now);
579    }
580
581    /// Select a guard for a given [`GuardUsage`].
582    ///
583    /// On success, we return a [`FirstHop`] object to identify which
584    /// guard we have picked, a [`GuardMonitor`] object that the
585    /// caller can use to report whether its attempt to use the guard
586    /// succeeded or failed, and a [`GuardUsable`] future that the
587    /// caller can use to decide whether a circuit built through the
588    /// guard is actually safe to use.
589    ///
590    /// That last point is important: It's okay to build a circuit
591    /// through the guard returned by this function, but you can't
592    /// actually use it for traffic unless the [`GuardUsable`] future
593    /// yields "true".
594    #[instrument(skip_all, level = "trace")]
595    pub fn select_guard(
596        &self,
597        usage: GuardUsage,
598    ) -> Result<(FirstHop, GuardMonitor, GuardUsable), PickGuardError> {
599        let now = self.runtime.now();
600        let wallclock = self.runtime.wallclock();
601
602        let mut inner = self.inner.lock().expect("Poisoned lock");
603
604        // (I am not 100% sure that we need to consider_all_retries here, but
605        // it should _probably_ not hurt.)
606        inner.guards.active_guards_mut().consider_all_retries(now);
607
608        let (origin, guard) = inner.select_guard_with_expand(&usage, now, wallclock)?;
609        trace!(?guard, ?usage, "Guard selected");
610
611        let (usable, usable_sender) = if origin.usable_immediately() {
612            (GuardUsable::new_usable_immediately(), None)
613        } else {
614            let (u, snd) = GuardUsable::new_uncertain();
615            (u, Some(snd))
616        };
617        let request_id = pending::RequestId::next();
618        let ctrl = inner.ctrl.clone();
619        let monitor = GuardMonitor::new(request_id, ctrl);
620
621        // Note that the network can be down even if all the primary guards
622        // are not yet marked as unreachable.  But according to guard-spec we
623        // don't want to acknowledge the net as down before that point, since
624        // we don't mark all the primary guards as retriable unless
625        // we've been forced to non-primary guards.
626        let net_has_been_down =
627            if let Some(duration) = tor_proto::time_since_last_incoming_traffic() {
628                inner
629                    .guards
630                    .active_guards_mut()
631                    .all_primary_guards_are_unreachable()
632                    && duration >= inner.params.internet_down_timeout
633            } else {
634                // TODO: Is this the correct behavior in this case?
635                false
636            };
637
638        let pending_request = pending::PendingRequest::new(
639            guard.first_hop_id(),
640            usage,
641            usable_sender,
642            net_has_been_down,
643        );
644        inner.pending.insert(request_id, pending_request);
645
646        match &guard.sample {
647            Some(sample) => {
648                let guard_id = GuardId::from_relay_ids(&guard);
649                inner
650                    .guards
651                    .guards_mut(sample)
652                    .record_attempt(&guard_id, now);
653            }
654            None => {
655                // We don't record attempts for fallbacks; we only care when
656                // they have failed.
657            }
658        }
659
660        Ok((guard, monitor, usable))
661    }
662
663    /// Record that _after_ we built a circuit with a guard, something described
664    /// in `external_failure` went wrong with it.
665    pub fn note_external_failure<T>(&self, identity: &T, external_failure: ExternalActivity)
666    where
667        T: tor_linkspec::HasRelayIds + ?Sized,
668    {
669        let now = self.runtime.now();
670        let mut inner = self.inner.lock().expect("Poisoned lock");
671        let ids = inner.lookup_ids(identity);
672        for id in ids {
673            match &id.0 {
674                FirstHopIdInner::Guard(sample, id) => {
675                    inner
676                        .guards
677                        .guards_mut(sample)
678                        .record_failure(id, Some(external_failure), now);
679                }
680                FirstHopIdInner::Fallback(id) => {
681                    if external_failure == ExternalActivity::DirCache {
682                        inner.fallbacks.note_failure(id, now);
683                    }
684                }
685            }
686        }
687    }
688
689    /// Record that _after_ we built a circuit with a guard, some activity
690    /// described in `external_activity` was successful with it.
691    pub fn note_external_success<T>(&self, identity: &T, external_activity: ExternalActivity)
692    where
693        T: tor_linkspec::HasRelayIds + ?Sized,
694    {
695        let mut inner = self.inner.lock().expect("Poisoned lock");
696
697        inner.record_external_success(identity, external_activity, self.runtime.wallclock());
698    }
699
700    /// Return a stream of events about our estimated clock skew; these events
701    /// are `None` when we don't have enough information to make an estimate,
702    /// and `Some(`[`SkewEstimate`]`)` otherwise.
703    ///
704    /// Note that this stream can be lossy: if the estimate changes more than
705    /// one before you read from the stream, you might only get the most recent
706    /// update.
707    pub fn skew_events(&self) -> ClockSkewEvents {
708        let inner = self.inner.lock().expect("Poisoned lock");
709        inner.recv_skew.clone()
710    }
711
712    /// Ensure that the message queue is flushed before proceeding to
713    /// the next step.  Used for testing.
714    #[cfg(test)]
715    async fn flush_msg_queue(&self) {
716        let (snd, rcv) = oneshot::channel();
717        let pingmsg = daemon::Msg::Ping(snd);
718        {
719            let inner = self.inner.lock().expect("Poisoned lock");
720            inner
721                .ctrl
722                .unbounded_send(pingmsg)
723                .expect("Guard observer task exited prematurely.");
724        }
725        let _ = rcv.await;
726    }
727}
728
/// An activity that can succeed or fail, and whose success or failure can be
/// attributed to a guard.
///
/// These values are reported through `GuardMgr::note_external_failure` and
/// `GuardMgr::note_external_success`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum ExternalActivity {
    /// The activity of using the guard as a directory cache.
    DirCache,
}
737
738impl GuardSets {
739    /// Return a reference to the currently active set of guards.
740    ///
741    /// (That's easy enough for now, since there is never more than one set of
742    /// guards.  But eventually that will change, as we add support for more
743    /// complex filter types, and for bridge relays. Those will use separate
744    /// `GuardSet` instances, and this accessor will choose the right one.)
745    fn active_guards(&self) -> &GuardSet {
746        self.guards(&self.active_set)
747    }
748
749    /// Return the set of guards corresponding to the provided selector.
750    fn guards(&self, selector: &GuardSetSelector) -> &GuardSet {
751        match selector {
752            GuardSetSelector::Default => &self.default,
753            GuardSetSelector::Restricted => &self.restricted,
754            #[cfg(feature = "bridge-client")]
755            GuardSetSelector::Bridges => &self.bridges,
756        }
757    }
758
759    /// Return a mutable reference to the currently active set of guards.
760    fn active_guards_mut(&mut self) -> &mut GuardSet {
761        self.guards_mut(&self.active_set.clone())
762    }
763
764    /// Return a mutable reference to the set of guards corresponding to the
765    /// provided selector.
766    fn guards_mut(&mut self, selector: &GuardSetSelector) -> &mut GuardSet {
767        match selector {
768            GuardSetSelector::Default => &mut self.default,
769            GuardSetSelector::Restricted => &mut self.restricted,
770            #[cfg(feature = "bridge-client")]
771            GuardSetSelector::Bridges => &mut self.bridges,
772        }
773    }
774
775    /// Update all non-persistent state for the guards in this object with the
776    /// state in `other`.
777    fn copy_status_from(&mut self, mut other: GuardSets) {
778        use strum::IntoEnumIterator;
779        for sample in GuardSetSelector::iter() {
780            self.guards_mut(&sample)
781                .copy_ephemeral_status_into_newly_loaded_state(std::mem::take(
782                    other.guards_mut(&sample),
783                ));
784        }
785        self.active_set = other.active_set;
786    }
787}
788
789impl GuardMgrInner {
790    /// Look up the latest [`NetDir`] (if there is one) from our
791    /// [`NetDirProvider`] (if we have one).
792    fn timely_netdir(&self) -> Option<Arc<NetDir>> {
793        self.netdir_provider
794            .as_ref()
795            .and_then(Weak::upgrade)
796            .and_then(|np| np.timely_netdir().ok())
797    }
798
799    /// Look up the latest [`BridgeDescList`](bridge::BridgeDescList) (if there
800    /// is one) from our [`BridgeDescProvider`](bridge::BridgeDescProvider) (if
801    /// we have one).
802    #[cfg(feature = "bridge-client")]
803    fn latest_bridge_desc_list(&self) -> Option<Arc<bridge::BridgeDescList>> {
804        self.bridge_desc_provider
805            .as_ref()
806            .and_then(Weak::upgrade)
807            .map(|bp| bp.bridges())
808    }
809
810    /// Run a function that takes `&mut self` and an optional NetDir.
811    ///
812    /// We try to use the netdir from our [`NetDirProvider`] (if we have one).
813    /// Therefore, although its _parameters_ are suitable for every
814    /// [`GuardSet`], its _contents_ might not be. For those, call
815    /// [`with_opt_universe`](Self::with_opt_universe) instead.
816    //
817    // This function exists to handle the lifetime mess where sometimes the
818    // resulting NetDir will borrow from `netdir`, and sometimes it will borrow
819    // from an Arc returned by `self.latest_netdir()`.
820    fn with_opt_netdir<F, T>(&mut self, func: F) -> T
821    where
822        F: FnOnce(&mut Self, Option<&NetDir>) -> T,
823    {
824        if let Some(nd) = self.timely_netdir() {
825            func(self, Some(nd.as_ref()))
826        } else {
827            func(self, None)
828        }
829    }
830
831    /// Return the latest `BridgeSet` based on our `BridgeDescProvider` and our
832    /// configured bridges.
833    ///
834    /// Returns `None` if we are not configured to use bridges.
835    #[cfg(feature = "bridge-client")]
836    fn latest_bridge_set(&self) -> Option<bridge::BridgeSet> {
837        let bridge_config = self.configured_bridges.as_ref()?.clone();
838        let bridge_descs = self.latest_bridge_desc_list();
839        Some(bridge::BridgeSet::new(bridge_config, bridge_descs))
840    }
841
842    /// Run a function that takes `&mut self` and an optional [`UniverseRef`].
843    ///
844    /// We try to get a universe from the appropriate source for the current
845    /// active guard set.
846    fn with_opt_universe<F, T>(&mut self, func: F) -> T
847    where
848        F: FnOnce(&mut Self, Option<&UniverseRef>) -> T,
849    {
850        // TODO: it might be nice to make `func` take an GuardSet and a set of
851        // parameters, so we can't get the active set wrong. Doing that will
852        // require a fair amount of refactoring so that the borrow checker is
853        // happy, however.
854        match self.guards.active_set.universe_type() {
855            UniverseType::NetDir => {
856                if let Some(nd) = self.timely_netdir() {
857                    func(self, Some(&UniverseRef::NetDir(nd)))
858                } else {
859                    func(self, None)
860                }
861            }
862            #[cfg(feature = "bridge-client")]
863            UniverseType::BridgeSet => func(
864                self,
865                self.latest_bridge_set()
866                    .map(UniverseRef::BridgeSet)
867                    .as_ref(),
868            ),
869        }
870    }
871
872    /// Update the status of all guards in the active set, based on the passage
873    /// of time, our configuration, and the relevant Universe for our active
874    /// set.
875    #[instrument(skip_all, level = "trace")]
876    fn update(&mut self, wallclock: SystemTime, now: Instant) {
877        self.with_opt_netdir(|this, netdir| {
878            // Here we update our parameters from the latest NetDir, and check
879            // whether we need to change to a (non)-restrictive GuardSet based
880            // on those parameters and our configured filter.
881            //
882            // This uses a NetDir unconditionally, since we always want to take
883            // the network parameters our parameters from the consensus even if
884            // the guards themselves are from a BridgeSet.
885            this.update_active_set_params_and_filter(netdir);
886        });
887        self.with_opt_universe(|this, univ| {
888            // Now we update the set of guards themselves based on the
889            // Universe, which is either the latest NetDir, or the latest
890            // BridgeSet—depending on what the GuardSet wants.
891            Self::update_guardset_internal(
892                &this.params,
893                wallclock,
894                this.guards.active_set.universe_type(),
895                this.guards.active_guards_mut(),
896                univ,
897            );
898            #[cfg(feature = "bridge-client")]
899            this.update_desired_descriptors(now);
900            #[cfg(not(feature = "bridge-client"))]
901            let _ = now;
902        });
903    }
904
905    /// Replace our bridge configuration with the one from `new_config`.
906    #[cfg(feature = "bridge-client")]
907    #[instrument(level = "trace", skip_all)]
908    fn replace_bridge_config(
909        &mut self,
910        new_config: &impl GuardMgrConfig,
911        wallclock: SystemTime,
912        now: Instant,
913    ) -> Result<RetireCircuits, GuardMgrConfigError> {
914        match (&self.configured_bridges, new_config.bridges_enabled()) {
915            (None, false) => {
916                assert_ne!(
917                    self.guards.active_set.universe_type(),
918                    UniverseType::BridgeSet
919                );
920                return Ok(RetireCircuits::None); // nothing to do
921            }
922            (_, true) if !self.storage.can_store() => {
923                // TODO: Ideally we would try to upgrade, obtaining an exclusive lock,
924                // but `StorageHandle` currently lacks a method for that.
925                return Err(GuardMgrConfigError::NoLock("bridges configured".into()));
926            }
927            (Some(current_bridges), true) if new_config.bridges() == current_bridges.as_ref() => {
928                assert_eq!(
929                    self.guards.active_set.universe_type(),
930                    UniverseType::BridgeSet
931                );
932                return Ok(RetireCircuits::None); // nothing to do.
933            }
934            (_, true) => {
935                self.configured_bridges = Some(new_config.bridges().into());
936                self.guards.active_set = GuardSetSelector::Bridges;
937            }
938            (_, false) => {
939                self.configured_bridges = None;
940                self.guards.active_set = GuardSetSelector::Default;
941            }
942        }
943
944        // If we have gotten here, we have changed the set of bridges, changed
945        // which set is active, or changed them both.  We need to make sure that
946        // our `GuardSet` object is up-to-date with our configuration.
947        self.update(wallclock, now);
948
949        // We also need to tell the caller that its circuits are no good any
950        // more.
951        //
952        // TODO(nickm): Someday we can do this more judiciously by retuning
953        // "Some" in the case where we're still using bridges but our new bridge
954        // set contains different elements; see comment on RetireCircuits.
955        //
956        // TODO(nickm): We could also safely return RetireCircuits::None if we
957        // are using bridges, and our new bridge list is a superset of the older
958        // one.
959        Ok(RetireCircuits::All)
960    }
961
962    /// Update our parameters, our selection (based on network parameters and
963    /// configuration), and make sure the active GuardSet has the right
964    /// configuration itself.
965    ///
966    /// We should call this whenever the NetDir's parameters change, or whenever
967    /// our filter changes.  We do not need to call it for new elements arriving
968    /// in our Universe, since those do not affect anything here.
969    ///
970    /// We should also call this whenever a new GuardSet becomes active for any
971    /// reason _other_ than just having called this function.
972    ///
973    /// (This function is only invoked from `update`, which should be called
974    /// under the above circumstances.)
975    fn update_active_set_params_and_filter(&mut self, netdir: Option<&NetDir>) {
976        // Set the parameters.  These always come from the NetDir, even if this
977        // is a bridge set.
978        if let Some(netdir) = netdir {
979            match GuardParams::try_from(netdir.params()) {
980                Ok(params) => self.params = params,
981                Err(e) => warn!("Unusable guard parameters from consensus: {}", e),
982            }
983
984            self.select_guard_set_based_on_filter(netdir);
985        }
986
987        // Change the filter, if it doesn't match what the guards have.
988        //
989        // TODO(nickm): We could use a "dirty" flag or something to decide
990        // whether we need to call set_filter, if this comparison starts to show
991        // up in profiles.
992        if self.guards.active_guards().filter() != &self.filter {
993            let restrictive = self.guards.active_set == GuardSetSelector::Restricted;
994            self.guards
995                .active_guards_mut()
996                .set_filter(self.filter.clone(), restrictive);
997        }
998    }
999
1000    /// Update the status of every guard in `active_guards`, and expand it as
1001    /// needed.
1002    ///
1003    /// This function doesn't take `&self`, to make sure that we are only
1004    /// affecting a single `GuardSet`, and to avoid confusing the borrow
1005    /// checker.
1006    ///
1007    /// We should call this whenever the contents of the universe have changed.
1008    ///
1009    /// We should also call this whenever a new GuardSet becomes active.
1010    fn update_guardset_internal<U: Universe>(
1011        params: &GuardParams,
1012        now: SystemTime,
1013        universe_type: UniverseType,
1014        active_guards: &mut GuardSet,
1015        universe: Option<&U>,
1016    ) -> ExtendedStatus {
1017        // Expire guards.  Do that early, in case doing so makes it clear that
1018        // we need to grab more guards or mark others as primary.
1019        active_guards.expire_old_guards(params, now);
1020
1021        let extended = if let Some(universe) = universe {
1022            // TODO: This check here may be completely unnecessary. I inserted
1023            // it back in 5ac0fcb7ef603e0d14 because I was originally concerned
1024            // it might be undesirable to list a primary guard as "missing dir
1025            // info" (and therefore unusable) if we were expecting to get its
1026            // microdescriptor "very soon."
1027            //
1028            // But due to the other check in `netdir_is_sufficient`, we
1029            // shouldn't be installing a netdir until it has microdescs for all
1030            // of the (non-bridge) primary guards that it lists. - nickm
1031            let n = active_guards.n_primary_without_id_info_in(universe);
1032            if n > 0 && universe_type == UniverseType::NetDir {
1033                // We are missing the information from a NetDir needed to see
1034                // whether our primary guards are listed, so we shouldn't update
1035                // our guard status.
1036                //
1037                // We don't want to do this check if we are using bridges, since
1038                // a missing bridge descriptor is not guaranteed to temporary
1039                // problem in the same way that a missing microdescriptor is.
1040                // (When a bridge desc is missing, the bridge could be down or
1041                // unreachable, and nobody else can help us. But if a microdesc
1042                // is missing, we just need to find a cache that has it.)
1043                trace!(
1044                    n_primary_without_id_info = n,
1045                    "Not extending guardset, missing information."
1046                );
1047                return ExtendedStatus::No;
1048            }
1049            active_guards.update_status_from_dir(universe);
1050            active_guards.extend_sample_as_needed(now, params, universe)
1051        } else {
1052            trace!("Not extending guardset, no universe given.");
1053            ExtendedStatus::No
1054        };
1055
1056        active_guards.select_primary_guards(params);
1057
1058        extended
1059    }
1060
1061    /// If using bridges, tell the BridgeDescProvider which descriptors we want.
1062    /// We need to check this *after* we select our primary guards.
1063    #[cfg(feature = "bridge-client")]
1064    fn update_desired_descriptors(&mut self, now: Instant) {
1065        if self.guards.active_set.universe_type() != UniverseType::BridgeSet {
1066            return;
1067        }
1068
1069        let provider = self.bridge_desc_provider.as_ref().and_then(Weak::upgrade);
1070        let bridge_set = self.latest_bridge_set();
1071        if let (Some(provider), Some(bridge_set)) = (provider, bridge_set) {
1072            let desired: Vec<_> = self
1073                .guards
1074                .active_guards()
1075                .descriptors_to_request(now, &self.params)
1076                .into_iter()
1077                .flat_map(|guard| bridge_set.bridge_by_guard(guard))
1078                .cloned()
1079                .collect();
1080
1081            provider.set_bridges(&desired);
1082        }
1083    }
1084
1085    /// Replace the active guard state with `new_state`, preserving
1086    /// non-persistent state for any guards that are retained.
1087    #[instrument(level = "trace", skip_all)]
1088    fn replace_guards_with(
1089        &mut self,
1090        mut new_guards: GuardSets,
1091        wallclock: SystemTime,
1092        now: Instant,
1093    ) {
1094        std::mem::swap(&mut self.guards, &mut new_guards);
1095        self.guards.copy_status_from(new_guards);
1096        self.update(wallclock, now);
1097    }
1098
1099    /// Update which guard set is active based on the current filter and the
1100    /// provided netdir.
1101    ///
1102    /// After calling this function, the new guard set's filter may be
1103    /// out-of-date: be sure to call `set_filter` as appropriate.
1104    fn select_guard_set_based_on_filter(&mut self, netdir: &NetDir) {
1105        // In general, we'd like to use the restricted set if we're under the
1106        // threshold, and the default set if we're over the threshold.  But if
1107        // we're sitting close to the threshold, we want to avoid flapping back
1108        // and forth, so we only change when we're more than 5% "off" from
1109        // whatever our current setting is.
1110        //
1111        // (See guard-spec section 2 for more information.)
1112        let offset = match self.guards.active_set {
1113            GuardSetSelector::Default => -0.05,
1114            GuardSetSelector::Restricted => 0.05,
1115            // If we're using bridges, then we don't switch between the other guard sets based on on the filter at all.
1116            #[cfg(feature = "bridge-client")]
1117            GuardSetSelector::Bridges => return,
1118        };
1119        let frac_permitted = self.filter.frac_bw_permitted(netdir);
1120        let threshold = self.params.filter_threshold + offset;
1121        let new_choice = if frac_permitted < threshold {
1122            GuardSetSelector::Restricted
1123        } else {
1124            GuardSetSelector::Default
1125        };
1126
1127        if new_choice != self.guards.active_set {
1128            info!(
1129                "Guard selection changed; we are now using the {:?} guard set",
1130                &new_choice
1131            );
1132
1133            self.guards.active_set = new_choice;
1134
1135            if frac_permitted < self.params.extreme_threshold {
1136                warn!(
1137                    "The number of guards permitted is smaller than the recommended minimum of {:.0}%.",
1138                    self.params.extreme_threshold * 100.0,
1139                );
1140            }
1141        }
1142    }
1143
1144    /// Mark all of our primary guards as retriable, if we haven't done
1145    /// so since long enough before `now`.
1146    ///
1147    /// We want to call this function whenever a guard attempt succeeds,
1148    /// if the internet seemed to be down when the guard attempt was
1149    /// first launched.
1150    fn maybe_retry_primary_guards(&mut self, now: Instant) {
1151        // We don't actually want to mark our primary guards as
1152        // retriable more than once per internet_down_timeout: after
1153        // the first time, we would just be noticing the same "coming
1154        // back online" event more than once.
1155        let interval = self.params.internet_down_timeout;
1156        if self.last_primary_retry_time + interval <= now {
1157            debug!(
1158                "Successfully reached a guard after a while off the internet; marking all primary guards retriable."
1159            );
1160            self.guards
1161                .active_guards_mut()
1162                .mark_primary_guards_retriable();
1163            self.last_primary_retry_time = now;
1164        }
1165    }
1166
1167    /// Replace the current GuardFilter with `filter`.
1168    #[instrument(level = "trace", skip_all)]
1169    fn set_filter(&mut self, filter: GuardFilter, wallclock: SystemTime, now: Instant) {
1170        self.filter = filter;
1171        self.update(wallclock, now);
1172    }
1173
1174    /// Called when the circuit manager reports (via [`GuardMonitor`]) that
1175    /// a guard succeeded or failed.
1176    ///
1177    /// Changes the guard's status as appropriate, and updates the pending
1178    /// request as needed.
1179    #[allow(clippy::cognitive_complexity)]
1180    pub(crate) fn handle_msg(
1181        &mut self,
1182        request_id: RequestId,
1183        status: GuardStatus,
1184        skew: Option<ClockSkew>,
1185        runtime: &impl tor_rtcompat::SleepProvider,
1186    ) {
1187        if let Some(mut pending) = self.pending.remove(&request_id) {
1188            // If there was a pending request matching this RequestId, great!
1189            let guard_id = pending.guard_id();
1190            trace!(?guard_id, ?status, "Received report of guard status");
1191
1192            // First, handle the skew report (if any)
1193            if let Some(skew) = skew {
1194                let now = runtime.now();
1195                let observation = skew::SkewObservation { skew, when: now };
1196
1197                match &guard_id.0 {
1198                    FirstHopIdInner::Guard(_, id) => {
1199                        self.guards.active_guards_mut().record_skew(id, observation);
1200                    }
1201                    FirstHopIdInner::Fallback(id) => {
1202                        self.fallbacks.note_skew(id, observation);
1203                    }
1204                }
1205                // TODO: We call this whenever we receive an observed clock
1206                // skew. That's not the perfect timing for two reasons.  First
1207                // off, it might be too frequent: it does an O(n) calculation,
1208                // which isn't ideal.  Second, it might be too infrequent: after
1209                // an hour has passed, a given observation won't be up-to-date
1210                // any more, and we might want to recalculate the skew
1211                // accordingly.
1212                self.update_skew(now);
1213            }
1214
1215            match (status, &guard_id.0) {
1216                (GuardStatus::Failure, FirstHopIdInner::Fallback(id)) => {
1217                    // We used a fallback, and we weren't able to build a circuit through it.
1218                    self.fallbacks.note_failure(id, runtime.now());
1219                }
1220                (_, FirstHopIdInner::Fallback(_)) => {
1221                    // We don't record any other kind of circuit activity if we
1222                    // took the entry from the fallback list.
1223                }
1224                (GuardStatus::Success, FirstHopIdInner::Guard(sample, id)) => {
1225                    // If we had gone too long without any net activity when we
1226                    // gave out this guard, and now we're seeing a circuit
1227                    // succeed, tell the primary guards that they might be
1228                    // retriable.
1229                    if pending.net_has_been_down() {
1230                        self.maybe_retry_primary_guards(runtime.now());
1231                    }
1232
1233                    // The guard succeeded.  Tell the GuardSet.
1234                    self.guards.guards_mut(sample).record_success(
1235                        id,
1236                        &self.params,
1237                        None,
1238                        runtime.wallclock(),
1239                    );
1240                    // Either tell the request whether the guard is
1241                    // usable, or schedule it as a "waiting" request.
1242                    if let Some(usable) = self.guard_usability_status(&pending, runtime.now()) {
1243                        trace!(?guard_id, usable, "Known usability status");
1244                        pending.reply(usable);
1245                    } else {
1246                        // This is the one case where we can't use the
1247                        // guard yet.
1248                        trace!(?guard_id, "Not able to answer right now");
1249                        pending.mark_waiting(runtime.now());
1250                        self.waiting.push(pending);
1251                    }
1252                }
1253                (GuardStatus::Failure, FirstHopIdInner::Guard(sample, id)) => {
1254                    self.guards
1255                        .guards_mut(sample)
1256                        .record_failure(id, None, runtime.now());
1257                    pending.reply(false);
1258                }
1259                (GuardStatus::AttemptAbandoned, FirstHopIdInner::Guard(sample, id)) => {
1260                    self.guards.guards_mut(sample).record_attempt_abandoned(id);
1261                    pending.reply(false);
1262                }
1263                (GuardStatus::Indeterminate, FirstHopIdInner::Guard(sample, id)) => {
1264                    self.guards
1265                        .guards_mut(sample)
1266                        .record_indeterminate_result(id);
1267                    pending.reply(false);
1268                }
1269            };
1270        } else {
1271            warn!(
1272                "Got a status {:?} for a request {:?} that wasn't pending",
1273                status, request_id
1274            );
1275        }
1276
1277        // We might need to update the primary guards based on changes in the
1278        // status of guards above.
1279        self.guards
1280            .active_guards_mut()
1281            .select_primary_guards(&self.params);
1282
1283        // Some waiting request may just have become ready (usable or
1284        // not); we need to give them the information they're waiting
1285        // for.
1286        self.expire_and_answer_pending_requests(runtime.now());
1287    }
1288
1289    /// Helper to implement `GuardMgr::note_external_success()`.
1290    ///
1291    /// (This has to be a separate function so that we can borrow params while
1292    /// we have `mut self` borrowed.)
1293    fn record_external_success<T>(
1294        &mut self,
1295        identity: &T,
1296        external_activity: ExternalActivity,
1297        now: SystemTime,
1298    ) where
1299        T: tor_linkspec::HasRelayIds + ?Sized,
1300    {
1301        for id in self.lookup_ids(identity) {
1302            match &id.0 {
1303                FirstHopIdInner::Guard(sample, id) => {
1304                    self.guards.guards_mut(sample).record_success(
1305                        id,
1306                        &self.params,
1307                        Some(external_activity),
1308                        now,
1309                    );
1310                }
1311                FirstHopIdInner::Fallback(id) => {
1312                    if external_activity == ExternalActivity::DirCache {
1313                        self.fallbacks.note_success(id);
1314                    }
1315                }
1316            }
1317        }
1318    }
1319
1320    /// Return an iterator over all of the clock skew observations we've made
1321    /// for guards or fallbacks.
1322    fn skew_observations(&self) -> impl Iterator<Item = &skew::SkewObservation> {
1323        self.fallbacks
1324            .skew_observations()
1325            .chain(self.guards.active_guards().skew_observations())
1326    }
1327
1328    /// Recalculate our estimated clock skew, and publish it to anybody who
1329    /// cares.
1330    fn update_skew(&mut self, now: Instant) {
1331        let estimate = skew::SkewEstimate::estimate_skew(self.skew_observations(), now);
1332        // TODO: we might want to do this only conditionally, when the skew
1333        // estimate changes.
1334        *self.send_skew.borrow_mut() = estimate;
1335    }
1336
1337    /// If the circuit built because of a given [`PendingRequest`] may
1338    /// now be used (or discarded), return `Some(true)` or
1339    /// `Some(false)` respectively.
1340    ///
1341    /// Return None if we can't yet give an answer about whether such
1342    /// a circuit is usable.
1343    fn guard_usability_status(&self, pending: &PendingRequest, now: Instant) -> Option<bool> {
1344        match &pending.guard_id().0 {
1345            FirstHopIdInner::Guard(sample, id) => self.guards.guards(sample).circ_usability_status(
1346                id,
1347                pending.usage(),
1348                &self.params,
1349                now,
1350            ),
1351            // Fallback circuits are usable immediately, since we don't have to wait to
1352            // see whether any _other_ circuit succeeds or fails.
1353            FirstHopIdInner::Fallback(_) => Some(true),
1354        }
1355    }
1356
1357    /// For requests that have been "waiting" for an answer for too long,
1358    /// expire them and tell the circuit manager that their circuits
1359    /// are unusable.
1360    fn expire_and_answer_pending_requests(&mut self, now: Instant) {
1361        // A bit ugly: we use a separate Vec here to avoid borrowing issues,
1362        // and put it back when we're done.
1363        let mut waiting = Vec::new();
1364        std::mem::swap(&mut waiting, &mut self.waiting);
1365
1366        waiting.retain_mut(|pending| {
1367            let expired = pending
1368                .waiting_since()
1369                .and_then(|w| now.checked_duration_since(w))
1370                .map(|d| d >= self.params.np_idle_timeout)
1371                == Some(true);
1372            if expired {
1373                trace!(?pending, "Pending request expired");
1374                pending.reply(false);
1375                return false;
1376            }
1377
1378            // TODO-SPEC: guard_usability_status isn't what the spec says.  It
1379            // says instead that we should look at _circuit_ status, saying:
1380            //  "   Definition: In the algorithm above, C2 "blocks" C1 if:
1381            // * C2 obeys all the restrictions that C1 had to obey, AND
1382            // * C2 has higher priority than C1, AND
1383            // * Either C2 is <complete>, or C2 is <waiting_for_better_guard>,
1384            // or C2 has been <usable_if_no_better_guard> for no more than
1385            // {NONPRIMARY_GUARD_CONNECT_TIMEOUT} seconds."
1386            //
1387            // See comments in sample::GuardSet::circ_usability_status.
1388
1389            if let Some(answer) = self.guard_usability_status(pending, now) {
1390                trace!(?pending, answer, "Pending request now ready");
1391                pending.reply(answer);
1392                return false;
1393            }
1394            true
1395        });
1396
1397        // Put the waiting list back.
1398        std::mem::swap(&mut waiting, &mut self.waiting);
1399    }
1400
1401    /// Return every currently extant FirstHopId for a guard or fallback
1402    /// directory matching (or possibly matching) the provided keys.
1403    ///
1404    /// An identity is _possibly matching_ if it contains some of the IDs in the
1405    /// provided identity, and it has no _contradictory_ identities, but it does
1406    /// not necessarily contain _all_ of those identities.
1407    ///
1408    /// # TODO
1409    ///
1410    /// This function should probably not exist; it's only used so that dirmgr
1411    /// can report successes or failures, since by the time it observes them it
1412    /// doesn't know whether its circuit came from a guard or a fallback.  To
1413    /// solve that, we'll need CircMgr to record and report which one it was
1414    /// using, which will take some more plumbing.
1415    ///
1416    /// TODO relay: we will have to make the change above when we implement
1417    /// relays; otherwise, it would be possible for an attacker to exploit it to
1418    /// mislead us about our guard status.
1419    fn lookup_ids<T>(&self, identity: &T) -> Vec<FirstHopId>
1420    where
1421        T: tor_linkspec::HasRelayIds + ?Sized,
1422    {
1423        use strum::IntoEnumIterator;
1424        let mut vec = Vec::with_capacity(2);
1425
1426        let id = ids::GuardId::from_relay_ids(identity);
1427        for sample in GuardSetSelector::iter() {
1428            let guard_id = match self.guards.guards(&sample).contains(&id) {
1429                Ok(true) => &id,
1430                Err(other) => other,
1431                Ok(false) => continue,
1432            };
1433            vec.push(FirstHopId(FirstHopIdInner::Guard(sample, guard_id.clone())));
1434        }
1435
1436        let id = ids::FallbackId::from_relay_ids(identity);
1437        if self.fallbacks.contains(&id) {
1438            vec.push(id.into());
1439        }
1440
1441        vec
1442    }
1443
1444    /// Run any periodic events that update guard status, and return a
1445    /// duration after which periodic events should next be run.
1446    #[instrument(skip_all, level = "trace")]
1447    pub(crate) fn run_periodic_events(&mut self, wallclock: SystemTime, now: Instant) -> Duration {
1448        self.update(wallclock, now);
1449        self.expire_and_answer_pending_requests(now);
1450        Duration::from_secs(1) // TODO: Too aggressive.
1451    }
1452
1453    /// Try to select a guard, expanding the sample if the first attempt fails.
1454    #[instrument(skip_all, level = "trace")]
1455    fn select_guard_with_expand(
1456        &mut self,
1457        usage: &GuardUsage,
1458        now: Instant,
1459        wallclock: SystemTime,
1460    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1461        // Try to find a guard.
1462        let first_error = match self.select_guard_once(usage, now) {
1463            Ok(res1) => return Ok(res1),
1464            Err(e) => {
1465                trace!("Couldn't select guard on first attempt: {}", e);
1466                e
1467            }
1468        };
1469
1470        // That didn't work. If we have a netdir, expand the sample and try again.
1471        let res = self.with_opt_universe(|this, univ| {
1472            let univ = univ?;
1473            trace!("No guards available, trying to extend the sample.");
1474            // Make sure that the status on all of our guards are accurate, and
1475            // expand the sample if we can.
1476            //
1477            // Our parameters and configuration did not change, so we do not
1478            // need to call update() or update_active_set_and_filter(). This
1479            // call is sufficient to  extend the sample and recompute primary
1480            // guards.
1481            let extended = Self::update_guardset_internal(
1482                &this.params,
1483                wallclock,
1484                this.guards.active_set.universe_type(),
1485                this.guards.active_guards_mut(),
1486                Some(univ),
1487            );
1488            if extended == ExtendedStatus::Yes {
1489                match this.select_guard_once(usage, now) {
1490                    Ok(res) => return Some(res),
1491                    Err(e) => {
1492                        trace!("Couldn't select guard after update: {}", e);
1493                    }
1494                }
1495            }
1496            None
1497        });
1498        if let Some(res) = res {
1499            return Ok(res);
1500        }
1501
1502        // Okay, that didn't work either.  If we were asked for a directory
1503        // guard, and we aren't using bridges, then we may be able to use a
1504        // fallback.
1505        if usage.kind == GuardUsageKind::OneHopDirectory
1506            && self.guards.active_set.universe_type() == UniverseType::NetDir
1507        {
1508            return self.select_fallback(now);
1509        }
1510
1511        // Couldn't extend the sample or use a fallback; return the original error.
1512        Err(first_error)
1513    }
1514
1515    /// Helper: try to pick a single guard, without retrying on failure.
1516    fn select_guard_once(
1517        &self,
1518        usage: &GuardUsage,
1519        now: Instant,
1520    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1521        let active_set = &self.guards.active_set;
1522        #[cfg_attr(not(feature = "bridge-client"), allow(unused_mut))]
1523        let (list_kind, mut first_hop) =
1524            self.guards
1525                .guards(active_set)
1526                .pick_guard(active_set, usage, &self.params, now)?;
1527        #[cfg(feature = "bridge-client")]
1528        if self.guards.active_set.universe_type() == UniverseType::BridgeSet {
1529            // See if we can promote first_hop to a viable CircTarget.
1530            let bridges = self.latest_bridge_set().ok_or_else(|| {
1531                PickGuardError::Internal(internal!(
1532                    "No bridge set available, even though this is the Bridges sample"
1533                ))
1534            })?;
1535            first_hop.lookup_bridge_circ_target(&bridges);
1536
1537            if usage.kind == GuardUsageKind::Data && !first_hop.contains_circ_target() {
1538                return Err(PickGuardError::Internal(internal!(
1539                    "Tried to return a non-circtarget guard with Data usage!"
1540                )));
1541            }
1542        }
1543        Ok((list_kind, first_hop))
1544    }
1545
1546    /// Helper: Select a fallback directory.
1547    ///
1548    /// Called when we have no guard information to use. Return values are as
1549    /// for [`GuardMgr::select_guard()`]
1550    fn select_fallback(
1551        &self,
1552        now: Instant,
1553    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1554        let filt = self.guards.active_guards().filter();
1555
1556        let fallback = crate::FirstHop {
1557            sample: None,
1558            inner: crate::FirstHopInner::Chan(OwnedChanTarget::from_chan_target(
1559                self.fallbacks.choose(&mut rand::rng(), now, filt)?,
1560            )),
1561        };
1562        let fallback = filt.modify_hop(fallback)?;
1563        Ok((sample::ListKind::Fallback, fallback))
1564    }
1565}
1566
/// A possible outcome of trying to extend a guard sample.
///
/// The result of `update_guardset_internal()` is compared against this type
/// to decide whether retrying a failed guard selection is worthwhile.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum ExtendedStatus {
    /// The guard sample was extended. (At least one guard was added to it.)
    Yes,
    /// The guard sample was not extended.
    No,
}
1575
/// A set of parameters, derived from the consensus document, controlling
/// the behavior of a guard manager.
///
/// Values come either from [`Default`] or from a [`NetParameters`] object
/// via `TryFrom<&NetParameters>`.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
struct GuardParams {
    /// How long should a sampled, un-confirmed guard be kept in the sample before it expires?
    lifetime_unconfirmed: Duration,
    /// How long should a confirmed guard be kept in the sample before
    /// it expires?
    lifetime_confirmed: Duration,
    /// How long may a guard be unlisted before we remove it from the sample?
    lifetime_unlisted: Duration,
    /// Largest number of guards we're willing to add to the sample.
    max_sample_size: usize,
    /// Largest fraction of the network's guard bandwidth that we're
    /// willing to add to the sample.
    max_sample_bw_fraction: f64,
    /// Smallest number of guards that we're willing to have in the
    /// sample, after applying a [`GuardFilter`].
    min_filtered_sample_size: usize,
    /// How many guards are considered "Primary"?
    n_primary: usize,
    /// When making a regular circuit, how many primary guards should we
    /// be willing to try?
    data_parallelism: usize,
    /// When making a one-hop directory circuit, how many primary
    /// guards should we be willing to try?
    dir_parallelism: usize,
    /// For how long does a pending attempt to connect to a guard
    /// block an attempt to use a less-favored non-primary guard?
    np_connect_timeout: Duration,
    /// How long do we allow a circuit to a successful but unfavored
    /// non-primary guard to sit around before deciding not to use it?
    np_idle_timeout: Duration,
    /// After how much time without successful activity does a
    /// successful circuit indicate that we should retry our primary
    /// guards?
    internet_down_timeout: Duration,
    /// What fraction of the guards can be filtered out before we
    /// decide that our filter is "very restrictive"?
    ///
    /// (Taken from the `guard_meaningful_restriction` network parameter.)
    //
    // NOTE(review): this field and `extreme_threshold` are both documented
    // in terms of a "very restrictive" filter, yet they come from different
    // network parameters ("meaningful" vs. "extreme").  The test setup in
    // this file also describes a "meaningful" restriction as one that
    // _allows_ at most this fraction of relays.  Confirm the intended
    // wording and direction against the code that reads this field.
    filter_threshold: f64,
    /// What fraction of the guards determine that our filter is "very
    /// restrictive"?
    ///
    /// (Taken from the `guard_extreme_restriction` network parameter.)
    extreme_threshold: f64,
}
1621
1622impl Default for GuardParams {
1623    fn default() -> Self {
1624        let one_day = Duration::from_secs(86400);
1625        GuardParams {
1626            lifetime_unconfirmed: one_day * 120,
1627            lifetime_confirmed: one_day * 60,
1628            lifetime_unlisted: one_day * 20,
1629            max_sample_size: 60,
1630            max_sample_bw_fraction: 0.2,
1631            min_filtered_sample_size: 20,
1632            n_primary: 3,
1633            data_parallelism: 1,
1634            dir_parallelism: 3,
1635            np_connect_timeout: Duration::from_secs(15),
1636            np_idle_timeout: Duration::from_secs(600),
1637            internet_down_timeout: Duration::from_secs(600),
1638            filter_threshold: 0.2,
1639            extreme_threshold: 0.01,
1640        }
1641    }
1642}
1643
1644impl TryFrom<&NetParameters> for GuardParams {
1645    type Error = tor_units::Error;
1646    fn try_from(p: &NetParameters) -> Result<GuardParams, Self::Error> {
1647        Ok(GuardParams {
1648            lifetime_unconfirmed: p.guard_lifetime_unconfirmed.try_into()?,
1649            lifetime_confirmed: p.guard_lifetime_confirmed.try_into()?,
1650            lifetime_unlisted: p.guard_remove_unlisted_after.try_into()?,
1651            max_sample_size: p.guard_max_sample_size.try_into()?,
1652            max_sample_bw_fraction: p.guard_max_sample_threshold.as_fraction(),
1653            min_filtered_sample_size: p.guard_filtered_min_sample_size.try_into()?,
1654            n_primary: p.guard_n_primary.try_into()?,
1655            data_parallelism: p.guard_use_parallelism.try_into()?,
1656            dir_parallelism: p.guard_dir_use_parallelism.try_into()?,
1657            np_connect_timeout: p.guard_nonprimary_connect_timeout.try_into()?,
1658            np_idle_timeout: p.guard_nonprimary_idle_timeout.try_into()?,
1659            internet_down_timeout: p.guard_internet_likely_down.try_into()?,
1660            filter_threshold: p.guard_meaningful_restriction.as_fraction(),
1661            extreme_threshold: p.guard_extreme_restriction.as_fraction(),
1662        })
1663    }
1664}
1665
/// Representation of a guard or fallback, as returned by [`GuardMgr::select_guard()`].
///
/// A `FirstHop` records which sample (if any) it was drawn from, plus the
/// information needed to connect to it.
#[derive(Debug, Clone)]
pub struct FirstHop {
    /// The sample from which this guard was taken, or `None` if this is a fallback.
    sample: Option<GuardSetSelector>,
    /// Information about connecting to (or through) this guard.
    inner: FirstHopInner,
}
/// The enumeration inside a FirstHop that holds information about how to
/// connect to (and possibly through) a guard or fallback.
#[derive(Debug, Clone)]
enum FirstHopInner {
    /// We have enough information to connect to a guard.
    Chan(OwnedChanTarget),
    /// We have enough information to connect to a guard _and_ to build
    /// multihop circuits through it.
    //
    // Within this file, this variant is only constructed by code that is
    // gated on the `bridge-client` feature; hence the conditional
    // `allow(dead_code)` below.
    #[cfg_attr(not(feature = "bridge-client"), allow(dead_code))]
    Circ(OwnedCircTarget),
}
1685
1686impl FirstHop {
1687    /// Return a new [`FirstHopId`] for this `FirstHop`.
1688    fn first_hop_id(&self) -> FirstHopId {
1689        match &self.sample {
1690            Some(sample) => {
1691                let guard_id = GuardId::from_relay_ids(self);
1692                FirstHopId::in_sample(sample.clone(), guard_id)
1693            }
1694            None => {
1695                let fallback_id = crate::ids::FallbackId::from_relay_ids(self);
1696                FirstHopId::from(fallback_id)
1697            }
1698        }
1699    }
1700
1701    /// Look up this guard in `netdir`.
1702    pub fn get_relay<'a>(&self, netdir: &'a NetDir) -> Option<Relay<'a>> {
1703        match &self.sample {
1704            #[cfg(feature = "bridge-client")]
1705            // Always return "None" for anything that isn't in the netdir.
1706            Some(s) if s.universe_type() == UniverseType::BridgeSet => None,
1707            // Otherwise ask the netdir.
1708            _ => netdir.by_ids(self),
1709        }
1710    }
1711
1712    /// Return true if this guard is a bridge.
1713    pub fn is_bridge(&self) -> bool {
1714        match &self.sample {
1715            #[cfg(feature = "bridge-client")]
1716            Some(s) if s.universe_type() == UniverseType::BridgeSet => true,
1717            _ => false,
1718        }
1719    }
1720
1721    /// If possible, return a view of this object that can be used to build a circuit.
1722    pub fn as_circ_target(&self) -> Option<&OwnedCircTarget> {
1723        match &self.inner {
1724            FirstHopInner::Chan(_) => None,
1725            FirstHopInner::Circ(ct) => Some(ct),
1726        }
1727    }
1728
1729    /// Return a view of this as an OwnedChanTarget.
1730    fn chan_target_mut(&mut self) -> &mut OwnedChanTarget {
1731        match &mut self.inner {
1732            FirstHopInner::Chan(ct) => ct,
1733            FirstHopInner::Circ(ct) => ct.chan_target_mut(),
1734        }
1735    }
1736
1737    /// If possible and appropriate, find a circuit target in `bridges` for this
1738    /// `FirstHop`, and make this `FirstHop` a viable circuit target.
1739    ///
1740    /// (By default, any `FirstHop` that a `GuardSet` returns will have enough
1741    /// information to be a `ChanTarget`, but it will be lacking the additional
1742    /// network information in `CircTarget`[^1] necessary for us to build a
1743    /// multi-hop circuit through it.  If this FirstHop is a regular non-bridge
1744    /// `Relay`, then the `CircMgr` will later look up that circuit information
1745    /// itself from the network directory. But if this `FirstHop` *is* a bridge,
1746    /// then we need to find that information in the `BridgeSet`, since the
1747    /// CircMgr does not keep track of the `BridgeSet`.)
1748    ///
1749    /// [^1]: For example, supported protocol versions and ntor keys.
1750    #[cfg(feature = "bridge-client")]
1751    fn lookup_bridge_circ_target(&mut self, bridges: &bridge::BridgeSet) {
1752        use crate::sample::CandidateStatus::Present;
1753        if self.sample.as_ref().map(|s| s.universe_type()) == Some(UniverseType::BridgeSet)
1754            && matches!(self.inner, FirstHopInner::Chan(_))
1755        {
1756            if let Present(bridge_relay) = bridges.bridge_relay_by_guard(self) {
1757                if let Some(circ_target) = bridge_relay.as_relay_with_desc() {
1758                    self.inner =
1759                        FirstHopInner::Circ(OwnedCircTarget::from_circ_target(&circ_target));
1760                }
1761            }
1762        }
1763    }
1764
1765    /// Return true if this `FirstHop` contains circuit target information.
1766    ///
1767    /// This is true if `lookup_bridge_circ_target()` has been called, and it
1768    /// successfully found the circuit target information.
1769    #[cfg(feature = "bridge-client")]
1770    fn contains_circ_target(&self) -> bool {
1771        matches!(self.inner, FirstHopInner::Circ(_))
1772    }
1773}
1774
1775// This is somewhat redundant with the implementations in crate::guard::Guard.
1776impl tor_linkspec::HasAddrs for FirstHop {
1777    fn addrs(&self) -> impl Iterator<Item = SocketAddr> {
1778        match &self.inner {
1779            FirstHopInner::Chan(ct) => Either::Left(ct.addrs()),
1780            FirstHopInner::Circ(ct) => Either::Right(ct.addrs()),
1781        }
1782    }
1783}
1784impl tor_linkspec::HasRelayIds for FirstHop {
1785    fn identity(
1786        &self,
1787        key_type: tor_linkspec::RelayIdType,
1788    ) -> Option<tor_linkspec::RelayIdRef<'_>> {
1789        match &self.inner {
1790            FirstHopInner::Chan(ct) => ct.identity(key_type),
1791            FirstHopInner::Circ(ct) => ct.identity(key_type),
1792        }
1793    }
1794}
1795impl tor_linkspec::HasChanMethod for FirstHop {
1796    fn chan_method(&self) -> tor_linkspec::ChannelMethod {
1797        match &self.inner {
1798            FirstHopInner::Chan(ct) => ct.chan_method(),
1799            FirstHopInner::Circ(ct) => ct.chan_method(),
1800        }
1801    }
1802}
// `FirstHop` implements `HasAddrs`, `HasRelayIds`, and `HasChanMethod`
// above; no items need to be provided in this impl itself.
impl tor_linkspec::ChanTarget for FirstHop {}
1804
/// The purpose for which we plan to use a guard.
///
/// This can affect the guard selection algorithm.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[non_exhaustive]
pub enum GuardUsageKind {
    /// We want to use this guard for a data circuit.
    ///
    /// (This encompasses everything except the `OneHopDirectory` case.)
    ///
    /// This is the default kind.
    #[default]
    Data,
    /// We want to use this guard for a one-hop, non-anonymous
    /// directory request.
    ///
    /// (Our algorithm allows more parallelism for the guards that we use
    /// for these circuits.)
    OneHopDirectory,
}
1823
/// A set of parameters describing how a single guard should be selected.
///
/// Used as an argument to [`GuardMgr::select_guard`].
///
/// Construct one with [`GuardUsageBuilder`].
#[derive(Clone, Debug, derive_builder::Builder)]
#[builder(build_fn(error = "tor_config::ConfigBuildError"))]
pub struct GuardUsage {
    /// The purpose for which this guard will be used.
    ///
    /// If unset in the builder, this takes the default [`GuardUsageKind`].
    #[builder(default)]
    kind: GuardUsageKind,
    /// A list of restrictions on which guard may be used.
    ///
    /// The default is the empty list.
    #[builder(sub_builder, setter(custom))]
    restrictions: GuardRestrictionList,
}
1839
// Invoke the standard-builder helper macro for `GuardUsage`.
//
// NOTE(review): presumably this is what provides `GuardUsage::default()`
// (which the tests in this file rely on), and `!Deserialize` presumably
// records that the builder does not implement `Deserialize` -- confirm
// against the macro's documentation.
impl_standard_builder! { GuardUsage: !Deserialize }
1841
/// List of guard restrictions, as configured
pub type GuardRestrictionList = Vec<GuardRestriction>;
1844
// Declare `GuardRestrictionListBuilder`, the list builder behind the
// `restrictions` field of `GuardUsage`.  The built list defaults to empty,
// and each item is "built" simply by cloning it.
define_list_builder_helper! {
    pub struct GuardRestrictionListBuilder {
        restrictions: [GuardRestriction],
    }
    built: GuardRestrictionList = restrictions;
    default = vec![];
    item_build: |restriction| Ok(restriction.clone());
}

// Declare the accessors on `GuardUsageBuilder` for its `restrictions` list.
// (The field is marked `setter(custom)`, so derive_builder does not
// generate a setter for it.)
define_list_builder_accessors! {
    struct GuardUsageBuilder {
        pub restrictions: [GuardRestriction],
    }
}
1859
1860impl GuardUsageBuilder {
1861    /// Create a new empty [`GuardUsageBuilder`].
1862    pub fn new() -> Self {
1863        Self::default()
1864    }
1865}
1866
/// A restriction that applies to a single request for a guard.
///
/// Restrictions differ from filters (see [`GuardFilter`]) in that
/// they apply to single requests, not to our entire set of guards.
/// They're suitable for things like making sure that we don't start
/// and end a circuit at the same relay, or requiring a specific
/// subprotocol version for certain kinds of requests.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[non_exhaustive]
pub enum GuardRestriction {
    /// Don't pick a guard with the provided identity.
    AvoidId(RelayId),
    /// Don't pick a guard with any of the provided Ed25519 identities.
    //
    // NOTE(review): the payload is a `RelayIdSet`, not a set of Ed25519
    // keys specifically; confirm whether identities of other types are
    // honored here too, and update the wording above if so.
    AvoidAllIds(RelayIdSet),
}
1882
/// The kind of vanguards to use.
///
/// The explicit discriminants match the integer values accepted by
/// `VanguardMode::from_net_parameter`, and give the derived ordering
/// `Disabled < Lite < Full`.
///
/// (This is the definition used when the `vanguards` feature is enabled.)
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
#[derive(Serialize, Deserialize)] //
#[derive(derive_more::Display)] //
#[serde(rename_all = "lowercase")]
#[cfg(feature = "vanguards")]
#[non_exhaustive]
pub enum VanguardMode {
    /// "Lite" vanguards.
    ///
    /// This is the default mode.
    #[default]
    #[display("lite")]
    Lite = 1,
    /// "Full" vanguards.
    #[display("full")]
    Full = 2,
    /// Vanguards are disabled.
    #[display("disabled")]
    Disabled = 0,
}
1902
#[cfg(feature = "vanguards")]
impl VanguardMode {
    /// Convert the value of a [`NetParameters`] parameter into a `VanguardMode`.
    ///
    /// This is how [`vanguards_enabled`](NetParameters::vanguards_enabled)
    /// and [`vanguards_hs_service`](NetParameters::vanguards_hs_service)
    /// get turned into the corresponding `VanguardMode`:
    /// 0 means disabled, 1 means "lite", and 2 means "full".
    pub(crate) fn from_net_parameter(val: BoundedInt32<0, 2>) -> Self {
        match val.get() {
            2 => Self::Full,
            1 => Self::Lite,
            0 => Self::Disabled,
            // The type's bounds rule out every other value.
            _ => unreachable!("BoundedInt32 was not bounded?!"),
        }
    }
}
1919
// NOTE(review): presumably this is required so that `VanguardMode` can be
// wrapped in `ExplicitOrAuto` (as `VanguardConfig::mode` does), by declaring
// that none of its own values is spelled "auto" -- confirm against the
// macro's documentation.
impl_not_auto_value!(VanguardMode);
1921
/// Vanguards configuration.
///
/// Use [`VanguardConfig::mode`] to find out which [`VanguardMode`] is in
/// effect.
#[derive(Debug, Default, Clone, Eq, PartialEq, derive_builder::Builder)]
#[builder(build_fn(error = "ConfigBuildError"))]
#[builder(derive(Debug, Serialize, Deserialize))]
pub struct VanguardConfig {
    /// The kind of vanguards to use.
    ///
    /// This is either an explicit mode, or "auto".
    #[builder_field_attr(serde(default))]
    #[builder(default)]
    mode: ExplicitOrAuto<VanguardMode>,
}
1932
1933impl VanguardConfig {
1934    /// Return the configured [`VanguardMode`].
1935    ///
1936    /// Returns the [`Default`] `VanguardMode`
1937    /// if the mode is [`Auto`](ExplicitOrAuto) or unspecified.
1938    pub fn mode(&self) -> VanguardMode {
1939        match self.mode {
1940            ExplicitOrAuto::Auto => Default::default(),
1941            ExplicitOrAuto::Explicit(mode) => mode,
1942        }
1943    }
1944}
1945
/// The kind of vanguards to use.
///
/// (This is the definition used when the `vanguards` feature is not enabled:
/// the only available mode is `Disabled`.)
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
#[derive(Serialize, Deserialize)] //
#[derive(derive_more::Display)] //
#[serde(rename_all = "lowercase")]
#[cfg(not(feature = "vanguards"))]
#[non_exhaustive]
pub enum VanguardMode {
    /// Vanguards are disabled.
    #[default]
    #[display("disabled")]
    Disabled = 0,
}
1959
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_time_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
    use super::*;
    use tor_linkspec::{HasAddrs, HasRelayIds};
    use tor_persist::TestingStateMgr;
    use tor_rtcompat::test_with_all_runtimes;

    // The built-in `GuardParams` defaults must agree with the values derived
    // from the default `NetParameters`.
    #[test]
    fn guard_param_defaults() {
        let p1 = GuardParams::default();
        let p2: GuardParams = (&NetParameters::default()).try_into().unwrap();
        assert_eq!(p1, p2);
    }

    // Shared setup: build a `GuardMgr` backed by a `TestingStateMgr` whose
    // lock we hold, along with a `NetDir` for the testing network (with a
    // few parameters overridden).  The netdir is returned to the caller
    // rather than installed here.
    fn init<R: Runtime>(rt: R) -> (GuardMgr<R>, TestingStateMgr, NetDir) {
        use tor_netdir::{MdReceiver, PartialNetDir, testnet};
        let statemgr = TestingStateMgr::new();
        let have_lock = statemgr.try_lock().unwrap();
        assert!(have_lock.held());
        let guardmgr = GuardMgr::new(rt, statemgr.clone(), &TestConfig::default()).unwrap();
        let (con, mds) = testnet::construct_network().unwrap();
        let param_overrides = vec![
            // We make the sample size smaller than usual to compensate for the
            // small testing network.  (Otherwise, we'd sample the whole network,
            // and not be able to observe guards in the tests.)
            "guard-min-filtered-sample-size=5",
            // We choose only two primary guards, to make the tests easier to write.
            "guard-n-primary-guards=2",
            // We define any restriction that allows 75% or fewer of relays as "meaningful",
            // so that we can test the "restrictive" guard sample behavior.
            "guard-meaningful-restriction-percent=75",
        ];
        // Join the overrides into one space-separated string for parsing.
        let param_overrides: String =
            itertools::Itertools::intersperse(param_overrides.into_iter(), " ").collect();
        let override_p = param_overrides.parse().unwrap();
        let mut netdir = PartialNetDir::new(con, Some(&override_p));
        for md in mds {
            netdir.add_microdesc(md);
        }
        let netdir = netdir.unwrap_if_sufficient().unwrap();

        (guardmgr, statemgr, netdir)
    }

    // Select a guard, confirm it, persist the state, and check that a fresh
    // `GuardMgr` loaded from that state picks the same relay again.
    #[test]
    #[allow(clippy::clone_on_copy)]
    fn simple_case() {
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, statemgr, netdir) = init(rt.clone());
            let usage = GuardUsage::default();
            guardmgr.install_test_netdir(&netdir);

            let (id, mon, usable) = guardmgr.select_guard(usage).unwrap();
            // Report that the circuit succeeded.
            mon.succeeded();

            // May we use the circuit?
            let usable = usable.await.unwrap();
            assert!(usable);

            // Save the state...
            guardmgr.flush_msg_queue().await;
            guardmgr.store_persistent_state().unwrap();
            drop(guardmgr);

            // Try reloading from the state...
            let guardmgr2 =
                GuardMgr::new(rt.clone(), statemgr.clone(), &TestConfig::default()).unwrap();
            guardmgr2.install_test_netdir(&netdir);

            // Since the guard was confirmed, we should get the same one this time!
            let usage = GuardUsage::default();
            let (id2, _mon, _usable) = guardmgr2.select_guard(usage).unwrap();
            assert!(id2.same_relay_ids(&id));
        });
    }

    // Fail both primary guards, then check how the "usable" answers come
    // back for two non-primary guards when one fails and one succeeds.
    #[test]
    fn simple_waiting() {
        // TODO(nickm): This test fails in rare cases; I suspect a
        // race condition somewhere.
        //
        // I've doubled up on the queue flushing in order to try to make the
        // race less likely, but we should investigate.
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, _statemgr, netdir) = init(rt);
            let u = GuardUsage::default();
            guardmgr.install_test_netdir(&netdir);

            // We'll have the first two guards fail, which should make us
            // try a non-primary guard.
            let (id1, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
            mon.failed();
            guardmgr.flush_msg_queue().await; // avoid race
            guardmgr.flush_msg_queue().await; // avoid race
            let (id2, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
            mon.failed();
            guardmgr.flush_msg_queue().await; // avoid race
            guardmgr.flush_msg_queue().await; // avoid race

            assert!(!id1.same_relay_ids(&id2));

            // Now we should get two sampled guards. They should be different.
            let (id3, mon3, usable3) = guardmgr.select_guard(u.clone()).unwrap();
            let (id4, mon4, usable4) = guardmgr.select_guard(u.clone()).unwrap();
            assert!(!id3.same_relay_ids(&id4));

            // Report a failure on the first and a success on the second,
            // and wait for both "usable" answers.
            let (u3, u4) = futures::join!(
                async {
                    mon3.failed();
                    guardmgr.flush_msg_queue().await; // avoid race
                    usable3.await.unwrap()
                },
                async {
                    mon4.succeeded();
                    usable4.await.unwrap()
                }
            );

            assert_eq!((u3, u4), (false, true));
        });
    }

    // With a filter that only permits one address range installed, the
    // selected guard must have an address inside that range.
    #[test]
    fn filtering_basics() {
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, _statemgr, netdir) = init(rt);
            let u = GuardUsage::default();
            let filter = {
                let mut f = GuardFilter::default();
                // All the addresses in the test network are {0,1,2,3,4}.0.0.3:9001.
                // Limit to only 2.0.0.0/8
                f.push_reachable_addresses(vec!["2.0.0.0/8:9001".parse().unwrap()]);
                f
            };
            guardmgr.set_filter(filter);
            guardmgr.install_test_netdir(&netdir);
            let (guard, _mon, _usable) = guardmgr.select_guard(u).unwrap();
            // Make sure that the filter worked.
            let addr = guard.addrs().next().unwrap();
            assert_eq!(addr, "2.0.0.3:9001".parse().unwrap());
        });
    }

    // Externally reported directory-cache failures and successes should
    // change which guard we get for directory usage, but not for data usage.
    #[test]
    fn external_status() {
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, _statemgr, netdir) = init(rt);
            let data_usage = GuardUsage::default();
            let dir_usage = GuardUsageBuilder::new()
                .kind(GuardUsageKind::OneHopDirectory)
                .build()
                .unwrap();
            guardmgr.install_test_netdir(&netdir);
            {
                // Override this parameter, so that we can get deterministic results below.
                let mut inner = guardmgr.inner.lock().unwrap();
                inner.params.dir_parallelism = 1;
            }

            let (guard, mon, _usable) = guardmgr.select_guard(data_usage.clone()).unwrap();
            mon.succeeded();

            // Record that this guard gave us a bad directory object.
            guardmgr.note_external_failure(&guard, ExternalActivity::DirCache);

            // We ask for another guard, for data usage.  We should get the same
            // one as last time, since the directory failure doesn't mean this
            // guard is useless as a primary guard.
            let (g2, mon, _usable) = guardmgr.select_guard(data_usage).unwrap();
            assert_eq!(g2.ed_identity(), guard.ed_identity());
            mon.succeeded();

            // But if we ask for a guard for directory usage, we should get a
            // different one, since the last guard we gave out failed.
            let (g3, mon, _usable) = guardmgr.select_guard(dir_usage.clone()).unwrap();
            assert_ne!(g3.ed_identity(), guard.ed_identity());
            mon.succeeded();

            // Now record a success for directory usage.
            guardmgr.note_external_success(&guard, ExternalActivity::DirCache);

            // Now that the guard is working as a cache, asking for it should get us the same guard.
            let (g4, _mon, _usable) = guardmgr.select_guard(dir_usage).unwrap();
            assert_eq!(g4.ed_identity(), guard.ed_identity());
        });
    }

    // The derived ordering on `VanguardMode` must be Disabled < Lite < Full.
    #[cfg(feature = "vanguards")]
    #[test]
    fn vanguard_mode_ord() {
        assert!(VanguardMode::Disabled < VanguardMode::Lite);
        assert!(VanguardMode::Disabled < VanguardMode::Full);
        assert!(VanguardMode::Lite < VanguardMode::Full);
    }
}