Skip to main content

tor_guardmgr/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![doc = include_str!("../README.md")]
3// @@ begin lint list maintained by maint/add_warning @@
4#![allow(renamed_and_removed_lints)] // @@REMOVE_WHEN(ci_arti_stable)
5#![allow(unknown_lints)] // @@REMOVE_WHEN(ci_arti_nightly)
6#![warn(missing_docs)]
7#![warn(noop_method_call)]
8#![warn(unreachable_pub)]
9#![warn(clippy::all)]
10#![deny(clippy::await_holding_lock)]
11#![deny(clippy::cargo_common_metadata)]
12#![deny(clippy::cast_lossless)]
13#![deny(clippy::checked_conversions)]
14#![warn(clippy::cognitive_complexity)]
15#![deny(clippy::debug_assert_with_mut_call)]
16#![deny(clippy::exhaustive_enums)]
17#![deny(clippy::exhaustive_structs)]
18#![deny(clippy::expl_impl_clone_on_copy)]
19#![deny(clippy::fallible_impl_from)]
20#![deny(clippy::implicit_clone)]
21#![deny(clippy::large_stack_arrays)]
22#![warn(clippy::manual_ok_or)]
23#![deny(clippy::missing_docs_in_private_items)]
24#![warn(clippy::needless_borrow)]
25#![warn(clippy::needless_pass_by_value)]
26#![warn(clippy::option_option)]
27#![deny(clippy::print_stderr)]
28#![deny(clippy::print_stdout)]
29#![warn(clippy::rc_buffer)]
30#![deny(clippy::ref_option_ref)]
31#![warn(clippy::semicolon_if_nothing_returned)]
32#![warn(clippy::trait_duplication_in_bounds)]
33#![deny(clippy::unchecked_time_subtraction)]
34#![deny(clippy::unnecessary_wraps)]
35#![warn(clippy::unseparated_literal_suffix)]
36#![deny(clippy::unwrap_used)]
37#![deny(clippy::mod_module_files)]
38#![allow(clippy::let_unit_value)] // This can reasonably be done for explicitness
39#![allow(clippy::uninlined_format_args)]
40#![allow(clippy::significant_drop_in_scrutinee)] // arti/-/merge_requests/588/#note_2812945
41#![allow(clippy::result_large_err)] // temporary workaround for arti#587
42#![allow(clippy::needless_raw_string_hashes)] // complained-about code is fine, often best
43#![allow(clippy::needless_lifetimes)] // See arti#1765
44#![allow(mismatched_lifetime_syntaxes)] // temporary workaround for arti#2060
45#![allow(clippy::collapsible_if)] // See arti#2342
46#![deny(clippy::unused_async)]
47#![deny(clippy::string_slice)] // See arti#2571
48//! <!-- @@ end lint list maintained by maint/add_warning @@ -->
49
50// TODO #1645 (either remove this, or decide to have it everywhere)
51#![cfg_attr(not(all(feature = "full", feature = "experimental")), allow(unused))]
52
53// Glossary:
54//     Primary guard
55//     Sample
56//     confirmed
57//     filtered
58
59use derive_deftly::Deftly;
60use futures::channel::mpsc;
61use itertools::Either;
62use serde::{Deserialize, Serialize};
63use std::collections::HashMap;
64use std::net::SocketAddr;
65use std::sync::{Arc, Mutex, Weak};
66#[cfg(feature = "bridge-client")]
67use tor_error::internal;
68use tor_linkspec::{OwnedChanTarget, OwnedCircTarget, RelayId, RelayIdSet};
69use tor_netdir::NetDirProvider;
70use tor_proto::ClockSkew;
71use tor_rtcompat::SpawnExt;
72use tor_units::BoundedInt32;
73use tracing::{debug, info, instrument, trace, warn};
74use web_time_compat::{Duration, Instant, SystemTime};
75
76use tor_config::derive::prelude::*;
77use tor_config::{ExplicitOrAuto, impl_standard_builder};
78use tor_config::{ReconfigureError, impl_not_auto_value};
79use tor_config::{define_list_builder_accessors, define_list_builder_helper};
80use tor_netdir::{NetDir, Relay, params::NetParameters};
81use tor_persist::{DynStorageHandle, StateMgr};
82use tor_rtcompat::Runtime;
83
84#[cfg(feature = "bridge-client")]
85pub mod bridge;
86mod config;
87mod daemon;
88mod dirstatus;
89mod err;
90mod events;
91pub mod fallback;
92mod filter;
93mod guard;
94mod ids;
95mod pending;
96mod sample;
97mod skew;
98mod util;
99#[cfg(feature = "vanguards")]
100pub mod vanguards;
101
102#[cfg(not(feature = "bridge-client"))]
103#[path = "bridge_disabled.rs"]
104pub mod bridge;
105
106#[cfg(any(test, feature = "testing"))]
107pub use config::testing::TestConfig;
108
109#[cfg(test)]
110use oneshot_fused_workaround as oneshot;
111
112pub use config::GuardMgrConfig;
113pub use err::{GuardMgrConfigError, GuardMgrError, PickGuardError};
114pub use events::ClockSkewEvents;
115pub use filter::GuardFilter;
116pub use ids::FirstHopId;
117pub use pending::{GuardMonitor, GuardStatus, GuardUsable};
118pub use skew::SkewEstimate;
119
120#[cfg(feature = "vanguards")]
121pub use vanguards::VanguardMgrError;
122
123use pending::{PendingRequest, RequestId};
124use sample::{GuardSet, Universe, UniverseRef};
125
126use crate::ids::{FirstHopIdInner, GuardId};
127
128/// A "guard manager" that selects and remembers a persistent set of
129/// guard nodes.
130///
131/// This is a "handle": cloning it yields another handle onto the same state.
132#[derive(Clone)]
133pub struct GuardMgr<R: Runtime> {
134    /// An asynchronous runtime object.
135    ///
136    /// GuardMgr uses this runtime for timing, timeouts, and spawning
137    /// tasks.
138    runtime: R,
139
140    /// Internal state for the guard manager, shared by every clone of this handle.
141    inner: Arc<Mutex<GuardMgrInner>>,
142}
143
144/// Helper type that holds the data used by a [`GuardMgr`].
145///
146/// This would just be a [`GuardMgr`], except that it needs to sit inside
147/// a `Mutex` and get accessed by daemon tasks.
148struct GuardMgrInner {
149    /// Last time when we marked all of our primary guards as retriable.
150    ///
151    /// We keep track of this time so that we can rate-limit
152    /// these attempts.
153    last_primary_retry_time: Instant,
154
155    /// Persistent guard manager state.
156    ///
157    /// This object remembers one or more persistent sets of guards that we can
158    /// use, along with their relative priorities and statuses.
159    guards: GuardSets,
160
161    /// The current filter that we're using to decide which guards are
162    /// supported.
163    //
164    // TODO: This field is duplicated in the current active [`GuardSet`]; we
165    // should fix that.
166    filter: GuardFilter,
167
168    /// Configuration values derived from the consensus parameters.
169    ///
170    /// This is updated whenever the consensus parameters change.
171    params: GuardParams,
172
173    /// A mpsc channel, used to tell the task running in
174    /// [`daemon::report_status_events`] about a new event to monitor.
175    ///
176    /// This uses an `UnboundedSender` so that we don't have to await
177    /// while sending the message, which in turn allows the GuardMgr
178    /// API to be simpler.  The risk, however, is that there's no
179    /// backpressure in the event that the task running
180    /// [`daemon::report_status_events`] fails to read from this
181    /// channel.
182    ctrl: mpsc::UnboundedSender<daemon::Msg>,
183
184    /// Information about guards that we've given out, but where we have
185    /// not yet heard whether the guard was successful.
186    ///
187    /// Upon learning whether the guard was successful, the pending
188    /// requests in this map may be either moved to `waiting`, or
189    /// discarded.
190    ///
191    /// There can be multiple pending requests corresponding to the
192    /// same guard.
193    pending: HashMap<RequestId, PendingRequest>,
194
195    /// A list of pending requests for which we have heard that the
196    /// guard was successful, but we have not yet decided whether the
197    /// circuit may be used.
198    ///
199    /// There can be multiple waiting requests corresponding to the
200    /// same guard.
201    waiting: Vec<PendingRequest>,
202
203    /// A list of fallback directories used to access the directory system
204    /// when no other directory information is yet known.
205    fallbacks: fallback::FallbackState,
206
207    /// Location in which to store persistent state.
208    storage: DynStorageHandle<GuardSets>,
209
210    /// A sender object to publish changes in our estimated clock skew.
211    send_skew: postage::watch::Sender<Option<SkewEstimate>>,
212
213    /// A receiver object to hand out to observers who want to know about
214    /// changes in our estimated clock skew.
215    recv_skew: events::ClockSkewEvents,
216
217    /// A netdir provider that we can use for adding new guards when
218    /// insufficient guards are available.
219    ///
220    /// This has to be an Option so it can be initialized from None: at the
221    /// time a GuardMgr is created, there is no NetDirProvider for it to use.
222    netdir_provider: Option<Weak<dyn NetDirProvider>>,
223
224    /// A bridge descriptor provider that we can use for discovering bridge descriptors.
225    ///
226    /// This has to be an Option so it can be initialized from None: at the time
227    /// a GuardMgr is created, there is no BridgeDescProvider for it to use.
228    #[cfg(feature = "bridge-client")]
229    bridge_desc_provider: Option<Weak<dyn bridge::BridgeDescProvider>>,
230
231    /// A list of the bridges that we are configured to use, or "None" if we are
232    /// not configured to use bridges.
233    #[cfg(feature = "bridge-client")]
234    configured_bridges: Option<Arc<[bridge::BridgeConfig]>>,
235}
236
237/// A selector that names one [`GuardSet`] of several, e.g. the one currently in use.
238#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, strum::EnumIter)]
239enum GuardSetSelector {
240    /// The default guard set is currently in use: that's the one that we use
241    /// when we have no filter installed, or the filter permits most of the
242    /// guards on the network.
243    #[default]
244    Default,
245    /// The "restricted" guard set is currently in use: that's the one that we
246    /// use when we have a filter that excludes a large fraction of the guards
247    /// on the network.
248    Restricted,
249    /// The "bridges" guard set is currently in use: we are selecting our guards
250    /// from among the universe of configured bridges.
251    #[cfg(feature = "bridge-client")]
252    Bridges,
253}
254
255/// Describes the kind of [`Universe`] that a guard sample should take its guards from.
256#[derive(Clone, Copy, Debug, Eq, PartialEq)]
257enum UniverseType {
258    /// Take guards from the network directory.
259    NetDir,
260    /// Take guards from the set of configured bridges.
261    #[cfg(feature = "bridge-client")]
262    BridgeSet,
263}
264
265impl GuardSetSelector {
266    /// Return a description of which [`Universe`] this guard sample should take
267    /// its guards from.
268    fn universe_type(&self) -> UniverseType {
269        match self {
270            GuardSetSelector::Default | GuardSetSelector::Restricted => UniverseType::NetDir,
271            #[cfg(feature = "bridge-client")]
272            GuardSetSelector::Bridges => UniverseType::BridgeSet,
273        }
274    }
275}
276
277/// Persistent state for a guard manager, as serialized to disk.
278#[derive(Debug, Clone, Default, Serialize, Deserialize)]
279struct GuardSets {
280    /// Which set of guards is currently in use?  (Skipped by serde, so never stored.)
281    #[serde(skip)]
282    active_set: GuardSetSelector,
283
284    /// The default set of guards to use.
285    ///
286    /// We use this one when there is no filter, or the filter permits most of the
287    /// guards on the network.
288    default: GuardSet,
289
290    /// A guard set to use when we have a restrictive filter.
291    #[serde(default)]
292    restricted: GuardSet,
293
294    /// A guard set sampled from our configured bridges.
295    #[serde(default)]
296    #[cfg(feature = "bridge-client")]
297    bridges: GuardSet,
298
299    /// Unrecognized fields, including (possibly) other guard sets.
300    #[serde(flatten)]
301    remaining: HashMap<String, tor_persist::JsonValue>,
302}
303
304/// The key (filename) under which our persistent guard state is stored in
305/// the `StateMgr`; `GuardMgr::new` passes it to `create_handle`.
306///
307/// We used to store this in a different format in a filename called
308/// "default_guards" (before Arti 0.1.0).
309const STORAGE_KEY: &str = "guards";
310
311/// Which circuits to retire after a configuration change (see [`GuardMgr::reconfigure`]).
312///
313/// TODO(nickm): Eventually we will want to add a "Some" here, to support
314/// removing only those circuits that correspond to no-longer-usable guards.
315#[derive(Clone, Debug, Eq, PartialEq)]
316#[must_use]
317#[non_exhaustive]
318pub enum RetireCircuits {
319    /// There's no need to retire any circuits.
320    None,
321    /// All circuits should be retired.
322    All,
323}
324
325impl<R: Runtime> GuardMgr<R> {
326    /// Create a new "empty" guard manager and launch its background tasks.
327    ///
328    /// Any previously stored guard state is loaded from `state_mgr`.  The new
329    /// manager can't hand out guards until a [`NetDirProvider`] is installed.
330    #[instrument(skip_all, level = "trace")]
331    pub fn new<S>(
332        runtime: R,
333        state_mgr: S,
334        config: &impl GuardMgrConfig,
335    ) -> Result<Self, GuardMgrError>
336    where
337        S: StateMgr + Send + Sync + 'static,
338    {
339        let (ctrl, rcv) = mpsc::unbounded();
340        let storage: DynStorageHandle<GuardSets> = state_mgr.create_handle(STORAGE_KEY);
341        // TODO(nickm): We should do something about the old state in
342        // `default_guards`.  Probably it would be best to delete it.  We could
343        // try to migrate it instead, but that's beyond the stability guarantee
344        // that we're getting at this stage of our (pre-0.1) development.
345        let state = storage.load()?.unwrap_or_default();
346
347        let (send_skew, recv_skew) = postage::watch::channel();
348        let recv_skew = ClockSkewEvents { inner: recv_skew };
349
350        let inner = Arc::new(Mutex::new(GuardMgrInner {
351            guards: state,
352            filter: GuardFilter::unfiltered(),
353            last_primary_retry_time: runtime.now(),
354            params: GuardParams::default(),
355            ctrl,
356            pending: HashMap::new(),
357            waiting: Vec::new(),
358            fallbacks: config.fallbacks().into(),
359            storage,
360            send_skew,
361            recv_skew,
362            netdir_provider: None,
363            #[cfg(feature = "bridge-client")]
364            bridge_desc_provider: None,
365            #[cfg(feature = "bridge-client")]
366            configured_bridges: None,
367        }));
368        #[cfg(feature = "bridge-client")]
369        {
370            let mut inner = inner.lock().expect("lock poisoned");
371            // TODO(nickm): This calls `GuardMgrInner::update`. Will we mind doing so before any
372            // providers are configured? I think not, but we should make sure.
373            let _: RetireCircuits =
374                inner.replace_bridge_config(config, runtime.wallclock(), runtime.now())?;
375        }
376        {
377            let weak_inner = Arc::downgrade(&inner);
378            let rt_clone = runtime.clone();
379            runtime
380                .spawn(daemon::report_status_events(rt_clone, weak_inner, rcv))
381                .map_err(|e| GuardMgrError::from_spawn("guard status event reporter", e))?;
382        }
383        {
384            let rt_clone = runtime.clone();
385            let weak_inner = Arc::downgrade(&inner);
386            runtime
387                .spawn(daemon::run_periodic(rt_clone, weak_inner))
388                .map_err(|e| GuardMgrError::from_spawn("periodic guard updater", e))?;
389        }
390        Ok(GuardMgr { runtime, inner })
391    }
392
393    /// Install a [`NetDirProvider`] for use by this guard manager.
394    ///
395    /// It will be used to keep the guards up-to-date with changes from the
396    /// network directory, and to find new guards when no NetDir is provided to
397    /// select_guard().
398    ///
399    /// TODO: we should eventually return some kind of a task handle from this
400    /// task, even though it is not strictly speaking periodic.
401    ///
402    /// The guardmgr retains only a `Weak` reference to `provider`,
403    /// `install_netdir_provider` downgrades it on entry,
404    // TODO add ref to document when https://gitlab.torproject.org/tpo/core/arti/-/issues/624
405    // is fixed.  Also, maybe take an owned `Weak` to start with.
406    //
407    /// # Panics
408    ///
409    /// Panics if a [`NetDirProvider`] is already installed.
410    pub fn install_netdir_provider(
411        &self,
412        provider: &Arc<dyn NetDirProvider>,
413    ) -> Result<(), GuardMgrError> {
414        let weak_provider = Arc::downgrade(provider);
415        {
416            let mut inner = self.inner.lock().expect("Poisoned lock");
417            assert!(inner.netdir_provider.is_none());
418            inner.netdir_provider = Some(weak_provider.clone());
419        }
420        let weak_inner = Arc::downgrade(&self.inner);
421        let rt_clone = self.runtime.clone();
422        self.runtime
423            .spawn(daemon::keep_netdir_updated(
424                rt_clone,
425                weak_inner,
426                weak_provider,
427            ))
428            .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
429        Ok(())
430    }
431
432    /// Configure a new [`bridge::BridgeDescProvider`] for this [`GuardMgr`].
433    ///
434    /// It will be used to learn about changes in the set of available bridge
435    /// descriptors; we'll inform it whenever our desired set of bridge
436    /// descriptors changes.
437    ///
438    /// TODO: Same todo as in `install_netdir_provider` about task handles.
439    ///
440    /// # Panics
441    ///
442    /// Panics if a [`bridge::BridgeDescProvider`] is already installed.
443    #[cfg(feature = "bridge-client")]
444    pub fn install_bridge_desc_provider(
445        &self,
446        provider: &Arc<dyn bridge::BridgeDescProvider>,
447    ) -> Result<(), GuardMgrError> {
448        let weak_provider = Arc::downgrade(provider);
449        {
450            let mut inner = self.inner.lock().expect("Poisoned lock");
451            assert!(inner.bridge_desc_provider.is_none());
452            inner.bridge_desc_provider = Some(weak_provider.clone());
453        }
454
455        let weak_inner = Arc::downgrade(&self.inner);
456        let rt_clone = self.runtime.clone();
457        self.runtime
458            .spawn(daemon::keep_bridge_descs_updated(
459                rt_clone,
460                weak_inner,
461                weak_provider,
462            ))
463            .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
464
465        Ok(())
466    }
467
468    /// Flush our current guard state to the state manager, if there
469    /// is any unsaved state.
470    pub fn store_persistent_state(&self) -> Result<(), GuardMgrError> {
471        let inner = self.inner.lock().expect("Poisoned lock");
472        trace!("Flushing guard state to disk.");
473        inner.storage.store(&inner.guards)?;
474        Ok(())
475    }
476
477    /// Reload state from the state manager.
478    ///
479    /// We only call this method if we _don't_ have the lock on the state
480    /// files.  If we have the lock, we only want to save.
481    #[instrument(level = "trace", skip_all)]
482    pub fn reload_persistent_state(&self) -> Result<(), GuardMgrError> {
483        let mut inner = self.inner.lock().expect("Poisoned lock");
484        if let Some(new_guards) = inner.storage.load()? {
485            inner.replace_guards_with(new_guards, self.runtime.wallclock(), self.runtime.now());
486        }
487        Ok(())
488    }
489
490    /// Switch from having an unowned persistent state to having an owned one.
491    ///
492    /// Requires that we hold the lock on the state files.
493    #[instrument(level = "trace", skip_all)]
494    pub fn upgrade_to_owned_persistent_state(&self) -> Result<(), GuardMgrError> {
495        let mut inner = self.inner.lock().expect("Poisoned lock");
496        debug_assert!(inner.storage.can_store());
497        let new_guards = inner.storage.load()?.unwrap_or_default();
498        let wallclock = self.runtime.wallclock();
499        let now = self.runtime.now();
500        inner.replace_guards_with(new_guards, wallclock, now);
501        Ok(())
502    }
503
504    /// Return true if `netdir` has enough information to safely become our new netdir.
505    pub fn netdir_is_sufficient(&self, netdir: &NetDir) -> bool {
506        let mut inner = self.inner.lock().expect("Poisoned lock");
507        if inner.guards.active_set.universe_type() != UniverseType::NetDir {
508            // If we aren't using the netdir, this isn't something we want to look at.
509            return true;
510        }
511        inner
512            .guards
513            .active_guards_mut()
514            .n_primary_without_id_info_in(netdir)
515            == 0
516    }
517
518    /// Mark every guard as potentially retriable, regardless of how recently we
519    /// failed to connect to it.
520    pub fn mark_all_guards_retriable(&self) {
521        let mut inner = self.inner.lock().expect("Poisoned lock");
522        inner.guards.active_guards_mut().mark_all_guards_retriable();
523    }
524
525    /// Configure this guardmgr to use a fixed [`NetDir`] instead of a provider.
526    ///
527    /// This function is for testing only, and is exclusive with
528    /// `install_netdir_provider`.
529    ///
530    /// # Panics
531    ///
532    /// Panics if any [`NetDirProvider`] has already been installed.
533    #[cfg(any(test, feature = "testing"))]
534    pub fn install_test_netdir(&self, netdir: &NetDir) {
535        use tor_netdir::testprovider::TestNetDirProvider;
536        let wallclock = self.runtime.wallclock();
537        let now = self.runtime.now();
538        let netdir_provider: Arc<dyn NetDirProvider> =
539            Arc::new(TestNetDirProvider::from(netdir.clone()));
540        self.install_netdir_provider(&netdir_provider)
541            .expect("Couldn't install testing network provider");
542
543        let mut inner = self.inner.lock().expect("Poisoned lock");
544        inner.update(wallclock, now);
545    }
546
547    /// Replace the configuration in this `GuardMgr` with `config`.
548    #[instrument(level = "trace", skip_all)]
549    pub fn reconfigure(
550        &self,
551        config: &impl GuardMgrConfig,
552    ) -> Result<RetireCircuits, ReconfigureError> {
553        let mut inner = self.inner.lock().expect("Poisoned lock");
554        // Change the set of configured fallbacks.
555        {
556            let mut fallbacks: fallback::FallbackState = config.fallbacks().into();
557            std::mem::swap(&mut inner.fallbacks, &mut fallbacks);
558            inner.fallbacks.take_status_from(fallbacks);
559        }
560        // If we are built to use bridges, change the bridge configuration.
561        #[cfg(feature = "bridge-client")]
562        {
563            let wallclock = self.runtime.wallclock();
564            let now = self.runtime.now();
565            Ok(inner.replace_bridge_config(config, wallclock, now)?)
566        }
567        // If we are built to use bridges, change the bridge configuration.
568        #[cfg(not(feature = "bridge-client"))]
569        {
570            Ok(RetireCircuits::None)
571        }
572    }
573
574    /// Replace the current [`GuardFilter`] used by this `GuardMgr`.
575    // TODO should this be part of the config?
576    pub fn set_filter(&self, filter: GuardFilter) {
577        let wallclock = self.runtime.wallclock();
578        let now = self.runtime.now();
579        let mut inner = self.inner.lock().expect("Poisoned lock");
580        inner.set_filter(filter, wallclock, now);
581    }
582
583    /// Select a guard for a given [`GuardUsage`].
584    ///
585    /// On success, we return a [`FirstHop`] object to identify which
586    /// guard we have picked, a [`GuardMonitor`] object that the
587    /// caller can use to report whether its attempt to use the guard
588    /// succeeded or failed, and a [`GuardUsable`] future that the
589    /// caller can use to decide whether a circuit built through the
590    /// guard is actually safe to use.
591    ///
592    /// That last point is important: It's okay to build a circuit
593    /// through the guard returned by this function, but you can't
594    /// actually use it for traffic unless the [`GuardUsable`] future
595    /// yields "true".
596    #[instrument(skip_all, level = "trace")]
597    pub fn select_guard(
598        &self,
599        usage: GuardUsage,
600    ) -> Result<(FirstHop, GuardMonitor, GuardUsable), PickGuardError> {
601        let now = self.runtime.now();
602        let wallclock = self.runtime.wallclock();
603
604        let mut inner = self.inner.lock().expect("Poisoned lock");
605
606        // (I am not 100% sure that we need to consider_all_retries here, but
607        // it should _probably_ not hurt.)
608        inner.guards.active_guards_mut().consider_all_retries(now);
609
610        let (origin, guard) = inner.select_guard_with_expand(&usage, now, wallclock)?;
611        trace!(?guard, ?usage, "Guard selected");
612
613        let (usable, usable_sender) = if origin.usable_immediately() {
614            (GuardUsable::new_usable_immediately(), None)
615        } else {
616            let (u, snd) = GuardUsable::new_uncertain();
617            (u, Some(snd))
618        };
619        let request_id = pending::RequestId::next();
620        let ctrl = inner.ctrl.clone();
621        let monitor = GuardMonitor::new(request_id, ctrl);
622
623        // Note that the network can be down even if all the primary guards
624        // are not yet marked as unreachable.  But according to guard-spec we
625        // don't want to acknowledge the net as down before that point, since
626        // we don't mark all the primary guards as retriable unless
627        // we've been forced to non-primary guards.
628        let net_has_been_down =
629            if let Some(duration) = tor_proto::time_since_last_incoming_traffic() {
630                inner
631                    .guards
632                    .active_guards_mut()
633                    .all_primary_guards_are_unreachable()
634                    && duration >= inner.params.internet_down_timeout
635            } else {
636                // `time_since_last_incoming_traffic()` gave us `None`.  TODO: Is `false` the correct answer in this case?
637                false
638            };
639
640        let pending_request = pending::PendingRequest::new(
641            guard.first_hop_id(),
642            usage,
643            usable_sender,
644            net_has_been_down,
645        );
646        inner.pending.insert(request_id, pending_request);
647
648        match &guard.sample {
649            Some(sample) => {
650                let guard_id = GuardId::from_relay_ids(&guard);
651                inner
652                    .guards
653                    .guards_mut(sample)
654                    .record_attempt(&guard_id, now);
655            }
656            None => {
657                // We don't record attempts for fallbacks; we only care when
658                // they have failed.
659            }
660        }
661
662        Ok((guard, monitor, usable))
663    }
664
665    /// Record that _after_ we built a circuit with a guard, something described
666    /// in `external_failure` went wrong with it.
667    pub fn note_external_failure<T>(&self, identity: &T, external_failure: ExternalActivity)
668    where
669        T: tor_linkspec::HasRelayIds + ?Sized,
670    {
671        let now = self.runtime.now();
672        let mut inner = self.inner.lock().expect("Poisoned lock");
673        let ids = inner.lookup_ids(identity);
674        for id in ids {
675            match &id.0 {
676                FirstHopIdInner::Guard(sample, id) => {
677                    inner
678                        .guards
679                        .guards_mut(sample)
680                        .record_failure(id, Some(external_failure), now);
681                }
682                FirstHopIdInner::Fallback(id) => {
683                    if external_failure == ExternalActivity::DirCache {
684                        inner.fallbacks.note_failure(id, now);
685                    }
686                }
687            }
688        }
689    }
690
691    /// Record that _after_ we built a circuit with a guard, some activity
692    /// described in `external_activity` was successful with it.
693    pub fn note_external_success<T>(&self, identity: &T, external_activity: ExternalActivity)
694    where
695        T: tor_linkspec::HasRelayIds + ?Sized,
696    {
697        let mut inner = self.inner.lock().expect("Poisoned lock");
698
699        inner.record_external_success(identity, external_activity, self.runtime.wallclock());
700    }
701
702    /// Return a stream of events about our estimated clock skew; these events
703    /// are `None` when we don't have enough information to make an estimate,
704    /// and `Some(`[`SkewEstimate`]`)` otherwise.
705    ///
706    /// Note that this stream can be lossy: if the estimate changes more than
707    /// one before you read from the stream, you might only get the most recent
708    /// update.
709    pub fn skew_events(&self) -> ClockSkewEvents {
710        let inner = self.inner.lock().expect("Poisoned lock");
711        inner.recv_skew.clone()
712    }
713
714    /// Ensure that the message queue is flushed before proceeding to
715    /// the next step.  Used for testing.
716    #[cfg(test)]
717    async fn flush_msg_queue(&self) {
718        let (snd, rcv) = oneshot::channel();
719        let pingmsg = daemon::Msg::Ping(snd);
720        {
721            let inner = self.inner.lock().expect("Poisoned lock");
722            inner
723                .ctrl
724                .unbounded_send(pingmsg)
725                .expect("Guard observer task exited prematurely.");
726        }
727        let _ = rcv.await;
728    }
729}
730
731/// An activity performed through a guard that can succeed or fail, and whose
732/// success or failure can be attributed to that guard.
733#[derive(Copy, Clone, Debug, Eq, PartialEq)]
734#[non_exhaustive]
735pub enum ExternalActivity {
736    /// The activity of using the guard as a directory cache.
737    DirCache,
738}
739
740impl GuardSets {
741    /// Return a reference to the currently active set of guards.
742    ///
743    /// (That's easy enough for now, since there is never more than one set of
744    /// guards.  But eventually that will change, as we add support for more
745    /// complex filter types, and for bridge relays. Those will use separate
746    /// `GuardSet` instances, and this accessor will choose the right one.)
747    fn active_guards(&self) -> &GuardSet {
748        self.guards(&self.active_set)
749    }
750
751    /// Return the set of guards corresponding to the provided selector.
752    fn guards(&self, selector: &GuardSetSelector) -> &GuardSet {
753        match selector {
754            GuardSetSelector::Default => &self.default,
755            GuardSetSelector::Restricted => &self.restricted,
756            #[cfg(feature = "bridge-client")]
757            GuardSetSelector::Bridges => &self.bridges,
758        }
759    }
760
761    /// Return a mutable reference to the currently active set of guards.
762    fn active_guards_mut(&mut self) -> &mut GuardSet {
763        self.guards_mut(&self.active_set.clone())
764    }
765
766    /// Return a mutable reference to the set of guards corresponding to the
767    /// provided selector.
768    fn guards_mut(&mut self, selector: &GuardSetSelector) -> &mut GuardSet {
769        match selector {
770            GuardSetSelector::Default => &mut self.default,
771            GuardSetSelector::Restricted => &mut self.restricted,
772            #[cfg(feature = "bridge-client")]
773            GuardSetSelector::Bridges => &mut self.bridges,
774        }
775    }
776
777    /// Update all non-persistent state for the guards in this object with the
778    /// state in `other`.
779    fn copy_status_from(&mut self, mut other: GuardSets) {
780        use strum::IntoEnumIterator;
781        for sample in GuardSetSelector::iter() {
782            self.guards_mut(&sample)
783                .copy_ephemeral_status_into_newly_loaded_state(std::mem::take(
784                    other.guards_mut(&sample),
785                ));
786        }
787        self.active_set = other.active_set;
788    }
789}
790
791impl GuardMgrInner {
792    /// Look up the latest [`NetDir`] (if there is one) from our
793    /// [`NetDirProvider`] (if we have one).
794    fn timely_netdir(&self) -> Option<Arc<NetDir>> {
795        self.netdir_provider
796            .as_ref()
797            .and_then(Weak::upgrade)
798            .and_then(|np| np.timely_netdir().ok())
799    }
800
801    /// Look up the latest [`BridgeDescList`](bridge::BridgeDescList) (if there
802    /// is one) from our [`BridgeDescProvider`](bridge::BridgeDescProvider) (if
803    /// we have one).
804    #[cfg(feature = "bridge-client")]
805    fn latest_bridge_desc_list(&self) -> Option<Arc<bridge::BridgeDescList>> {
806        self.bridge_desc_provider
807            .as_ref()
808            .and_then(Weak::upgrade)
809            .map(|bp| bp.bridges())
810    }
811
812    /// Run a function that takes `&mut self` and an optional NetDir.
813    ///
814    /// We try to use the netdir from our [`NetDirProvider`] (if we have one).
815    /// Therefore, although its _parameters_ are suitable for every
816    /// [`GuardSet`], its _contents_ might not be. For those, call
817    /// [`with_opt_universe`](Self::with_opt_universe) instead.
818    //
819    // This function exists to handle the lifetime mess where sometimes the
820    // resulting NetDir will borrow from `netdir`, and sometimes it will borrow
821    // from an Arc returned by `self.latest_netdir()`.
822    fn with_opt_netdir<F, T>(&mut self, func: F) -> T
823    where
824        F: FnOnce(&mut Self, Option<&NetDir>) -> T,
825    {
826        if let Some(nd) = self.timely_netdir() {
827            func(self, Some(nd.as_ref()))
828        } else {
829            func(self, None)
830        }
831    }
832
833    /// Return the latest `BridgeSet` based on our `BridgeDescProvider` and our
834    /// configured bridges.
835    ///
836    /// Returns `None` if we are not configured to use bridges.
837    #[cfg(feature = "bridge-client")]
838    fn latest_bridge_set(&self) -> Option<bridge::BridgeSet> {
839        let bridge_config = self.configured_bridges.as_ref()?.clone();
840        let bridge_descs = self.latest_bridge_desc_list();
841        Some(bridge::BridgeSet::new(bridge_config, bridge_descs))
842    }
843
844    /// Run a function that takes `&mut self` and an optional [`UniverseRef`].
845    ///
846    /// We try to get a universe from the appropriate source for the current
847    /// active guard set.
848    fn with_opt_universe<F, T>(&mut self, func: F) -> T
849    where
850        F: FnOnce(&mut Self, Option<&UniverseRef>) -> T,
851    {
852        // TODO: it might be nice to make `func` take an GuardSet and a set of
853        // parameters, so we can't get the active set wrong. Doing that will
854        // require a fair amount of refactoring so that the borrow checker is
855        // happy, however.
856        match self.guards.active_set.universe_type() {
857            UniverseType::NetDir => {
858                if let Some(nd) = self.timely_netdir() {
859                    func(self, Some(&UniverseRef::NetDir(nd)))
860                } else {
861                    func(self, None)
862                }
863            }
864            #[cfg(feature = "bridge-client")]
865            UniverseType::BridgeSet => func(
866                self,
867                self.latest_bridge_set()
868                    .map(UniverseRef::BridgeSet)
869                    .as_ref(),
870            ),
871        }
872    }
873
874    /// Update the status of all guards in the active set, based on the passage
875    /// of time, our configuration, and the relevant Universe for our active
876    /// set.
877    #[instrument(skip_all, level = "trace")]
878    fn update(&mut self, wallclock: SystemTime, now: Instant) {
879        self.with_opt_netdir(|this, netdir| {
880            // Here we update our parameters from the latest NetDir, and check
881            // whether we need to change to a (non)-restrictive GuardSet based
882            // on those parameters and our configured filter.
883            //
884            // This uses a NetDir unconditionally, since we always want to take
885            // the network parameters our parameters from the consensus even if
886            // the guards themselves are from a BridgeSet.
887            this.update_active_set_params_and_filter(netdir);
888        });
889        self.with_opt_universe(|this, univ| {
890            // Now we update the set of guards themselves based on the
891            // Universe, which is either the latest NetDir, or the latest
892            // BridgeSet—depending on what the GuardSet wants.
893            Self::update_guardset_internal(
894                &this.params,
895                wallclock,
896                this.guards.active_set.universe_type(),
897                this.guards.active_guards_mut(),
898                univ,
899            );
900            #[cfg(feature = "bridge-client")]
901            this.update_desired_descriptors(now);
902            #[cfg(not(feature = "bridge-client"))]
903            let _ = now;
904        });
905    }
906
907    /// Replace our bridge configuration with the one from `new_config`.
908    #[cfg(feature = "bridge-client")]
909    #[instrument(level = "trace", skip_all)]
910    fn replace_bridge_config(
911        &mut self,
912        new_config: &impl GuardMgrConfig,
913        wallclock: SystemTime,
914        now: Instant,
915    ) -> Result<RetireCircuits, GuardMgrConfigError> {
916        match (&self.configured_bridges, new_config.bridges_enabled()) {
917            (None, false) => {
918                assert_ne!(
919                    self.guards.active_set.universe_type(),
920                    UniverseType::BridgeSet
921                );
922                return Ok(RetireCircuits::None); // nothing to do
923            }
924            (_, true) if !self.storage.can_store() => {
925                // TODO: Ideally we would try to upgrade, obtaining an exclusive lock,
926                // but `StorageHandle` currently lacks a method for that.
927                return Err(GuardMgrConfigError::NoLock("bridges configured".into()));
928            }
929            (Some(current_bridges), true) if new_config.bridges() == current_bridges.as_ref() => {
930                assert_eq!(
931                    self.guards.active_set.universe_type(),
932                    UniverseType::BridgeSet
933                );
934                return Ok(RetireCircuits::None); // nothing to do.
935            }
936            (_, true) => {
937                self.configured_bridges = Some(new_config.bridges().into());
938                self.guards.active_set = GuardSetSelector::Bridges;
939            }
940            (_, false) => {
941                self.configured_bridges = None;
942                self.guards.active_set = GuardSetSelector::Default;
943            }
944        }
945
946        // If we have gotten here, we have changed the set of bridges, changed
947        // which set is active, or changed them both.  We need to make sure that
948        // our `GuardSet` object is up-to-date with our configuration.
949        self.update(wallclock, now);
950
951        // We also need to tell the caller that its circuits are no good any
952        // more.
953        //
954        // TODO(nickm): Someday we can do this more judiciously by retuning
955        // "Some" in the case where we're still using bridges but our new bridge
956        // set contains different elements; see comment on RetireCircuits.
957        //
958        // TODO(nickm): We could also safely return RetireCircuits::None if we
959        // are using bridges, and our new bridge list is a superset of the older
960        // one.
961        Ok(RetireCircuits::All)
962    }
963
964    /// Update our parameters, our selection (based on network parameters and
965    /// configuration), and make sure the active GuardSet has the right
966    /// configuration itself.
967    ///
968    /// We should call this whenever the NetDir's parameters change, or whenever
969    /// our filter changes.  We do not need to call it for new elements arriving
970    /// in our Universe, since those do not affect anything here.
971    ///
972    /// We should also call this whenever a new GuardSet becomes active for any
973    /// reason _other_ than just having called this function.
974    ///
975    /// (This function is only invoked from `update`, which should be called
976    /// under the above circumstances.)
977    fn update_active_set_params_and_filter(&mut self, netdir: Option<&NetDir>) {
978        // Set the parameters.  These always come from the NetDir, even if this
979        // is a bridge set.
980        if let Some(netdir) = netdir {
981            match GuardParams::try_from(netdir.params()) {
982                Ok(params) => self.params = params,
983                Err(e) => warn!("Unusable guard parameters from consensus: {}", e),
984            }
985
986            self.select_guard_set_based_on_filter(netdir);
987        }
988
989        // Change the filter, if it doesn't match what the guards have.
990        //
991        // TODO(nickm): We could use a "dirty" flag or something to decide
992        // whether we need to call set_filter, if this comparison starts to show
993        // up in profiles.
994        if self.guards.active_guards().filter() != &self.filter {
995            let restrictive = self.guards.active_set == GuardSetSelector::Restricted;
996            self.guards
997                .active_guards_mut()
998                .set_filter(self.filter.clone(), restrictive);
999        }
1000    }
1001
1002    /// Update the status of every guard in `active_guards`, and expand it as
1003    /// needed.
1004    ///
1005    /// This function doesn't take `&self`, to make sure that we are only
1006    /// affecting a single `GuardSet`, and to avoid confusing the borrow
1007    /// checker.
1008    ///
1009    /// We should call this whenever the contents of the universe have changed.
1010    ///
1011    /// We should also call this whenever a new GuardSet becomes active.
1012    fn update_guardset_internal<U: Universe>(
1013        params: &GuardParams,
1014        now: SystemTime,
1015        universe_type: UniverseType,
1016        active_guards: &mut GuardSet,
1017        universe: Option<&U>,
1018    ) -> ExtendedStatus {
1019        // Expire guards.  Do that early, in case doing so makes it clear that
1020        // we need to grab more guards or mark others as primary.
1021        active_guards.expire_old_guards(params, now);
1022
1023        let extended = if let Some(universe) = universe {
1024            // TODO: This check here may be completely unnecessary. I inserted
1025            // it back in 5ac0fcb7ef603e0d14 because I was originally concerned
1026            // it might be undesirable to list a primary guard as "missing dir
1027            // info" (and therefore unusable) if we were expecting to get its
1028            // microdescriptor "very soon."
1029            //
1030            // But due to the other check in `netdir_is_sufficient`, we
1031            // shouldn't be installing a netdir until it has microdescs for all
1032            // of the (non-bridge) primary guards that it lists. - nickm
1033            let n = active_guards.n_primary_without_id_info_in(universe);
1034            if n > 0 && universe_type == UniverseType::NetDir {
1035                // We are missing the information from a NetDir needed to see
1036                // whether our primary guards are listed, so we shouldn't update
1037                // our guard status.
1038                //
1039                // We don't want to do this check if we are using bridges, since
1040                // a missing bridge descriptor is not guaranteed to temporary
1041                // problem in the same way that a missing microdescriptor is.
1042                // (When a bridge desc is missing, the bridge could be down or
1043                // unreachable, and nobody else can help us. But if a microdesc
1044                // is missing, we just need to find a cache that has it.)
1045                trace!(
1046                    n_primary_without_id_info = n,
1047                    "Not extending guardset, missing information."
1048                );
1049                return ExtendedStatus::No;
1050            }
1051            active_guards.update_status_from_dir(universe);
1052            active_guards.extend_sample_as_needed(now, params, universe)
1053        } else {
1054            trace!("Not extending guardset, no universe given.");
1055            ExtendedStatus::No
1056        };
1057
1058        active_guards.select_primary_guards(params);
1059
1060        extended
1061    }
1062
1063    /// If using bridges, tell the BridgeDescProvider which descriptors we want.
1064    /// We need to check this *after* we select our primary guards.
1065    #[cfg(feature = "bridge-client")]
1066    fn update_desired_descriptors(&mut self, now: Instant) {
1067        if self.guards.active_set.universe_type() != UniverseType::BridgeSet {
1068            return;
1069        }
1070
1071        let provider = self.bridge_desc_provider.as_ref().and_then(Weak::upgrade);
1072        let bridge_set = self.latest_bridge_set();
1073        if let (Some(provider), Some(bridge_set)) = (provider, bridge_set) {
1074            let desired: Vec<_> = self
1075                .guards
1076                .active_guards()
1077                .descriptors_to_request(now, &self.params)
1078                .into_iter()
1079                .flat_map(|guard| bridge_set.bridge_by_guard(guard))
1080                .cloned()
1081                .collect();
1082
1083            provider.set_bridges(&desired);
1084        }
1085    }
1086
1087    /// Replace the active guard state with `new_state`, preserving
1088    /// non-persistent state for any guards that are retained.
1089    #[instrument(level = "trace", skip_all)]
1090    fn replace_guards_with(
1091        &mut self,
1092        mut new_guards: GuardSets,
1093        wallclock: SystemTime,
1094        now: Instant,
1095    ) {
1096        std::mem::swap(&mut self.guards, &mut new_guards);
1097        self.guards.copy_status_from(new_guards);
1098        self.update(wallclock, now);
1099    }
1100
1101    /// Update which guard set is active based on the current filter and the
1102    /// provided netdir.
1103    ///
1104    /// After calling this function, the new guard set's filter may be
1105    /// out-of-date: be sure to call `set_filter` as appropriate.
1106    fn select_guard_set_based_on_filter(&mut self, netdir: &NetDir) {
1107        // In general, we'd like to use the restricted set if we're under the
1108        // threshold, and the default set if we're over the threshold.  But if
1109        // we're sitting close to the threshold, we want to avoid flapping back
1110        // and forth, so we only change when we're more than 5% "off" from
1111        // whatever our current setting is.
1112        //
1113        // (See guard-spec section 2 for more information.)
1114        let offset = match self.guards.active_set {
1115            GuardSetSelector::Default => -0.05,
1116            GuardSetSelector::Restricted => 0.05,
1117            // If we're using bridges, then we don't switch between the other guard sets based on the filter at all.
1118            #[cfg(feature = "bridge-client")]
1119            GuardSetSelector::Bridges => return,
1120        };
1121        let frac_permitted = self.filter.frac_bw_permitted(netdir);
1122        let threshold = self.params.filter_threshold + offset;
1123        let new_choice = if frac_permitted < threshold {
1124            GuardSetSelector::Restricted
1125        } else {
1126            GuardSetSelector::Default
1127        };
1128
1129        if new_choice != self.guards.active_set {
1130            info!(
1131                "Guard selection changed; we are now using the {:?} guard set",
1132                &new_choice
1133            );
1134
1135            self.guards.active_set = new_choice;
1136
1137            if frac_permitted < self.params.extreme_threshold {
1138                warn!(
1139                    "The number of guards permitted is smaller than the recommended minimum of {:.0}%.",
1140                    self.params.extreme_threshold * 100.0,
1141                );
1142            }
1143        }
1144    }
1145
1146    /// Mark all of our primary guards as retriable, if we haven't done
1147    /// so since long enough before `now`.
1148    ///
1149    /// We want to call this function whenever a guard attempt succeeds,
1150    /// if the internet seemed to be down when the guard attempt was
1151    /// first launched.
1152    fn maybe_retry_primary_guards(&mut self, now: Instant) {
1153        // We don't actually want to mark our primary guards as
1154        // retriable more than once per internet_down_timeout: after
1155        // the first time, we would just be noticing the same "coming
1156        // back online" event more than once.
1157        let interval = self.params.internet_down_timeout;
1158        if self.last_primary_retry_time + interval <= now {
1159            debug!(
1160                "Successfully reached a guard after a while off the internet; marking all primary guards retriable."
1161            );
1162            self.guards
1163                .active_guards_mut()
1164                .mark_primary_guards_retriable();
1165            self.last_primary_retry_time = now;
1166        }
1167    }
1168
1169    /// Replace the current GuardFilter with `filter`.
1170    #[instrument(level = "trace", skip_all)]
1171    fn set_filter(&mut self, filter: GuardFilter, wallclock: SystemTime, now: Instant) {
1172        self.filter = filter;
1173        self.update(wallclock, now);
1174    }
1175
1176    /// Called when the circuit manager reports (via [`GuardMonitor`]) that
1177    /// a guard succeeded or failed.
1178    ///
1179    /// Changes the guard's status as appropriate, and updates the pending
1180    /// request as needed.
1181    #[allow(clippy::cognitive_complexity)]
1182    pub(crate) fn handle_msg(
1183        &mut self,
1184        request_id: RequestId,
1185        status: GuardStatus,
1186        skew: Option<ClockSkew>,
1187        runtime: &impl tor_rtcompat::SleepProvider,
1188    ) {
1189        if let Some(mut pending) = self.pending.remove(&request_id) {
1190            // If there was a pending request matching this RequestId, great!
1191            let guard_id = pending.guard_id();
1192            trace!(?guard_id, ?status, "Received report of guard status");
1193
1194            // First, handle the skew report (if any)
1195            if let Some(skew) = skew {
1196                let now = runtime.now();
1197                let observation = skew::SkewObservation { skew, when: now };
1198
1199                match &guard_id.0 {
1200                    FirstHopIdInner::Guard(_, id) => {
1201                        self.guards.active_guards_mut().record_skew(id, observation);
1202                    }
1203                    FirstHopIdInner::Fallback(id) => {
1204                        self.fallbacks.note_skew(id, observation);
1205                    }
1206                }
1207                // TODO: We call this whenever we receive an observed clock
1208                // skew. That's not the perfect timing for two reasons.  First
1209                // off, it might be too frequent: it does an O(n) calculation,
1210                // which isn't ideal.  Second, it might be too infrequent: after
1211                // an hour has passed, a given observation won't be up-to-date
1212                // any more, and we might want to recalculate the skew
1213                // accordingly.
1214                self.update_skew(now);
1215            }
1216
1217            match (status, &guard_id.0) {
1218                (GuardStatus::Failure, FirstHopIdInner::Fallback(id)) => {
1219                    // We used a fallback, and we weren't able to build a circuit through it.
1220                    self.fallbacks.note_failure(id, runtime.now());
1221                }
1222                (_, FirstHopIdInner::Fallback(_)) => {
1223                    // We don't record any other kind of circuit activity if we
1224                    // took the entry from the fallback list.
1225                }
1226                (GuardStatus::Success, FirstHopIdInner::Guard(sample, id)) => {
1227                    // If we had gone too long without any net activity when we
1228                    // gave out this guard, and now we're seeing a circuit
1229                    // succeed, tell the primary guards that they might be
1230                    // retriable.
1231                    if pending.net_has_been_down() {
1232                        self.maybe_retry_primary_guards(runtime.now());
1233                    }
1234
1235                    // The guard succeeded.  Tell the GuardSet.
1236                    self.guards.guards_mut(sample).record_success(
1237                        id,
1238                        &self.params,
1239                        None,
1240                        runtime.wallclock(),
1241                    );
1242                    // Either tell the request whether the guard is
1243                    // usable, or schedule it as a "waiting" request.
1244                    if let Some(usable) = self.guard_usability_status(&pending, runtime.now()) {
1245                        trace!(?guard_id, usable, "Known usability status");
1246                        pending.reply(usable);
1247                    } else {
1248                        // This is the one case where we can't use the
1249                        // guard yet.
1250                        trace!(?guard_id, "Not able to answer right now");
1251                        pending.mark_waiting(runtime.now());
1252                        self.waiting.push(pending);
1253                    }
1254                }
1255                (GuardStatus::Failure, FirstHopIdInner::Guard(sample, id)) => {
1256                    self.guards
1257                        .guards_mut(sample)
1258                        .record_failure(id, None, runtime.now());
1259                    pending.reply(false);
1260                }
1261                (GuardStatus::AttemptAbandoned, FirstHopIdInner::Guard(sample, id)) => {
1262                    self.guards.guards_mut(sample).record_attempt_abandoned(id);
1263                    pending.reply(false);
1264                }
1265                (GuardStatus::Indeterminate, FirstHopIdInner::Guard(sample, id)) => {
1266                    self.guards
1267                        .guards_mut(sample)
1268                        .record_indeterminate_result(id);
1269                    pending.reply(false);
1270                }
1271            };
1272        } else {
1273            warn!(
1274                "Got a status {:?} for a request {:?} that wasn't pending",
1275                status, request_id
1276            );
1277        }
1278
1279        // We might need to update the primary guards based on changes in the
1280        // status of guards above.
1281        self.guards
1282            .active_guards_mut()
1283            .select_primary_guards(&self.params);
1284
1285        // Some waiting request may just have become ready (usable or
1286        // not); we need to give them the information they're waiting
1287        // for.
1288        self.expire_and_answer_pending_requests(runtime.now());
1289    }
1290
1291    /// Helper to implement `GuardMgr::note_external_success()`.
1292    ///
1293    /// (This has to be a separate function so that we can borrow params while
1294    /// we have `mut self` borrowed.)
1295    fn record_external_success<T>(
1296        &mut self,
1297        identity: &T,
1298        external_activity: ExternalActivity,
1299        now: SystemTime,
1300    ) where
1301        T: tor_linkspec::HasRelayIds + ?Sized,
1302    {
1303        for id in self.lookup_ids(identity) {
1304            match &id.0 {
1305                FirstHopIdInner::Guard(sample, id) => {
1306                    self.guards.guards_mut(sample).record_success(
1307                        id,
1308                        &self.params,
1309                        Some(external_activity),
1310                        now,
1311                    );
1312                }
1313                FirstHopIdInner::Fallback(id) => {
1314                    if external_activity == ExternalActivity::DirCache {
1315                        self.fallbacks.note_success(id);
1316                    }
1317                }
1318            }
1319        }
1320    }
1321
1322    /// Return an iterator over all of the clock skew observations we've made
1323    /// for guards or fallbacks.
1324    fn skew_observations(&self) -> impl Iterator<Item = &skew::SkewObservation> {
1325        self.fallbacks
1326            .skew_observations()
1327            .chain(self.guards.active_guards().skew_observations())
1328    }
1329
1330    /// Recalculate our estimated clock skew, and publish it to anybody who
1331    /// cares.
1332    fn update_skew(&mut self, now: Instant) {
1333        let estimate = skew::SkewEstimate::estimate_skew(self.skew_observations(), now);
1334        // TODO: we might want to do this only conditionally, when the skew
1335        // estimate changes.
1336        *self.send_skew.borrow_mut() = estimate;
1337    }
1338
1339    /// If the circuit built because of a given [`PendingRequest`] may
1340    /// now be used (or discarded), return `Some(true)` or
1341    /// `Some(false)` respectively.
1342    ///
1343    /// Return None if we can't yet give an answer about whether such
1344    /// a circuit is usable.
1345    fn guard_usability_status(&self, pending: &PendingRequest, now: Instant) -> Option<bool> {
1346        match &pending.guard_id().0 {
1347            FirstHopIdInner::Guard(sample, id) => self.guards.guards(sample).circ_usability_status(
1348                id,
1349                pending.usage(),
1350                &self.params,
1351                now,
1352            ),
1353            // Fallback circuits are usable immediately, since we don't have to wait to
1354            // see whether any _other_ circuit succeeds or fails.
1355            FirstHopIdInner::Fallback(_) => Some(true),
1356        }
1357    }
1358
1359    /// For requests that have been "waiting" for an answer for too long,
1360    /// expire them and tell the circuit manager that their circuits
1361    /// are unusable.
1362    fn expire_and_answer_pending_requests(&mut self, now: Instant) {
1363        // A bit ugly: we use a separate Vec here to avoid borrowing issues,
1364        // and put it back when we're done.
1365        let mut waiting = Vec::new();
1366        std::mem::swap(&mut waiting, &mut self.waiting);
1367
1368        waiting.retain_mut(|pending| {
1369            let expired = pending
1370                .waiting_since()
1371                .and_then(|w| now.checked_duration_since(w))
1372                .map(|d| d >= self.params.np_idle_timeout)
1373                == Some(true);
1374            if expired {
1375                trace!(?pending, "Pending request expired");
1376                pending.reply(false);
1377                return false;
1378            }
1379
1380            // TODO-SPEC: guard_usability_status isn't what the spec says.  It
1381            // says instead that we should look at _circuit_ status, saying:
1382            //  "   Definition: In the algorithm above, C2 "blocks" C1 if:
1383            // * C2 obeys all the restrictions that C1 had to obey, AND
1384            // * C2 has higher priority than C1, AND
1385            // * Either C2 is <complete>, or C2 is <waiting_for_better_guard>,
1386            // or C2 has been <usable_if_no_better_guard> for no more than
1387            // {NONPRIMARY_GUARD_CONNECT_TIMEOUT} seconds."
1388            //
1389            // See comments in sample::GuardSet::circ_usability_status.
1390
1391            if let Some(answer) = self.guard_usability_status(pending, now) {
1392                trace!(?pending, answer, "Pending request now ready");
1393                pending.reply(answer);
1394                return false;
1395            }
1396            true
1397        });
1398
1399        // Put the waiting list back.
1400        std::mem::swap(&mut waiting, &mut self.waiting);
1401    }
1402
1403    /// Return every currently extant FirstHopId for a guard or fallback
1404    /// directory matching (or possibly matching) the provided keys.
1405    ///
1406    /// An identity is _possibly matching_ if it contains some of the IDs in the
1407    /// provided identity, and it has no _contradictory_ identities, but it does
1408    /// not necessarily contain _all_ of those identities.
1409    ///
1410    /// # TODO
1411    ///
1412    /// This function should probably not exist; it's only used so that dirmgr
1413    /// can report successes or failures, since by the time it observes them it
1414    /// doesn't know whether its circuit came from a guard or a fallback.  To
1415    /// solve that, we'll need CircMgr to record and report which one it was
1416    /// using, which will take some more plumbing.
1417    ///
1418    /// TODO relay: we will have to make the change above when we implement
1419    /// relays; otherwise, it would be possible for an attacker to exploit it to
1420    /// mislead us about our guard status.
1421    fn lookup_ids<T>(&self, identity: &T) -> Vec<FirstHopId>
1422    where
1423        T: tor_linkspec::HasRelayIds + ?Sized,
1424    {
1425        use strum::IntoEnumIterator;
1426        let mut vec = Vec::with_capacity(2);
1427
1428        let id = ids::GuardId::from_relay_ids(identity);
1429        for sample in GuardSetSelector::iter() {
1430            let guard_id = match self.guards.guards(&sample).contains(&id) {
1431                Ok(true) => &id,
1432                Err(other) => other,
1433                Ok(false) => continue,
1434            };
1435            vec.push(FirstHopId(FirstHopIdInner::Guard(sample, guard_id.clone())));
1436        }
1437
1438        let id = ids::FallbackId::from_relay_ids(identity);
1439        if self.fallbacks.contains(&id) {
1440            vec.push(id.into());
1441        }
1442
1443        vec
1444    }
1445
1446    /// Run any periodic events that update guard status, and return a
1447    /// duration after which periodic events should next be run.
1448    #[instrument(skip_all, level = "trace")]
1449    pub(crate) fn run_periodic_events(&mut self, wallclock: SystemTime, now: Instant) -> Duration {
1450        self.update(wallclock, now);
1451        self.expire_and_answer_pending_requests(now);
1452        Duration::from_secs(1) // TODO: Too aggressive.
1453    }
1454
1455    /// Try to select a guard, expanding the sample if the first attempt fails.
1456    #[instrument(skip_all, level = "trace")]
1457    fn select_guard_with_expand(
1458        &mut self,
1459        usage: &GuardUsage,
1460        now: Instant,
1461        wallclock: SystemTime,
1462    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1463        // Try to find a guard.
1464        let first_error = match self.select_guard_once(usage, now) {
1465            Ok(res1) => return Ok(res1),
1466            Err(e) => {
1467                trace!("Couldn't select guard on first attempt: {}", e);
1468                e
1469            }
1470        };
1471
1472        // That didn't work. If we have a netdir, expand the sample and try again.
1473        let res = self.with_opt_universe(|this, univ| {
1474            let univ = univ?;
1475            trace!("No guards available, trying to extend the sample.");
1476            // Make sure that the status on all of our guards are accurate, and
1477            // expand the sample if we can.
1478            //
1479            // Our parameters and configuration did not change, so we do not
1480            // need to call update() or update_active_set_and_filter(). This
1481            // call is sufficient to  extend the sample and recompute primary
1482            // guards.
1483            let extended = Self::update_guardset_internal(
1484                &this.params,
1485                wallclock,
1486                this.guards.active_set.universe_type(),
1487                this.guards.active_guards_mut(),
1488                Some(univ),
1489            );
1490            if extended == ExtendedStatus::Yes {
1491                match this.select_guard_once(usage, now) {
1492                    Ok(res) => return Some(res),
1493                    Err(e) => {
1494                        trace!("Couldn't select guard after update: {}", e);
1495                    }
1496                }
1497            }
1498            None
1499        });
1500        if let Some(res) = res {
1501            return Ok(res);
1502        }
1503
1504        // Okay, that didn't work either.  If we were asked for a directory
1505        // guard, and we aren't using bridges, then we may be able to use a
1506        // fallback.
1507        if usage.kind == GuardUsageKind::OneHopDirectory
1508            && self.guards.active_set.universe_type() == UniverseType::NetDir
1509        {
1510            return self.select_fallback(now);
1511        }
1512
1513        // Couldn't extend the sample or use a fallback; return the original error.
1514        Err(first_error)
1515    }
1516
1517    /// Helper: try to pick a single guard, without retrying on failure.
1518    fn select_guard_once(
1519        &self,
1520        usage: &GuardUsage,
1521        now: Instant,
1522    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1523        let active_set = &self.guards.active_set;
1524        #[cfg_attr(not(feature = "bridge-client"), allow(unused_mut))]
1525        let (list_kind, mut first_hop) =
1526            self.guards
1527                .guards(active_set)
1528                .pick_guard(active_set, usage, &self.params, now)?;
1529        #[cfg(feature = "bridge-client")]
1530        if self.guards.active_set.universe_type() == UniverseType::BridgeSet {
1531            // See if we can promote first_hop to a viable CircTarget.
1532            let bridges = self.latest_bridge_set().ok_or_else(|| {
1533                PickGuardError::Internal(internal!(
1534                    "No bridge set available, even though this is the Bridges sample"
1535                ))
1536            })?;
1537            first_hop.lookup_bridge_circ_target(&bridges);
1538
1539            if usage.kind == GuardUsageKind::Data && !first_hop.contains_circ_target() {
1540                return Err(PickGuardError::Internal(internal!(
1541                    "Tried to return a non-circtarget guard with Data usage!"
1542                )));
1543            }
1544        }
1545        Ok((list_kind, first_hop))
1546    }
1547
1548    /// Helper: Select a fallback directory.
1549    ///
1550    /// Called when we have no guard information to use. Return values are as
1551    /// for [`GuardMgr::select_guard()`]
1552    fn select_fallback(
1553        &self,
1554        now: Instant,
1555    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1556        let filt = self.guards.active_guards().filter();
1557
1558        let fallback = crate::FirstHop {
1559            sample: None,
1560            inner: crate::FirstHopInner::Chan(OwnedChanTarget::from_chan_target(
1561                self.fallbacks.choose(&mut rand::rng(), now, filt)?,
1562            )),
1563        };
1564        let fallback = filt.modify_hop(fallback)?;
1565        Ok((sample::ListKind::Fallback, fallback))
1566    }
1567}
1568
/// A possible outcome of trying to extend a guard sample.
///
/// `select_guard_with_expand` only retries its guard selection when the
/// sample update reports [`ExtendedStatus::Yes`].
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum ExtendedStatus {
    /// The guard sample was extended. (At least one guard was added to it.)
    Yes,
    /// The guard sample was not extended.
    No,
}
1577
/// A set of parameters, derived from the consensus document, controlling
/// the behavior of a guard manager.
///
/// Built-in values come from the [`Default`] implementation; values taken
/// from a consensus come from the `TryFrom<&NetParameters>` implementation.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
struct GuardParams {
    /// How long should a sampled, un-confirmed guard be kept in the sample before it expires?
    lifetime_unconfirmed: Duration,
    /// How long should a confirmed guard be kept in the sample before
    /// it expires?
    lifetime_confirmed: Duration,
    /// How long may a guard be unlisted before we remove it from the sample?
    lifetime_unlisted: Duration,
    /// Largest number of guards we're willing to add to the sample.
    max_sample_size: usize,
    /// Largest fraction of the network's guard bandwidth that we're
    /// willing to add to the sample.
    max_sample_bw_fraction: f64,
    /// Smallest number of guards that we're willing to have in the
    /// sample, after applying a [`GuardFilter`].
    min_filtered_sample_size: usize,
    /// How many guards are considered "Primary"?
    n_primary: usize,
    /// When making a regular circuit, how many primary guards should we
    /// be willing to try?
    data_parallelism: usize,
    /// When making a one-hop directory circuit, how many primary
    /// guards should we be willing to try?
    dir_parallelism: usize,
    /// For how long does a pending attempt to connect to a guard
    /// block an attempt to use a less-favored non-primary guard?
    np_connect_timeout: Duration,
    /// How long do we allow a circuit to a successful but unfavored
    /// non-primary guard to sit around before deciding not to use it?
    np_idle_timeout: Duration,
    /// After how much time without successful activity does a
    /// successful circuit indicate that we should retry our primary
    /// guards?
    internet_down_timeout: Duration,
    /// What fraction of the guards can be filtered out before we
    /// decide that our filter is "very restrictive"?
    ///
    /// (Taken from the `guard_meaningful_restriction` network parameter.)
    ///
    // NOTE(review): the test setup in this file describes this parameter as
    // the fraction of relays that a restriction *allows*, not the fraction
    // filtered out; confirm which reading is intended.
    filter_threshold: f64,
    /// What fraction of the guards determine that our filter is "very
    /// restrictive"?
    ///
    /// (Taken from the `guard_extreme_restriction` network parameter.)
    extreme_threshold: f64,
}
1623
1624impl Default for GuardParams {
1625    fn default() -> Self {
1626        let one_day = Duration::from_secs(86400);
1627        GuardParams {
1628            lifetime_unconfirmed: one_day * 120,
1629            lifetime_confirmed: one_day * 60,
1630            lifetime_unlisted: one_day * 20,
1631            max_sample_size: 60,
1632            max_sample_bw_fraction: 0.2,
1633            min_filtered_sample_size: 20,
1634            n_primary: 3,
1635            data_parallelism: 1,
1636            dir_parallelism: 3,
1637            np_connect_timeout: Duration::from_secs(15),
1638            np_idle_timeout: Duration::from_secs(600),
1639            internet_down_timeout: Duration::from_secs(600),
1640            filter_threshold: 0.2,
1641            extreme_threshold: 0.01,
1642        }
1643    }
1644}
1645
1646impl TryFrom<&NetParameters> for GuardParams {
1647    type Error = tor_units::Error;
1648    fn try_from(p: &NetParameters) -> Result<GuardParams, Self::Error> {
1649        Ok(GuardParams {
1650            lifetime_unconfirmed: p.guard_lifetime_unconfirmed.try_into()?,
1651            lifetime_confirmed: p.guard_lifetime_confirmed.try_into()?,
1652            lifetime_unlisted: p.guard_remove_unlisted_after.try_into()?,
1653            max_sample_size: p.guard_max_sample_size.try_into()?,
1654            max_sample_bw_fraction: p.guard_max_sample_threshold.as_fraction(),
1655            min_filtered_sample_size: p.guard_filtered_min_sample_size.try_into()?,
1656            n_primary: p.guard_n_primary.try_into()?,
1657            data_parallelism: p.guard_use_parallelism.try_into()?,
1658            dir_parallelism: p.guard_dir_use_parallelism.try_into()?,
1659            np_connect_timeout: p.guard_nonprimary_connect_timeout.try_into()?,
1660            np_idle_timeout: p.guard_nonprimary_idle_timeout.try_into()?,
1661            internet_down_timeout: p.guard_internet_likely_down.try_into()?,
1662            filter_threshold: p.guard_meaningful_restriction.as_fraction(),
1663            extreme_threshold: p.guard_extreme_restriction.as_fraction(),
1664        })
1665    }
1666}
1667
/// Representation of a guard or fallback, as returned by [`GuardMgr::select_guard()`].
///
/// A `FirstHop` always carries enough information to open a channel; it may
/// additionally carry what is needed to build a circuit through the hop
/// (see [`FirstHop::as_circ_target`]).
#[derive(Debug, Clone)]
pub struct FirstHop {
    /// The sample from which this guard was taken, or `None` if this is a fallback.
    sample: Option<GuardSetSelector>,
    /// Information about connecting to (or through) this guard.
    inner: FirstHopInner,
}
/// The enumeration inside a FirstHop that holds information about how to
/// connect to (and possibly through) a guard or fallback.
#[derive(Debug, Clone)]
enum FirstHopInner {
    /// We have enough information to connect to a guard.
    Chan(OwnedChanTarget),
    /// We have enough information to connect to a guard _and_ to build
    /// multihop circuits through it.
    //
    // Within this file, this variant is only constructed by code that is
    // gated on the `bridge-client` feature; hence the conditional
    // `allow(dead_code)`.
    #[cfg_attr(not(feature = "bridge-client"), allow(dead_code))]
    Circ(OwnedCircTarget),
}
1687
1688impl FirstHop {
1689    /// Return a new [`FirstHopId`] for this `FirstHop`.
1690    fn first_hop_id(&self) -> FirstHopId {
1691        match &self.sample {
1692            Some(sample) => {
1693                let guard_id = GuardId::from_relay_ids(self);
1694                FirstHopId::in_sample(sample.clone(), guard_id)
1695            }
1696            None => {
1697                let fallback_id = crate::ids::FallbackId::from_relay_ids(self);
1698                FirstHopId::from(fallback_id)
1699            }
1700        }
1701    }
1702
1703    /// Look up this guard in `netdir`.
1704    pub fn get_relay<'a>(&self, netdir: &'a NetDir) -> Option<Relay<'a>> {
1705        match &self.sample {
1706            #[cfg(feature = "bridge-client")]
1707            // Always return "None" for anything that isn't in the netdir.
1708            Some(s) if s.universe_type() == UniverseType::BridgeSet => None,
1709            // Otherwise ask the netdir.
1710            _ => netdir.by_ids(self),
1711        }
1712    }
1713
1714    /// Return true if this guard is a bridge.
1715    pub fn is_bridge(&self) -> bool {
1716        match &self.sample {
1717            #[cfg(feature = "bridge-client")]
1718            Some(s) if s.universe_type() == UniverseType::BridgeSet => true,
1719            _ => false,
1720        }
1721    }
1722
1723    /// If possible, return a view of this object that can be used to build a circuit.
1724    pub fn as_circ_target(&self) -> Option<&OwnedCircTarget> {
1725        match &self.inner {
1726            FirstHopInner::Chan(_) => None,
1727            FirstHopInner::Circ(ct) => Some(ct),
1728        }
1729    }
1730
1731    /// Return a view of this as an OwnedChanTarget.
1732    fn chan_target_mut(&mut self) -> &mut OwnedChanTarget {
1733        match &mut self.inner {
1734            FirstHopInner::Chan(ct) => ct,
1735            FirstHopInner::Circ(ct) => ct.chan_target_mut(),
1736        }
1737    }
1738
1739    /// If possible and appropriate, find a circuit target in `bridges` for this
1740    /// `FirstHop`, and make this `FirstHop` a viable circuit target.
1741    ///
1742    /// (By default, any `FirstHop` that a `GuardSet` returns will have enough
1743    /// information to be a `ChanTarget`, but it will be lacking the additional
1744    /// network information in `CircTarget`[^1] necessary for us to build a
1745    /// multi-hop circuit through it.  If this FirstHop is a regular non-bridge
1746    /// `Relay`, then the `CircMgr` will later look up that circuit information
1747    /// itself from the network directory. But if this `FirstHop` *is* a bridge,
1748    /// then we need to find that information in the `BridgeSet`, since the
1749    /// CircMgr does not keep track of the `BridgeSet`.)
1750    ///
1751    /// [^1]: For example, supported protocol versions and ntor keys.
1752    #[cfg(feature = "bridge-client")]
1753    fn lookup_bridge_circ_target(&mut self, bridges: &bridge::BridgeSet) {
1754        use crate::sample::CandidateStatus::Present;
1755        if self.sample.as_ref().map(|s| s.universe_type()) == Some(UniverseType::BridgeSet)
1756            && matches!(self.inner, FirstHopInner::Chan(_))
1757        {
1758            if let Present(bridge_relay) = bridges.bridge_relay_by_guard(self) {
1759                if let Some(circ_target) = bridge_relay.as_relay_with_desc() {
1760                    self.inner =
1761                        FirstHopInner::Circ(OwnedCircTarget::from_circ_target(&circ_target));
1762                }
1763            }
1764        }
1765    }
1766
1767    /// Return true if this `FirstHop` contains circuit target information.
1768    ///
1769    /// This is true if `lookup_bridge_circ_target()` has been called, and it
1770    /// successfully found the circuit target information.
1771    #[cfg(feature = "bridge-client")]
1772    fn contains_circ_target(&self) -> bool {
1773        matches!(self.inner, FirstHopInner::Circ(_))
1774    }
1775}
1776
1777// This is somewhat redundant with the implementations in crate::guard::Guard.
1778impl tor_linkspec::HasAddrs for FirstHop {
1779    fn addrs(&self) -> impl Iterator<Item = SocketAddr> {
1780        match &self.inner {
1781            FirstHopInner::Chan(ct) => Either::Left(ct.addrs()),
1782            FirstHopInner::Circ(ct) => Either::Right(ct.addrs()),
1783        }
1784    }
1785}
1786impl tor_linkspec::HasRelayIds for FirstHop {
1787    fn identity(
1788        &self,
1789        key_type: tor_linkspec::RelayIdType,
1790    ) -> Option<tor_linkspec::RelayIdRef<'_>> {
1791        match &self.inner {
1792            FirstHopInner::Chan(ct) => ct.identity(key_type),
1793            FirstHopInner::Circ(ct) => ct.identity(key_type),
1794        }
1795    }
1796}
1797impl tor_linkspec::HasChanMethod for FirstHop {
1798    fn chan_method(&self) -> tor_linkspec::ChannelMethod {
1799        match &self.inner {
1800            FirstHopInner::Chan(ct) => ct.chan_method(),
1801            FirstHopInner::Circ(ct) => ct.chan_method(),
1802        }
1803    }
1804}
// `FirstHop` implements `HasAddrs`, `HasRelayIds`, and `HasChanMethod` above,
// and this impl body is empty: no further methods are defined here.
impl tor_linkspec::ChanTarget for FirstHop {}
1806
/// The purpose for which we plan to use a guard.
///
/// This can affect the guard selection algorithm.
///
/// The default is [`GuardUsageKind::Data`].
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[non_exhaustive]
pub enum GuardUsageKind {
    /// We want to use this guard for a data circuit.
    ///
    /// (This encompasses everything except the `OneHopDirectory` case.)
    #[default]
    Data,
    /// We want to use this guard for a one-hop, non-anonymous
    /// directory request.
    ///
    /// (Our algorithm allows more parallelism for the guards that we use
    /// for these circuits.)
    OneHopDirectory,
}
1825
/// A set of parameters describing how a single guard should be selected.
///
/// Used as an argument to [`GuardMgr::select_guard`].
///
/// Values of this type are assembled with [`GuardUsageBuilder`].
#[derive(Clone, Debug, derive_builder::Builder)]
#[builder(build_fn(error = "tor_config::ConfigBuildError"))]
pub struct GuardUsage {
    /// The purpose for which this guard will be used.
    ///
    /// If unset in the builder, this takes the default [`GuardUsageKind`].
    #[builder(default)]
    kind: GuardUsageKind,
    /// A list of restrictions on which guard may be used.
    ///
    /// The default is the empty list.
    #[builder(sub_builder, setter(custom))]
    restrictions: GuardRestrictionList,
}
1841
// Generate the standard builder boilerplate for `GuardUsage`.
// NOTE(review): `!Deserialize` presumably means that the builder is not
// required to be deserializable; confirm against the macro's documentation.
impl_standard_builder! { GuardUsage: !Deserialize }
1843
/// List of guard restrictions, as configured
///
/// This is the type of the `restrictions` field of [`GuardUsage`].
pub type GuardRestrictionList = Vec<GuardRestriction>;
1846
// Declare `GuardRestrictionListBuilder`, the list builder that produces a
// `GuardRestrictionList`.  The list defaults to empty, each item is "built"
// by cloning it, and applying defaults to an item is a no-op.
define_list_builder_helper! {
    pub struct GuardRestrictionListBuilder {
        restrictions: [GuardRestriction],
    }
    built: GuardRestrictionList = restrictions;
    default = vec![];
    item_build: |restriction| Ok(restriction.clone());
    item_apply_defaults: |_| Ok::<_, tor_config::ConfigBuildError>(());
}
1856
// Give `GuardUsageBuilder` public accessors for its `restrictions` list.
// (The field is declared with `setter(custom)` on `GuardUsage`, so
// derive_builder does not generate a setter for it.)
define_list_builder_accessors! {
    struct GuardUsageBuilder {
        pub restrictions: [GuardRestriction],
    }
}
1862
1863impl GuardUsageBuilder {
1864    /// Create a new empty [`GuardUsageBuilder`].
1865    pub fn new() -> Self {
1866        Self::default()
1867    }
1868}
1869
/// A restriction that applies to a single request for a guard.
///
/// Restrictions differ from filters (see [`GuardFilter`]) in that
/// they apply to single requests, not to our entire set of guards.
/// They're suitable for things like making sure that we don't start
/// and end a circuit at the same relay, or requiring a specific
/// subprotocol version for certain kinds of requests.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[non_exhaustive]
pub enum GuardRestriction {
    /// Don't pick a guard with the provided identity.
    AvoidId(RelayId),
    /// Don't pick a guard with any of the identities in the provided set.
    AvoidAllIds(RelayIdSet),
}
1885
/// The kind of vanguards to use.
///
/// The explicit discriminants match the numeric values accepted by
/// `VanguardMode::from_net_parameter`.  The unit tests in this file check
/// the resulting order: `Disabled < Lite < Full`.
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
#[derive(Serialize, Deserialize)] //
#[derive(derive_more::Display)] //
#[serde(rename_all = "lowercase")]
#[cfg(feature = "vanguards")]
#[non_exhaustive]
pub enum VanguardMode {
    /// "Lite" vanguards.
    ///
    /// This is the default mode.
    #[default]
    #[display("lite")]
    Lite = 1,
    /// "Full" vanguards.
    #[display("full")]
    Full = 2,
    /// Vanguards are disabled.
    #[display("disabled")]
    Disabled = 0,
}
1905
#[cfg(feature = "vanguards")]
impl VanguardMode {
    /// Build a `VanguardMode` from a [`NetParameters`] parameter.
    ///
    /// Used for converting [`vanguards_enabled`](NetParameters::vanguards_enabled)
    /// or [`vanguards_hs_service`](NetParameters::vanguards_hs_service)
    /// to the corresponding `VanguardMode`.
    ///
    /// The mapping is `0` → `Disabled`, `1` → `Lite`, `2` → `Full`.
    pub(crate) fn from_net_parameter(val: BoundedInt32<0, 2>) -> Self {
        let raw = val.get();
        match raw {
            2 => Self::Full,
            1 => Self::Lite,
            0 => Self::Disabled,
            // The parameter's type restricts it to 0..=2.
            _ => unreachable!("BoundedInt32 was not bounded?!"),
        }
    }
}
1922
// NOTE(review): presumably this marks `VanguardMode` as usable inside
// `ExplicitOrAuto` (see `VanguardConfig::mode`); confirm against the macro's
// documentation.
impl_not_auto_value!(VanguardMode);
1924
/// Vanguards configuration.
///
/// Use [`VanguardConfig::mode`] to find out which [`VanguardMode`] is in effect.
#[derive(Deftly, Clone, Debug, PartialEq, Eq)]
#[derive_deftly(TorConfig)]
pub struct VanguardConfig {
    /// The kind of vanguards to use.
    ///
    /// This is either an explicitly chosen mode or `Auto`.
    #[deftly(tor_config(default))]
    mode: ExplicitOrAuto<VanguardMode>,
}
1933
1934impl VanguardConfig {
1935    /// Return the configured [`VanguardMode`].
1936    ///
1937    /// Returns the [`Default`] `VanguardMode`
1938    /// if the mode is [`Auto`](ExplicitOrAuto) or unspecified.
1939    pub fn mode(&self) -> VanguardMode {
1940        match self.mode {
1941            ExplicitOrAuto::Auto => Default::default(),
1942            ExplicitOrAuto::Explicit(mode) => mode,
1943        }
1944    }
1945}
1946
/// The kind of vanguards to use.
///
/// This definition is used when the `vanguards` feature is disabled; its
/// only variant is [`VanguardMode::Disabled`].
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
#[derive(Serialize, Deserialize)] //
#[derive(derive_more::Display)] //
#[serde(rename_all = "lowercase")]
#[cfg(not(feature = "vanguards"))]
#[non_exhaustive]
pub enum VanguardMode {
    /// Vanguards are disabled.
    #[default]
    #[display("disabled")]
    Disabled = 0,
}
1960
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_time_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    #![allow(clippy::string_slice)] // See arti#2571
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
    use super::*;
    use itertools::Itertools;
    use tor_linkspec::{HasAddrs, HasRelayIds};
    use tor_persist::TestingStateMgr;
    use tor_rtcompat::test_with_all_runtimes;

    // The built-in `GuardParams` must agree with the parameters derived from
    // the default `NetParameters`.
    #[test]
    fn guard_param_defaults() {
        let p1 = GuardParams::default();
        let p2: GuardParams = (&NetParameters::default()).try_into().unwrap();
        assert_eq!(p1, p2);
    }

    // Shared test setup: build a `GuardMgr` backed by a fresh (and locked)
    // `TestingStateMgr`, together with a `NetDir` for the testing network
    // whose guard parameters are overridden as described below.
    fn init<R: Runtime>(rt: R) -> (GuardMgr<R>, TestingStateMgr, NetDir) {
        use tor_netdir::{MdReceiver, PartialNetDir, testnet};
        let statemgr = TestingStateMgr::new();
        let have_lock = statemgr.try_lock().unwrap();
        assert!(have_lock.held());
        let guardmgr = GuardMgr::new(rt, statemgr.clone(), &TestConfig::default()).unwrap();
        let (con, mds) = testnet::construct_network().unwrap();
        let param_overrides = vec![
            // We make the sample size smaller than usual to compensate for the
            // small testing network.  (Otherwise, we'd sample the whole network,
            // and not be able to observe guards in the tests.)
            "guard-min-filtered-sample-size=5",
            // We choose only two primary guards, to make the tests easier to write.
            "guard-n-primary-guards=2",
            // We define any restriction that allows 75% or fewer of relays as "meaningful",
            // so that we can test the "restrictive" guard sample behavior.
            "guard-meaningful-restriction-percent=75",
        ];
        let param_overrides: String = param_overrides.into_iter().join(" ");
        let override_p = param_overrides.parse().unwrap();
        let mut netdir = PartialNetDir::new(con, Some(&override_p));
        for md in mds {
            netdir.add_microdesc(md);
        }
        let netdir = netdir.unwrap_if_sufficient().unwrap();

        (guardmgr, statemgr, netdir)
    }

    // A guard that was used successfully should be selected again after the
    // state has been stored and reloaded by a new `GuardMgr`.
    #[test]
    #[allow(clippy::clone_on_copy)]
    fn simple_case() {
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, statemgr, netdir) = init(rt.clone());
            let usage = GuardUsage::default();
            guardmgr.install_test_netdir(&netdir);

            let (id, mon, usable) = guardmgr.select_guard(usage).unwrap();
            // Report that the circuit succeeded.
            mon.succeeded();

            // May we use the circuit?
            let usable = usable.await.unwrap();
            assert!(usable);

            // Save the state...
            guardmgr.flush_msg_queue().await;
            guardmgr.store_persistent_state().unwrap();
            drop(guardmgr);

            // Try reloading from the state...
            let guardmgr2 =
                GuardMgr::new(rt.clone(), statemgr.clone(), &TestConfig::default()).unwrap();
            guardmgr2.install_test_netdir(&netdir);

            // Since the guard was confirmed, we should get the same one this time!
            let usage = GuardUsage::default();
            let (id2, _mon, _usable) = guardmgr2.select_guard(usage).unwrap();
            assert!(id2.same_relay_ids(&id));
        });
    }

    // After both primary guards have failed, two further guards are handed
    // out; the one whose circuit fails must be reported as unusable, and the
    // one whose circuit succeeds as usable.
    #[test]
    fn simple_waiting() {
        // TODO(nickm): This test fails in rare cases; I suspect a
        // race condition somewhere.
        //
        // I've doubled up on the queue flushing in order to try to make the
        // race less likely, but we should investigate.
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, _statemgr, netdir) = init(rt);
            let u = GuardUsage::default();
            guardmgr.install_test_netdir(&netdir);

            // We'll have the first two guards fail, which should make us
            // try a non-primary guard.
            let (id1, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
            mon.failed();
            guardmgr.flush_msg_queue().await; // avoid race
            guardmgr.flush_msg_queue().await; // avoid race
            let (id2, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
            mon.failed();
            guardmgr.flush_msg_queue().await; // avoid race
            guardmgr.flush_msg_queue().await; // avoid race

            assert!(!id1.same_relay_ids(&id2));

            // Now we should get two sampled guards. They should be different.
            let (id3, mon3, usable3) = guardmgr.select_guard(u.clone()).unwrap();
            let (id4, mon4, usable4) = guardmgr.select_guard(u.clone()).unwrap();
            assert!(!id3.same_relay_ids(&id4));

            let (u3, u4) = futures::join!(
                async {
                    mon3.failed();
                    guardmgr.flush_msg_queue().await; // avoid race
                    usable3.await.unwrap()
                },
                async {
                    mon4.succeeded();
                    usable4.await.unwrap()
                }
            );

            assert_eq!((u3, u4), (false, true));
        });
    }

    // With a filter that only permits addresses in 2.0.0.0/8, the selected
    // guard must have an address in that range.
    #[test]
    fn filtering_basics() {
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, _statemgr, netdir) = init(rt);
            let u = GuardUsage::default();
            let filter = {
                let mut f = GuardFilter::default();
                // All the addresses in the test network are {0,1,2,3,4}.0.0.3:9001.
                // Limit to only 2.0.0.0/8
                f.push_reachable_addresses(vec!["2.0.0.0/8:9001".parse().unwrap()]);
                f
            };
            guardmgr.set_filter(filter);
            guardmgr.install_test_netdir(&netdir);
            let (guard, _mon, _usable) = guardmgr.select_guard(u).unwrap();
            // Make sure that the filter worked.
            let addr = guard.addrs().next().unwrap();
            assert_eq!(addr, "2.0.0.3:9001".parse().unwrap());
        });
    }

    // An externally reported directory-cache failure should steer directory
    // requests away from a guard without affecting data requests, and an
    // externally reported success should undo that.
    #[test]
    fn external_status() {
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, _statemgr, netdir) = init(rt);
            let data_usage = GuardUsage::default();
            let dir_usage = GuardUsageBuilder::new()
                .kind(GuardUsageKind::OneHopDirectory)
                .build()
                .unwrap();
            guardmgr.install_test_netdir(&netdir);
            {
                // Override this parameter, so that we can get deterministic results below.
                let mut inner = guardmgr.inner.lock().unwrap();
                inner.params.dir_parallelism = 1;
            }

            let (guard, mon, _usable) = guardmgr.select_guard(data_usage.clone()).unwrap();
            mon.succeeded();

            // Record that this guard gave us a bad directory object.
            guardmgr.note_external_failure(&guard, ExternalActivity::DirCache);

            // We ask for another guard, for data usage.  We should get the same
            // one as last time, since the directory failure doesn't mean this
            // guard is useless as a primary guard.
            let (g2, mon, _usable) = guardmgr.select_guard(data_usage).unwrap();
            assert_eq!(g2.ed_identity(), guard.ed_identity());
            mon.succeeded();

            // But if we ask for a guard for directory usage, we should get a
            // different one, since the last guard we gave out failed.
            let (g3, mon, _usable) = guardmgr.select_guard(dir_usage.clone()).unwrap();
            assert_ne!(g3.ed_identity(), guard.ed_identity());
            mon.succeeded();

            // Now record a success for directory usage.
            guardmgr.note_external_success(&guard, ExternalActivity::DirCache);

            // Now that the guard is working as a cache, asking for it should get us the same guard.
            let (g4, _mon, _usable) = guardmgr.select_guard(dir_usage).unwrap();
            assert_eq!(g4.ed_identity(), guard.ed_identity());
        });
    }

    // The derived ordering on `VanguardMode` must be Disabled < Lite < Full.
    #[cfg(feature = "vanguards")]
    #[test]
    fn vanguard_mode_ord() {
        assert!(VanguardMode::Disabled < VanguardMode::Lite);
        assert!(VanguardMode::Disabled < VanguardMode::Full);
        assert!(VanguardMode::Lite < VanguardMode::Full);
    }
}