// tor_hsservice/status.rs
1//! Support for reporting the status of an onion service.
2
3use crate::internal_prelude::*;
4
/// The current reported status of an onion service.
///
/// This aggregates one [`ComponentStatus`] per subsystem; the overall
/// [`State`] and current [`Problem`] are derived from them on demand.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct OnionServiceStatus {
    /// The current high-level state for the IPT manager.
    ipt_mgr: ComponentStatus,

    /// The current high-level state for the descriptor publisher.
    publisher: ComponentStatus,

    /// The current high-level state for the PoW manager.
    #[cfg(feature = "hs-pow-full")]
    pow_manager: ComponentStatus,
    // TODO (#1194): Add key expiration
    //
    // NOTE: Do _not_ add general metrics (like failure/success rates, number
    // of intro points, etc) here.
}
22
/// The current reported status of an onion service subsystem.
#[derive(Debug, Clone)]
pub(crate) struct ComponentStatus {
    /// The current high-level state.
    state: State,

    /// The last error we have seen.
    ///
    /// `None` if the component has not reported an error since it last
    /// entered a healthy state.
    latest_error: Option<Problem>,
}
32
33impl ComponentStatus {
34 /// Create a new ComponentStatus for a component that has not been bootstrapped.
35 fn new_shutdown() -> Self {
36 Self {
37 state: State::Shutdown,
38 latest_error: None,
39 }
40 }
41}
42
43impl PartialEq for ComponentStatus {
44 fn eq(&self, other: &Self) -> bool {
45 let Self {
46 state,
47 latest_error,
48 } = self;
49 let Self {
50 state: state_other,
51 latest_error: lastest_error_other,
52 } = other;
53
54 // NOTE: Errors are never equal. We _could_ add half-baked PartialEq implementations for
55 // all of our error types, but it doesn't seem worth it. If there is a state change, or if
56 // we've encountered an error (even if it's the same as the previous one), we'll notify the
57 // watchers.
58 state == state_other && latest_error.is_none() && lastest_error_other.is_none()
59 }
60}
61
62impl Eq for ComponentStatus {}
63
/// The high-level state of an onion service.
///
/// This type summarizes the most basic information about an onion service's
/// status.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum State {
    /// The service is not launched.
    ///
    /// Either [`OnionService::launch`](crate::OnionService::launch) has not
    /// been called, or the service has been shut down.
    ///
    /// ## Reachability
    ///
    /// The service is not reachable.
    Shutdown,
    /// The service is bootstrapping.
    ///
    /// Specifically, we have been offline, or we just initialized:
    /// We are trying to build introduction points and publish a descriptor,
    /// and haven't hit any significant problems yet.
    ///
    /// ## Reachability
    ///
    /// The service is not fully reachable, but may be reachable by some clients.
    Bootstrapping,
    /// The service is running in a degraded state.
    ///
    /// Specifically, we are not satisfied with our introduction points, but
    /// we do have a number of working introduction points,
    /// and our descriptor is up-to-date.
    ///
    /// ## Reachability
    ///
    /// The service is reachable.
    ///
    // TODO: this variant is only used by the IptManager.
    // We should split this enum into IptManagerState and PublisherState.
    DegradedReachable,
    /// The service is running in a degraded state.
    ///
    /// Specifically, we have a number of working introduction points,
    /// but we have failed to upload the descriptor to one or more HsDirs.
    ///
    /// ## Reachability
    ///
    /// The service is unlikely to be reachable.
    ///
    DegradedUnreachable,
    /// The service is running.
    ///
    /// Specifically, we are satisfied with our introduction points, and our
    /// descriptor is up-to-date.
    ///
    /// ## Reachability
    ///
    /// The service is believed to be fully reachable.
    Running,
    /// The service is trying to recover from a minor interruption.
    ///
    /// Specifically:
    /// * We have encountered a problem (like a dead intro point or an
    ///   intermittent failure to upload a descriptor)
    /// * We are trying to recover from the problem.
    /// * We have not yet failed.
    ///
    /// ## Reachability
    ///
    /// The service is unlikely to be reachable.
    ///
    //
    // NOTE: this status is currently only set by `IptManager` whenever:
    // * there are no good IPTs (so the service will be unreachable); or
    // * there aren't enough good IPTs to publish (AFAICT in this case the service
    // may be reachable, if the IPTs we _do_ have have previously been published).
    //
    // TODO (#1270): split this state into 2 different states (one for the "service is
    // still reachable" case, and another for the "unreachable" one).
    Recovering,
    /// The service is not working.
    ///
    /// Specifically, there is a problem with this onion service, and either it
    /// is one we cannot recover from, or we have tried for a while to recover
    /// and have failed.
    ///
    /// ## Reachability
    ///
    /// The service is not fully reachable. It may temporarily be reachable by some clients.
    Broken,
}
154
155impl State {
156 /// Check whether the service is *believed* to be fully reachable.
157 ///
158 /// This is at best an implication in one direction, even if this returns
159 /// `false`, the service may still be reachable.
160 pub fn is_fully_reachable(&self) -> bool {
161 matches!(self, Self::Running | Self::DegradedReachable)
162 }
163}
164
/// An error type for descriptor upload failures with retries.
///
/// Each variant (except [`Bug`](DescUploadRetryError::Bug)) wraps the
/// accumulated per-attempt errors from the retry loop.
#[derive(Clone, Debug, thiserror::Error)]
#[non_exhaustive]
pub enum DescUploadRetryError {
    /// A fatal (non-transient) error occurred.
    #[error("A fatal (non-transient) error occurred")]
    FatalError(RetryError<DescUploadError>),

    /// Ran out of retries.
    #[error("Ran out of retries")]
    MaxRetryCountExceeded(RetryError<DescUploadError>),

    /// Exceeded the maximum allowed time.
    #[error("Timeout exceeded")]
    Timeout(RetryError<DescUploadError>),

    /// Encountered an internal error.
    #[error("Internal error")]
    Bug(#[from] Bug),
}
185
/// A problem encountered by an onion service.
///
/// Stored per-component in [`ComponentStatus::latest_error`]; the
/// `derive_more::From` derive lets component code pass its own error types
/// directly to the status-sender helpers.
#[derive(Clone, Debug, derive_more::From)]
#[non_exhaustive]
pub enum Problem {
    /// A fatal error occurred.
    Runtime(FatalError),

    /// One or more descriptor uploads failed.
    DescriptorUpload(Vec<DescUploadRetryError>),

    /// We failed to establish one or more introduction points.
    Ipt(Vec<IptError>),

    /// Error in the PowManager subsystem
    // TODO: add variants for other transient errors?
    #[cfg(feature = "hs-pow-full")]
    Pow(crate::pow::v1::PowError),
}
204
impl OnionServiceStatus {
    /// Create a new OnionServiceStatus for a service that has not been bootstrapped.
    ///
    /// Every component starts out in [`State::Shutdown`] with no recorded error.
    pub(crate) fn new_shutdown() -> Self {
        Self {
            ipt_mgr: ComponentStatus::new_shutdown(),
            publisher: ComponentStatus::new_shutdown(),
            #[cfg(feature = "hs-pow-full")]
            pow_manager: ComponentStatus::new_shutdown(),
        }
    }

    /// Return the current high-level state of this onion service.
    ///
    /// The overall state is derived from the `State`s of its underlying components
    /// (i.e. the IPT manager and descriptor publisher).
    pub fn state(&self) -> State {
        use State::*;

        cfg_if::cfg_if! {
            if #[cfg(feature = "hs-pow-full")] {
                let pow_manager_state = self.pow_manager.state;
            } else {
                // This is slightly janky, but should give correct results.
                // With PoW disabled, pretending the PoW component is `Running`
                // makes it neutral in the combining match below.
                let pow_manager_state = Running;
            }
        }

        // NOTE: arm order matters — earlier arms take precedence. E.g. if
        // either the IPT manager or the publisher is `Shutdown`, the whole
        // service reports `Shutdown` regardless of the other components.
        match (self.ipt_mgr.state, self.publisher.state, pow_manager_state) {
            (Shutdown, _, _) | (_, Shutdown, _) => Shutdown,
            (Bootstrapping, _, _) | (_, Bootstrapping, _) => Bootstrapping,
            // Fully `Running` only when every component (including PoW, when
            // enabled) is `Running`.
            (Running, Running, Running) => Running,
            (Recovering, _, _) | (_, Recovering, _) | (_, _, Recovering) => Recovering,
            (Broken, _, _) | (_, Broken, _) => Broken,
            (DegradedUnreachable, _, _) | (_, DegradedUnreachable, _) => DegradedUnreachable,
            // The final `(Running, Running, _)` pattern catches the case where
            // only the PoW manager is in some other (non-`Recovering`) state;
            // the service is then reported as `DegradedReachable`.
            (DegradedReachable, Running, _)
                | (Running, DegradedReachable, _)
                | (DegradedReachable, DegradedReachable, _)
                | (Running, Running, _) => DegradedReachable,
        }
    }

    /// Return the most severe current problem
    ///
    /// Returns `None` iff no component currently has a recorded error.
    pub fn current_problem(&self) -> Option<&Problem> {
        cfg_if::cfg_if! {
            if #[cfg(feature = "hs-pow-full")] {
                let pow_manager_error = &self.pow_manager.latest_error;
            } else {
                // With PoW disabled there is never a PoW error.
                let pow_manager_error = &None;
            }
        }

        match (
            &self.ipt_mgr.latest_error,
            &self.publisher.latest_error,
            pow_manager_error,
        ) {
            (None, None, None) => None,
            (Some(e), Some(_), _) => {
                // For now, assume IPT manager errors are always more severe
                // TODO: decide which error is the more severe (or return both)
                Some(e)
            }
            // Exactly one of ipt_mgr/publisher has an error here (the
            // both-`Some` case was handled above); PoW errors are reported
            // only when the other two components are error-free.
            (_, Some(e), _) | (Some(e), _, _) => Some(e),
            (_, _, Some(e)) => Some(e),
        }
    }

    /// Return a time before which the user must re-provision this onion service
    /// with new keys.
    ///
    /// Returns `None` if the onion service is able to generate and sign new
    /// keys as needed.
    pub fn provisioned_key_expiration(&self) -> Option<SystemTime> {
        None // TODO (#1194): Implement
    }
}
281
/// A stream of OnionServiceStatus events, returned by an onion service.
///
/// Note that multiple status change events may be coalesced into one if the
/// receiver does not read them as fast as they are generated. Note also
/// that it's possible for an item to arise in this stream without an underlying
/// change having occurred.
///
//
// We define this so that we aren't exposing postage in our public API.
// (The inner type is a `postage::watch` receiver, which yields the current
// status on subscription and again after each update.)
#[derive(Clone)]
pub struct OnionServiceStatusStream(postage::watch::Receiver<OnionServiceStatus>);
293
294impl futures::Stream for OnionServiceStatusStream {
295 type Item = OnionServiceStatus;
296
297 fn poll_next(
298 mut self: std::pin::Pin<&mut Self>,
299 cx: &mut std::task::Context<'_>,
300 ) -> std::task::Poll<Option<Self::Item>> {
301 self.0.poll_next_unpin(cx)
302 }
303}
304
/// A shared handle to a postage::watch::Sender that we can use to update an OnionServiceStatus.
///
/// Cloning this handle is cheap (it clones the `Arc`); all clones update the
/// same underlying watch channel.
#[derive(Clone)]
pub(crate) struct StatusSender(Arc<Mutex<postage::watch::Sender<OnionServiceStatus>>>);
308
/// A handle that can be used by the [`IptManager`]
/// to update the [`OnionServiceStatus`].
///
/// Only touches the `ipt_mgr` component of the status.
#[derive(Clone, derive_more::From)]
pub(crate) struct IptMgrStatusSender(StatusSender);
313
/// A handle that can be used by the [`Publisher`]
/// to update the [`OnionServiceStatus`].
///
/// Only touches the `publisher` component of the status.
#[derive(Clone, derive_more::From)]
pub(crate) struct PublisherStatusSender(StatusSender);
318
/// A handle that can be used by the PoW manager
/// to update the [`OnionServiceStatus`].
///
/// Only touches the `pow_manager` component of the status.
/// (The original doc comment said "Publisher"; that was a copy-paste error.)
#[derive(Clone, derive_more::From)]
#[cfg(feature = "hs-pow-full")]
pub(crate) struct PowManagerStatusSender(StatusSender);
324
/// A helper for implementing [`PublisherStatusSender`], [`IptMgrStatusSender`], etc.
///
/// `$sender` is the newtype wrapper around [`StatusSender`] to implement;
/// `$field` is the [`OnionServiceStatus`] field that this sender updates.
///
/// TODO: this macro is a bit repetitive, it would be nice if we could reduce duplication even
/// further (and auto-generate a `note_<state>` function for every `State` variant).
macro_rules! impl_status_sender {
    ($sender:ident, $field:ident) => {
        impl $sender {
            /// Update `latest_error` and set the underlying state to `Broken`.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            pub(crate) fn send_broken(&self, err: impl Into<Problem>) {
                self.send(State::Broken, Some(err.into()));
            }

            /// Update `latest_error` and set the underlying state to `Recovering`.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            #[allow(dead_code)] // NOTE: this is dead code in PublisherStatusSender
            pub(crate) fn send_recovering(&self, err: impl Into<Problem>) {
                self.send(State::Recovering, Some(err.into()));
            }

            /// Set `latest_error` to `None` and the underlying state to `Shutdown`.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            pub(crate) fn send_shutdown(&self) {
                self.send(State::Shutdown, None);
            }

            /// Update the underlying state and latest_error.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            pub(crate) fn send(&self, state: State, err: Option<Problem>) {
                let sender = &self.0;
                let mut tx = sender.0.lock().expect("Poisoned lock");
                // Clone the current status, patch this sender's component,
                // and let `maybe_send` decide (via PartialEq) whether the
                // change is worth notifying watchers about.
                let mut svc_status = tx.borrow().clone();
                svc_status.$field.state = state;
                svc_status.$field.latest_error = err;
                tx.maybe_send(|_| svc_status);
            }
        }
    };
}
372
// Generate the status-sender API for each per-component handle.
impl_status_sender!(IptMgrStatusSender, ipt_mgr);
impl_status_sender!(PublisherStatusSender, publisher);
#[cfg(feature = "hs-pow-full")]
impl_status_sender!(PowManagerStatusSender, pow_manager);
377
378impl StatusSender {
379 /// Create a new StatusSender with a given initial status.
380 pub(crate) fn new(initial_status: OnionServiceStatus) -> Self {
381 let (tx, _) = postage::watch::channel_with(initial_status);
382 StatusSender(Arc::new(Mutex::new(tx)))
383 }
384
385 /// Return a copy of the current status.
386 pub(crate) fn get(&self) -> OnionServiceStatus {
387 self.0.lock().expect("Poisoned lock").borrow().clone()
388 }
389
390 /// Return a new OnionServiceStatusStream to return events from this StatusSender.
391 pub(crate) fn subscribe(&self) -> OnionServiceStatusStream {
392 OnionServiceStatusStream(self.0.lock().expect("Poisoned lock").subscribe())
393 }
394}
395
#[cfg(test)]
impl OnionServiceStatus {
    /// Return the current high-level state of the publisher.
    ///
    /// Test-only accessor for the otherwise-private `publisher` field.
    pub(crate) fn publisher_status(&self) -> ComponentStatus {
        self.publisher.clone()
    }
}
403
#[cfg(test)]
impl ComponentStatus {
    /// The current `State` of this component.
    pub(crate) fn state(&self) -> State {
        // `State` is `Copy`, so return it by value.
        let ComponentStatus { state, .. } = self;
        *state
    }

    /// The current error of this component.
    pub(crate) fn current_problem(&self) -> Option<&Problem> {
        // `Option<&T>: From<&Option<T>>` gives us the borrowed view directly.
        Option::from(&self.latest_error)
    }
}