Skip to main content

nodedb_cluster/swim/
error.rs

1// SPDX-License-Identifier: BUSL-1.1
2
//! Typed error variants for the SWIM subsystem.
//!
//! `SwimError` is the single error type returned by every public function
//! in `nodedb_cluster::swim`. It is wired into the cluster-wide
//! [`ClusterError`](crate::error::ClusterError) enum via the `From` impl at
//! the bottom of this file; `ClusterError` in turn bridges to
//! `nodedb_types::NodeDbError` at the public API boundary.
9
10use thiserror::Error;
11
12use nodedb_types::NodeId;
13
14use super::incarnation::Incarnation;
15use super::member::MemberState;
16
17/// Errors produced by the SWIM failure detector and membership layer.
18#[derive(Debug, Error)]
19pub enum SwimError {
20    /// A message or update referenced a node id not present in the
21    /// membership list. This is non-fatal — the detector will request a
22    /// full sync from the sender.
23    #[error("swim: unknown member {node_id}")]
24    UnknownMember { node_id: NodeId },
25
26    /// Received update carries an incarnation strictly older than the
27    /// locally recorded value, so the update is refuted.
28    #[error("swim: stale incarnation for {node_id}: received {received:?} <= local {local:?}")]
29    StaleIncarnation {
30        node_id: NodeId,
31        received: Incarnation,
32        local: Incarnation,
33    },
34
35    /// Received a `Suspect` update targeting the local node. The failure
36    /// detector must bump its own incarnation and broadcast an `Alive`
37    /// refutation. Callers treat this as a signal, not a fatal error.
38    #[error("swim: local node suspected at incarnation {incarnation:?}")]
39    SelfSuspected { incarnation: Incarnation },
40
41    /// A state transition violated the SWIM state machine (e.g. attempting
42    /// to move a `Left` member back to `Alive`). Always a bug.
43    #[error("swim: invalid state transition {from:?} -> {to:?}")]
44    InvalidTransition { from: MemberState, to: MemberState },
45
46    /// Configuration validation failed. Returned by [`super::SwimConfig::validate`].
47    #[error("swim: invalid config field {field}: {reason}")]
48    InvalidConfig {
49        field: &'static str,
50        reason: &'static str,
51    },
52
53    /// zerompk failed to serialize a `SwimMessage`. In practice this is
54    /// infallible for the current message schema — the variant exists so
55    /// future additions to the wire format cannot silently panic.
56    #[error("swim: encode failure: {detail}")]
57    Encode { detail: String },
58
59    /// zerompk failed to parse incoming bytes as a `SwimMessage`. Common
60    /// causes: truncated datagram, version skew, random UDP noise.
61    #[error("swim: decode failure: {detail}")]
62    Decode { detail: String },
63
64    /// Transport backend has been closed; no further I/O is possible.
65    /// Returned by [`super::detector::Transport::recv`] on shutdown.
66    #[error("swim: transport closed")]
67    TransportClosed,
68
69    /// The in-flight probe map is full. Should never happen in practice —
70    /// the detector caps concurrent probes at a few tens — but the error
71    /// exists so a runaway bug cannot corrupt the detector state.
72    #[error("swim: probe inflight table overflow")]
73    ProbeInflightOverflow,
74}
75
76impl From<SwimError> for crate::error::ClusterError {
77    fn from(err: SwimError) -> Self {
78        crate::error::ClusterError::Transport {
79            detail: err.to_string(),
80        }
81    }
82}
83
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::ClusterError;

    /// The rendered message must name the node and show both incarnations.
    #[test]
    fn display_contains_context() {
        let err = SwimError::StaleIncarnation {
            node_id: NodeId::try_new("n1").expect("test fixture"),
            received: Incarnation::new(3),
            local: Incarnation::new(5),
        };
        let msg = err.to_string();
        assert!(msg.contains("n1"));
        assert!(msg.contains('3'));
        assert!(msg.contains('5'));
    }

    /// The offending field name must appear in the rendered message.
    #[test]
    fn invalid_config_display() {
        let err = SwimError::InvalidConfig {
            field: "probe_timeout",
            reason: "must be strictly less than probe_interval",
        };
        assert!(err.to_string().contains("probe_timeout"));
    }

    /// Conversion lands in `ClusterError::Transport` and keeps the original
    /// message, so the node id is still visible after bridging.
    #[test]
    fn bridges_to_cluster_error() {
        let err: ClusterError = SwimError::UnknownMember {
            node_id: NodeId::try_new("n42").expect("test fixture"),
        }
        .into();
        assert!(matches!(err, ClusterError::Transport { .. }));
        assert!(matches!(
            &err,
            ClusterError::Transport { detail } if detail.contains("n42")
        ));
    }
}