Skip to main content

nodedb_cluster/swim/
config.rs

1// SPDX-License-Identifier: BUSL-1.1
2
3//! SWIM protocol configuration.
4//!
5//! Tunable parameters that govern failure-detection latency, bandwidth, and
6//! false-positive rate. Defaults follow the Lifeguard recommendations for
7//! a ≤ 256-node cluster and are safe for production without tuning.
8
9use std::time::Duration;
10
11use super::error::SwimError;
12use super::incarnation::Incarnation;
13
14/// Configuration for the SWIM failure detector.
15///
16/// All fields are validated at construction time via [`SwimConfig::validate`];
17/// an invalid config is a programmer error and returns a typed
18/// [`SwimError::InvalidConfig`] rather than panicking.
19#[derive(Debug, Clone)]
20pub struct SwimConfig {
21    /// Time between probe rounds (T' in the SWIM paper). One randomly-chosen
22    /// alive peer is pinged per interval.
23    pub probe_interval: Duration,
24
25    /// Round-trip deadline for a direct ping before falling back to k
26    /// indirect pings. Must be strictly less than `probe_interval`.
27    pub probe_timeout: Duration,
28
29    /// Number of indirect probe helpers (`k` in the paper).
30    pub indirect_probes: u8,
31
32    /// Multiplier on `probe_interval` used to compute the suspicion timeout
33    /// before a `Suspect` member is declared `Dead`. Lifeguard §3.1.
34    pub suspicion_mult: u8,
35
36    /// Minimum value for the suspicion timeout; protects small clusters from
37    /// sub-second suspicion windows. The effective timeout is
38    /// `max(min_suspicion, suspicion_mult * log2(n) * probe_interval)`.
39    pub min_suspicion: Duration,
40
41    /// Seed incarnation for a freshly-booted local node. Always `0` in
42    /// production; exposed for deterministic unit tests.
43    pub initial_incarnation: Incarnation,
44
45    /// Maximum number of membership deltas to piggyback on a single
46    /// outgoing SWIM datagram. Caps per-message bandwidth and bounds
47    /// the encoded payload size below a UDP MTU.
48    pub max_piggyback: usize,
49
50    /// Gossip fanout multiplier (`lambda` in Das §4.3). The
51    /// dissemination queue drops a rumour after it has been carried
52    /// on `ceil(fanout_lambda * log2(n+1))` outgoing messages, which
53    /// with high probability reaches every member.
54    pub fanout_lambda: u32,
55}
56
57impl SwimConfig {
58    /// Production defaults from Lifeguard, tuned for a ≤ 256-node cluster.
59    pub fn production() -> Self {
60        Self {
61            probe_interval: Duration::from_millis(1000),
62            probe_timeout: Duration::from_millis(500),
63            indirect_probes: 3,
64            suspicion_mult: 4,
65            min_suspicion: Duration::from_secs(2),
66            initial_incarnation: Incarnation::ZERO,
67            max_piggyback: 6,
68            fanout_lambda: 3,
69        }
70    }
71
72    /// Validate the configuration. Returns `InvalidConfig` if any invariant
73    /// fails. Callers should treat validation failure as a fatal startup
74    /// error — SWIM cannot run with incoherent timing parameters.
75    pub fn validate(&self) -> Result<(), SwimError> {
76        if self.probe_interval.is_zero() {
77            return Err(SwimError::InvalidConfig {
78                field: "probe_interval",
79                reason: "must be non-zero",
80            });
81        }
82        if self.probe_timeout >= self.probe_interval {
83            return Err(SwimError::InvalidConfig {
84                field: "probe_timeout",
85                reason: "must be strictly less than probe_interval",
86            });
87        }
88        if self.indirect_probes == 0 {
89            return Err(SwimError::InvalidConfig {
90                field: "indirect_probes",
91                reason: "must be at least 1",
92            });
93        }
94        if self.suspicion_mult == 0 {
95            return Err(SwimError::InvalidConfig {
96                field: "suspicion_mult",
97                reason: "must be at least 1",
98            });
99        }
100        if self.min_suspicion.is_zero() {
101            return Err(SwimError::InvalidConfig {
102                field: "min_suspicion",
103                reason: "must be non-zero",
104            });
105        }
106        if self.max_piggyback == 0 {
107            return Err(SwimError::InvalidConfig {
108                field: "max_piggyback",
109                reason: "must be at least 1",
110            });
111        }
112        if self.fanout_lambda == 0 {
113            return Err(SwimError::InvalidConfig {
114                field: "fanout_lambda",
115                reason: "must be at least 1",
116            });
117        }
118        Ok(())
119    }
120}
121
122impl Default for SwimConfig {
123    fn default() -> Self {
124        Self::production()
125    }
126}
127
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `cfg` fails validation with an `InvalidConfig` error that
    /// names `expected_field`.
    fn assert_rejected(cfg: &SwimConfig, expected_field: &str) {
        assert!(matches!(
            cfg.validate(),
            Err(SwimError::InvalidConfig { field, .. }) if field == expected_field
        ));
    }

    #[test]
    fn production_defaults_are_valid() {
        SwimConfig::production().validate().expect("valid");
    }

    #[test]
    fn zero_probe_interval_rejected() {
        let cfg = SwimConfig {
            probe_interval: Duration::ZERO,
            ..SwimConfig::production()
        };
        assert_rejected(&cfg, "probe_interval");
    }

    #[test]
    fn probe_timeout_must_be_less_than_interval() {
        let base = SwimConfig::production();
        // A timeout equal to the interval is the boundary case: not strictly less.
        let cfg = SwimConfig {
            probe_timeout: base.probe_interval,
            ..base
        };
        assert_rejected(&cfg, "probe_timeout");
    }

    #[test]
    fn zero_indirect_probes_rejected() {
        let cfg = SwimConfig {
            indirect_probes: 0,
            ..SwimConfig::production()
        };
        assert_rejected(&cfg, "indirect_probes");
    }

    #[test]
    fn zero_suspicion_mult_rejected() {
        let cfg = SwimConfig {
            suspicion_mult: 0,
            ..SwimConfig::production()
        };
        assert_rejected(&cfg, "suspicion_mult");
    }

    #[test]
    fn zero_min_suspicion_rejected() {
        let cfg = SwimConfig {
            min_suspicion: Duration::ZERO,
            ..SwimConfig::production()
        };
        assert_rejected(&cfg, "min_suspicion");
    }

    #[test]
    fn zero_max_piggyback_rejected() {
        let cfg = SwimConfig {
            max_piggyback: 0,
            ..SwimConfig::production()
        };
        assert_rejected(&cfg, "max_piggyback");
    }

    #[test]
    fn zero_fanout_lambda_rejected() {
        let cfg = SwimConfig {
            fanout_lambda: 0,
            ..SwimConfig::production()
        };
        assert_rejected(&cfg, "fanout_lambda");
    }
}