Skip to main content

reddb_server/cluster/
membership.rs

1//! Cluster member identity, the authorized-member catalog, and the resilient
2//! three-data-member baseline (issue #988, PRD #987, ADR 0030).
3//!
4//! This is the first vertical slice of multi-writer cluster membership. It
5//! defines *who is a cluster member* as control-plane state that is distinct
6//! from *which ranges a member owns or replicates* (the per-range roles in
7//! [`clustering`](../../../.red/context/clustering.md) and ADR 0045). A node
8//! has exactly one stable [cluster member identity]; range ownership is a
9//! separate, per-range role assigned later by the rebalancer.
10//!
11//! ## What lives here
12//!
13//! * [`ClusterId`] — the cluster's own stable identity. A candidate must
14//!   present the right cluster id to join; a peer that targets a different
15//!   cluster is rejected ([`super::join`]).
16//! * [`MemberKind`] — whether a member holds user data ([`MemberKind::Data`])
17//!   or is a vote-only witness ([`MemberKind::Witness`]). The resilient
18//!   multi-writer baseline counts **data** members; witnesses are not the
19//!   recommended baseline (glossary: *Voting member*).
20//! * [`ClusterMember`] — one authorized member: its [`NodeIdentity`], its
21//!   kind, and how many user ranges it currently holds. A freshly joined data
22//!   member holds **zero** ranges — joining never moves user ranges.
23//! * [`MembershipCatalog`] — the authorized-member set for one cluster. This
24//!   is the *only* set autodetect of health and topology is allowed to range
25//!   over: an arbitrary network peer that has not joined is not a member and
26//!   is not an autodetect candidate.
27//!
28//! The join handshake itself — authenticate against a seed, verify cluster
29//! identity, reject unknown/unauthorized peers, then admit and hand back the
30//! control-plane snapshot — lives in [`super::join`].
31//!
32//! Everything here is a pure data model with no I/O, so the whole membership
33//! and join story is exercised deterministically.
34
35use std::collections::BTreeMap;
36
37use super::identity::NodeIdentity;
38
/// Number of **data** members a multi-writer cluster needs to count as
/// resilient.
///
/// The glossary pins the value: *"A resilient multi-writer cluster starts with
/// three data members; witness members are not the recommended baseline for
/// multi-writer clustering."* With three data members the quorum is two, so
/// the cluster keeps a quorum after losing any one member and needs no witness
/// to do so.
pub const RESILIENT_DATA_MEMBER_BASELINE: usize = 3;
46
/// Stable identity of the cluster itself.
///
/// All authorized members share this value, and a join candidate has to
/// present it to be admitted (see [`super::join`]). This is what makes a
/// "wrong-cluster" join detectable: a peer that authenticates correctly but
/// targets a *different* cluster is rejected instead of being merged in.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ClusterId(String);

/// Error returned by [`ClusterId::new`] when the supplied value is empty once
/// surrounding whitespace has been trimmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClusterIdError;

impl std::fmt::Display for ClusterIdError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("cluster id is empty")
    }
}

impl std::error::Error for ClusterIdError {}

impl ClusterId {
    /// Create a cluster id from an operator-provisioned value.
    ///
    /// Leading and trailing whitespace is trimmed before the value is stored.
    ///
    /// # Errors
    ///
    /// Returns [`ClusterIdError`] when nothing is left after trimming. A blank
    /// cluster id is refused because any peer could "match" it by presenting
    /// nothing.
    pub fn new(value: impl AsRef<str>) -> Result<Self, ClusterIdError> {
        match value.as_ref().trim() {
            "" => Err(ClusterIdError),
            trimmed => Ok(Self(trimmed.to_owned())),
        }
    }

    /// The cluster id as a string slice (already trimmed, never empty).
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl std::fmt::Display for ClusterId {
    /// Writes the raw id; width/alignment flags on the formatter are not
    /// applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
89
/// The kind of a cluster member: a data holder or a vote-only witness.
///
/// It mirrors the election-side `MemberKind` (a witness votes but never owns
/// a range), seen from the *cluster-membership* side: the kind decides whether
/// a member is counted toward the resilient **data-member** baseline. A
/// witness is a member but not a data member, so adding one does not bring the
/// cluster any closer to [`RESILIENT_DATA_MEMBER_BASELINE`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemberKind {
    /// Stores user data; may be the range owner for some ranges and a range
    /// replica for others.
    Data,
    /// Control-plane only: stores no user data and never owns a range.
    Witness,
}

impl MemberKind {
    /// Returns `true` for kinds that store user data and therefore count
    /// toward the resilient multi-writer baseline.
    pub fn holds_data(self) -> bool {
        // Exhaustive on purpose: a new kind must decide explicitly whether it
        // counts as a data member.
        match self {
            MemberKind::Data => true,
            MemberKind::Witness => false,
        }
    }
}
113
114/// One authorized cluster member.
115///
116/// The [`NodeIdentity`] is the member's stable cluster identity — the same
117/// validated X.509 subject it authenticates and votes under. `owned_range_count`
118/// is the *per-range* role count, kept deliberately separate: a member's
119/// cluster identity does not change when ranges move on or off it, and a
120/// freshly joined data member starts at zero.
121#[derive(Debug, Clone, PartialEq, Eq)]
122pub struct ClusterMember {
123    identity: NodeIdentity,
124    kind: MemberKind,
125    owned_range_count: usize,
126}
127
128impl ClusterMember {
129    /// A member as it exists immediately after a successful join: authorized,
130    /// of the granted kind, and holding **no** user ranges. Ranges are only
131    /// assigned later by rebalancing or ownership transitions.
132    pub fn joined_empty(identity: NodeIdentity, kind: MemberKind) -> Self {
133        Self {
134            identity,
135            kind,
136            owned_range_count: 0,
137        }
138    }
139
140    pub fn identity(&self) -> &NodeIdentity {
141        &self.identity
142    }
143
144    pub fn kind(&self) -> MemberKind {
145        self.kind
146    }
147
148    /// How many user ranges this member currently owns. Distinct from cluster
149    /// membership: a member with zero ranges is still a full member.
150    pub fn owned_range_count(&self) -> usize {
151        self.owned_range_count
152    }
153
154    /// Does this member currently hold any user ranges? A just-joined member
155    /// answers `false` until the rebalancer assigns ownership.
156    pub fn holds_user_ranges(&self) -> bool {
157        self.owned_range_count > 0
158    }
159
160    /// Record that the rebalancer/ownership transitions have assigned this many
161    /// user ranges to the member. This is the *only* path that gives a member
162    /// ranges — join never does.
163    pub fn assign_ranges(&mut self, count: usize) {
164        self.owned_range_count = count;
165    }
166}
167
/// Result of comparing a join candidate against the authorized-member set.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AdmissionOutcome {
    /// The candidate was not a member before and has now been added.
    Admitted,
    /// The candidate was already an authorized member, so the catalog was
    /// left unchanged (joining again on reconnect is idempotent).
    AlreadyMember,
}
177
178/// The authorized-member set for one cluster — the control-plane membership
179/// catalog.
180///
181/// Membership is explicit: a node appears here only after a successful join
182/// ([`super::join`]). Autodetect of health and topology ranges over
183/// [`autodetect_candidates`](Self::autodetect_candidates) — i.e. *these
184/// members only* — never over arbitrary peers that happen to be reachable on
185/// the network.
186#[derive(Debug, Clone)]
187pub struct MembershipCatalog {
188    cluster_id: ClusterId,
189    members: BTreeMap<NodeIdentity, ClusterMember>,
190}
191
192impl MembershipCatalog {
193    /// A catalog for `cluster_id` seeded with `founders`. The founding data
194    /// members are the bootstrap set that later candidates authenticate
195    /// against; each starts empty.
196    pub fn new(cluster_id: ClusterId, founders: impl IntoIterator<Item = ClusterMember>) -> Self {
197        let members = founders
198            .into_iter()
199            .map(|m| (m.identity().clone(), m))
200            .collect();
201        Self {
202            cluster_id,
203            members,
204        }
205    }
206
207    pub fn cluster_id(&self) -> &ClusterId {
208        &self.cluster_id
209    }
210
211    /// Is `identity` an authorized member of this cluster? This is the gate
212    /// every control-plane path consults — only an authorized member's health
213    /// and topology are autodetected, and only a member may vote or own ranges.
214    pub fn is_authorized(&self, identity: &NodeIdentity) -> bool {
215        self.members.contains_key(identity)
216    }
217
218    pub fn member(&self, identity: &NodeIdentity) -> Option<&ClusterMember> {
219        self.members.get(identity)
220    }
221
222    pub fn member_mut(&mut self, identity: &NodeIdentity) -> Option<&mut ClusterMember> {
223        self.members.get_mut(identity)
224    }
225
226    /// Admit `member` as authorized. Idempotent: re-admitting an existing
227    /// member leaves the catalog (and the member's range count) untouched, so
228    /// a reconnecting member never has its ranges reset to zero.
229    pub fn admit(&mut self, member: ClusterMember) -> AdmissionOutcome {
230        if self.members.contains_key(member.identity()) {
231            return AdmissionOutcome::AlreadyMember;
232        }
233        self.members.insert(member.identity().clone(), member);
234        AdmissionOutcome::Admitted
235    }
236
237    /// Every authorized member, in stable identity order.
238    pub fn members(&self) -> impl Iterator<Item = &ClusterMember> {
239        self.members.values()
240    }
241
242    /// The members autodetect of health/topology is allowed to range over —
243    /// exactly the authorized members. An arbitrary network peer that has not
244    /// joined is absent here, so autodetect can never silently adopt it.
245    pub fn autodetect_candidates(&self) -> impl Iterator<Item = &ClusterMember> {
246        self.members()
247    }
248
249    /// Whether autodetect may consider `identity`. True only for authorized
250    /// members — the rule that "autodetect applies only to authorized members
251    /// after join, not arbitrary network peers".
252    pub fn is_autodetect_eligible(&self, identity: &NodeIdentity) -> bool {
253        self.is_authorized(identity)
254    }
255
256    pub fn len(&self) -> usize {
257        self.members.len()
258    }
259
260    pub fn is_empty(&self) -> bool {
261        self.members.is_empty()
262    }
263
264    /// How many **data** members the cluster currently has (witnesses
265    /// excluded). This is the number the resilient baseline is measured in.
266    pub fn data_member_count(&self) -> usize {
267        self.members().filter(|m| m.kind().holds_data()).count()
268    }
269
270    /// Assess the cluster against the resilient multi-writer baseline of
271    /// [`RESILIENT_DATA_MEMBER_BASELINE`] data members.
272    pub fn assess_baseline(&self) -> BaselineAssessment {
273        BaselineAssessment::evaluate(self.data_member_count())
274    }
275}
276
277/// How the cluster's data-member count compares to the resilient baseline.
278#[derive(Debug, Clone, Copy, PartialEq, Eq)]
279pub struct BaselineAssessment {
280    /// The configured resilient baseline ([`RESILIENT_DATA_MEMBER_BASELINE`]).
281    pub recommended_data_members: usize,
282    /// The cluster's current data-member count.
283    pub data_members: usize,
284}
285
286impl BaselineAssessment {
287    fn evaluate(data_members: usize) -> Self {
288        Self {
289            recommended_data_members: RESILIENT_DATA_MEMBER_BASELINE,
290            data_members,
291        }
292    }
293
294    /// Does the cluster meet (or exceed) the resilient multi-writer baseline?
295    pub fn meets_baseline(&self) -> bool {
296        self.data_members >= self.recommended_data_members
297    }
298
299    /// How many more data members are needed to reach the baseline (zero once
300    /// met).
301    pub fn shortfall(&self) -> usize {
302        self.recommended_data_members
303            .saturating_sub(self.data_members)
304    }
305}
306
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a [`NodeIdentity`] fixture from a subject string; panics if
    /// `from_certificate_subject` rejects it.
    fn ident(cn: &str) -> NodeIdentity {
        NodeIdentity::from_certificate_subject(cn).unwrap()
    }

    /// A freshly joined data member with the given subject.
    fn data_member(cn: &str) -> ClusterMember {
        ClusterMember::joined_empty(ident(cn), MemberKind::Data)
    }

    /// A freshly joined witness member with the given subject.
    fn witness_member(cn: &str) -> ClusterMember {
        ClusterMember::joined_empty(ident(cn), MemberKind::Witness)
    }

    /// A catalog for the fixed test cluster `cluster-x` seeded with `founders`.
    fn catalog_with(founders: impl IntoIterator<Item = ClusterMember>) -> MembershipCatalog {
        let cluster_id = ClusterId::new("cluster-x").unwrap();
        MembershipCatalog::new(cluster_id, founders)
    }

    #[test]
    fn cluster_id_rejects_empty() {
        let blank = ClusterId::new("   ");
        assert!(blank.is_err());

        let padded = ClusterId::new(" cluster-x ").unwrap();
        assert_eq!(padded.as_str(), "cluster-x");
    }

    #[test]
    fn member_identity_is_distinct_from_range_ownership() {
        // Cluster identity stays fixed; gaining or losing ranges is a separate
        // per-range role and leaves membership untouched.
        let mut member = data_member("CN=node-a");
        assert!(!member.holds_user_ranges());
        assert_eq!(member.owned_range_count(), 0);

        member.assign_ranges(4);
        assert!(member.holds_user_ranges());
        // The identity is the same after ranges were assigned.
        assert_eq!(member.identity(), &ident("CN=node-a"));
    }

    #[test]
    fn data_member_count_excludes_witnesses() {
        let catalog = catalog_with([
            data_member("CN=node-a"),
            data_member("CN=node-b"),
            witness_member("CN=witness"),
        ]);

        assert_eq!(catalog.len(), 3);
        assert_eq!(catalog.data_member_count(), 2);
    }

    #[test]
    fn three_data_members_meet_resilient_baseline() {
        let catalog = catalog_with([
            data_member("CN=node-a"),
            data_member("CN=node-b"),
            data_member("CN=node-c"),
        ]);

        let assessment = catalog.assess_baseline();
        assert_eq!(assessment.recommended_data_members, 3);
        assert!(assessment.meets_baseline());
        assert_eq!(assessment.shortfall(), 0);
    }

    #[test]
    fn two_data_plus_witness_does_not_meet_baseline() {
        // Witnesses do not count: two data members plus one witness is still
        // one short of the three-data-member baseline.
        let catalog = catalog_with([
            data_member("CN=node-a"),
            data_member("CN=node-b"),
            witness_member("CN=witness"),
        ]);

        let assessment = catalog.assess_baseline();
        assert!(!assessment.meets_baseline());
        assert_eq!(assessment.shortfall(), 1);
    }

    #[test]
    fn admit_is_idempotent_and_preserves_ranges() {
        let node_a = ident("CN=node-a");
        let mut catalog = catalog_with([data_member("CN=node-a")]);
        catalog.member_mut(&node_a).unwrap().assign_ranges(3);

        // Admitting an existing member again must leave its range count alone.
        let readmitted = catalog.admit(data_member("CN=node-a"));
        assert_eq!(readmitted, AdmissionOutcome::AlreadyMember);
        let kept = catalog.member(&node_a).unwrap();
        assert_eq!(kept.owned_range_count(), 3);

        // A genuinely new identity is added.
        let added = catalog.admit(data_member("CN=node-b"));
        assert_eq!(added, AdmissionOutcome::Admitted);
        assert_eq!(catalog.len(), 2);
    }

    #[test]
    fn autodetect_is_limited_to_authorized_members() {
        let catalog = catalog_with([data_member("CN=node-a")]);

        // A member of the catalog is eligible for autodetect.
        let member = ident("CN=node-a");
        assert!(catalog.is_autodetect_eligible(&member));

        // A reachable peer that never joined is not.
        let stranger = ident("CN=random-peer");
        assert!(!catalog.is_autodetect_eligible(&stranger));

        assert_eq!(catalog.autodetect_candidates().count(), 1);
    }
}