reddb_server/cluster/membership.rs
1//! Cluster member identity, the authorized-member catalog, and the resilient
2//! three-data-member baseline (issue #988, PRD #987, ADR 0030).
3//!
4//! This is the first vertical slice of multi-writer cluster membership. It
5//! defines *who is a cluster member* as control-plane state that is distinct
6//! from *which ranges a member owns or replicates* (the per-range roles in
7//! [`clustering`](../../../.red/context/clustering.md) and ADR 0045). A node
8//! has exactly one stable [cluster member identity]; range ownership is a
9//! separate, per-range role assigned later by the rebalancer.
10//!
11//! ## What lives here
12//!
13//! * [`ClusterId`] — the cluster's own stable identity. A candidate must
14//! present the right cluster id to join; a peer that targets a different
15//! cluster is rejected ([`super::join`]).
16//! * [`MemberKind`] — whether a member holds user data ([`MemberKind::Data`])
17//! or is a vote-only witness ([`MemberKind::Witness`]). The resilient
18//! multi-writer baseline counts **data** members; witnesses are not the
19//! recommended baseline (glossary: *Voting member*).
20//! * [`ClusterMember`] — one authorized member: its [`NodeIdentity`], its
21//! kind, and how many user ranges it currently holds. A freshly joined data
22//! member holds **zero** ranges — joining never moves user ranges.
23//! * [`MembershipCatalog`] — the authorized-member set for one cluster. This
24//! is the *only* set autodetect of health and topology is allowed to range
25//! over: an arbitrary network peer that has not joined is not a member and
26//! is not an autodetect candidate.
27//!
28//! The join handshake itself — authenticate against a seed, verify cluster
29//! identity, reject unknown/unauthorized peers, then admit and hand back the
30//! control-plane snapshot — lives in [`super::join`].
31//!
32//! Everything here is a pure data model with no I/O, so the whole membership
33//! and join story is exercised deterministically.
34
35use std::collections::BTreeMap;
36
37use super::identity::NodeIdentity;
38
/// Number of **data** members a multi-writer cluster needs before it is
/// considered resilient.
///
/// The project glossary pins this value: a resilient multi-writer cluster
/// starts with three data members, and witness members are not the recommended
/// way to get there. With three data members the quorum is two, so a quorum
/// still exists after any single member is lost, with no witness involved.
pub const RESILIENT_DATA_MEMBER_BASELINE: usize = 3;
46
/// Stable identity of the cluster itself.
///
/// All authorized members share this value, and a candidate has to present it
/// when joining (see [`super::join`]). Comparing it is how a "wrong-cluster"
/// join is caught: a peer that authenticates successfully but names a
/// *different* cluster is turned away instead of being merged in.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ClusterId(String);

/// Error returned by [`ClusterId::new`] when the supplied value is empty once
/// surrounding whitespace has been removed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClusterIdError;

impl std::fmt::Display for ClusterIdError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("cluster id is empty")
    }
}

impl std::error::Error for ClusterIdError {}

impl ClusterId {
    /// Creates a cluster id from an operator-provisioned value.
    ///
    /// Leading and trailing whitespace is stripped before the value is stored.
    ///
    /// # Errors
    ///
    /// Returns [`ClusterIdError`] when nothing is left after trimming. A blank
    /// cluster id is refused because any peer could "match" it by presenting
    /// nothing.
    pub fn new(value: impl AsRef<str>) -> Result<Self, ClusterIdError> {
        match value.as_ref().trim() {
            "" => Err(ClusterIdError),
            trimmed => Ok(Self(trimmed.to_owned())),
        }
    }

    /// The cluster id as a string slice (already trimmed, never empty).
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl std::fmt::Display for ClusterId {
    /// Writes the raw id verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
89
/// Distinguishes members that hold user data from vote-only witnesses.
///
/// The election side has its own `MemberKind` (a witness votes but never owns
/// a range); this one is the *cluster-membership* view. Its job is to decide
/// whether a member counts toward the resilient **data-member** baseline. A
/// witness is a member but not a data member, so it never moves the cluster
/// closer to [`RESILIENT_DATA_MEMBER_BASELINE`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemberKind {
    /// Stores user data; may own some ranges and replicate others.
    Data,
    /// Control-plane only: stores no user data and never owns a range.
    Witness,
}

impl MemberKind {
    /// Returns `true` for kinds that store user data, i.e. the kinds that
    /// count toward the resilient multi-writer baseline.
    pub fn holds_data(self) -> bool {
        // Exhaustive on purpose: a new variant must decide this explicitly.
        match self {
            MemberKind::Data => true,
            MemberKind::Witness => false,
        }
    }
}
113
114/// One authorized cluster member.
115///
116/// The [`NodeIdentity`] is the member's stable cluster identity — the same
117/// validated X.509 subject it authenticates and votes under. `owned_range_count`
118/// is the *per-range* role count, kept deliberately separate: a member's
119/// cluster identity does not change when ranges move on or off it, and a
120/// freshly joined data member starts at zero.
121#[derive(Debug, Clone, PartialEq, Eq)]
122pub struct ClusterMember {
123 identity: NodeIdentity,
124 kind: MemberKind,
125 owned_range_count: usize,
126}
127
128impl ClusterMember {
129 /// A member as it exists immediately after a successful join: authorized,
130 /// of the granted kind, and holding **no** user ranges. Ranges are only
131 /// assigned later by rebalancing or ownership transitions.
132 pub fn joined_empty(identity: NodeIdentity, kind: MemberKind) -> Self {
133 Self {
134 identity,
135 kind,
136 owned_range_count: 0,
137 }
138 }
139
140 pub fn identity(&self) -> &NodeIdentity {
141 &self.identity
142 }
143
144 pub fn kind(&self) -> MemberKind {
145 self.kind
146 }
147
148 /// How many user ranges this member currently owns. Distinct from cluster
149 /// membership: a member with zero ranges is still a full member.
150 pub fn owned_range_count(&self) -> usize {
151 self.owned_range_count
152 }
153
154 /// Does this member currently hold any user ranges? A just-joined member
155 /// answers `false` until the rebalancer assigns ownership.
156 pub fn holds_user_ranges(&self) -> bool {
157 self.owned_range_count > 0
158 }
159
160 /// Record that the rebalancer/ownership transitions have assigned this many
161 /// user ranges to the member. This is the *only* path that gives a member
162 /// ranges — join never does.
163 pub fn assign_ranges(&mut self, count: usize) {
164 self.owned_range_count = count;
165 }
166}
167
/// Result of comparing a join candidate against the authorized-member set.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AdmissionOutcome {
    /// The candidate was new to the catalog and has just been added.
    Admitted,
    /// The candidate was already authorized, so the catalog was left as it
    /// was. This is what makes a reconnecting member's join idempotent.
    AlreadyMember,
}
177
178/// The authorized-member set for one cluster — the control-plane membership
179/// catalog.
180///
181/// Membership is explicit: a node appears here only after a successful join
182/// ([`super::join`]). Autodetect of health and topology ranges over
183/// [`autodetect_candidates`](Self::autodetect_candidates) — i.e. *these
184/// members only* — never over arbitrary peers that happen to be reachable on
185/// the network.
186#[derive(Debug, Clone)]
187pub struct MembershipCatalog {
188 cluster_id: ClusterId,
189 members: BTreeMap<NodeIdentity, ClusterMember>,
190}
191
192impl MembershipCatalog {
193 /// A catalog for `cluster_id` seeded with `founders`. The founding data
194 /// members are the bootstrap set that later candidates authenticate
195 /// against; each starts empty.
196 pub fn new(cluster_id: ClusterId, founders: impl IntoIterator<Item = ClusterMember>) -> Self {
197 let members = founders
198 .into_iter()
199 .map(|m| (m.identity().clone(), m))
200 .collect();
201 Self {
202 cluster_id,
203 members,
204 }
205 }
206
207 pub fn cluster_id(&self) -> &ClusterId {
208 &self.cluster_id
209 }
210
211 /// Is `identity` an authorized member of this cluster? This is the gate
212 /// every control-plane path consults — only an authorized member's health
213 /// and topology are autodetected, and only a member may vote or own ranges.
214 pub fn is_authorized(&self, identity: &NodeIdentity) -> bool {
215 self.members.contains_key(identity)
216 }
217
218 pub fn member(&self, identity: &NodeIdentity) -> Option<&ClusterMember> {
219 self.members.get(identity)
220 }
221
222 pub fn member_mut(&mut self, identity: &NodeIdentity) -> Option<&mut ClusterMember> {
223 self.members.get_mut(identity)
224 }
225
226 /// Admit `member` as authorized. Idempotent: re-admitting an existing
227 /// member leaves the catalog (and the member's range count) untouched, so
228 /// a reconnecting member never has its ranges reset to zero.
229 pub fn admit(&mut self, member: ClusterMember) -> AdmissionOutcome {
230 if self.members.contains_key(member.identity()) {
231 return AdmissionOutcome::AlreadyMember;
232 }
233 self.members.insert(member.identity().clone(), member);
234 AdmissionOutcome::Admitted
235 }
236
237 /// Every authorized member, in stable identity order.
238 pub fn members(&self) -> impl Iterator<Item = &ClusterMember> {
239 self.members.values()
240 }
241
242 /// The members autodetect of health/topology is allowed to range over —
243 /// exactly the authorized members. An arbitrary network peer that has not
244 /// joined is absent here, so autodetect can never silently adopt it.
245 pub fn autodetect_candidates(&self) -> impl Iterator<Item = &ClusterMember> {
246 self.members()
247 }
248
249 /// Whether autodetect may consider `identity`. True only for authorized
250 /// members — the rule that "autodetect applies only to authorized members
251 /// after join, not arbitrary network peers".
252 pub fn is_autodetect_eligible(&self, identity: &NodeIdentity) -> bool {
253 self.is_authorized(identity)
254 }
255
256 pub fn len(&self) -> usize {
257 self.members.len()
258 }
259
260 pub fn is_empty(&self) -> bool {
261 self.members.is_empty()
262 }
263
264 /// How many **data** members the cluster currently has (witnesses
265 /// excluded). This is the number the resilient baseline is measured in.
266 pub fn data_member_count(&self) -> usize {
267 self.members().filter(|m| m.kind().holds_data()).count()
268 }
269
270 /// Assess the cluster against the resilient multi-writer baseline of
271 /// [`RESILIENT_DATA_MEMBER_BASELINE`] data members.
272 pub fn assess_baseline(&self) -> BaselineAssessment {
273 BaselineAssessment::evaluate(self.data_member_count())
274 }
275}
276
277/// How the cluster's data-member count compares to the resilient baseline.
278#[derive(Debug, Clone, Copy, PartialEq, Eq)]
279pub struct BaselineAssessment {
280 /// The configured resilient baseline ([`RESILIENT_DATA_MEMBER_BASELINE`]).
281 pub recommended_data_members: usize,
282 /// The cluster's current data-member count.
283 pub data_members: usize,
284}
285
286impl BaselineAssessment {
287 fn evaluate(data_members: usize) -> Self {
288 Self {
289 recommended_data_members: RESILIENT_DATA_MEMBER_BASELINE,
290 data_members,
291 }
292 }
293
294 /// Does the cluster meet (or exceed) the resilient multi-writer baseline?
295 pub fn meets_baseline(&self) -> bool {
296 self.data_members >= self.recommended_data_members
297 }
298
299 /// How many more data members are needed to reach the baseline (zero once
300 /// met).
301 pub fn shortfall(&self) -> usize {
302 self.recommended_data_members
303 .saturating_sub(self.data_members)
304 }
305}
306
#[cfg(test)]
mod tests {
    use super::*;

    /// Identity fixture built from a certificate subject string.
    fn ident(cn: &str) -> NodeIdentity {
        NodeIdentity::from_certificate_subject(cn).unwrap()
    }

    /// A freshly joined data member with no ranges.
    fn data_member(cn: &str) -> ClusterMember {
        ClusterMember::joined_empty(ident(cn), MemberKind::Data)
    }

    /// A freshly joined witness member.
    fn witness(cn: &str) -> ClusterMember {
        ClusterMember::joined_empty(ident(cn), MemberKind::Witness)
    }

    /// A catalog for the shared test cluster seeded with `founders`.
    fn catalog_of(founders: impl IntoIterator<Item = ClusterMember>) -> MembershipCatalog {
        let cid = ClusterId::new("cluster-x").unwrap();
        MembershipCatalog::new(cid, founders)
    }

    #[test]
    fn cluster_id_rejects_empty() {
        assert!(ClusterId::new(" ").is_err());

        let trimmed = ClusterId::new(" cluster-x ").unwrap();
        assert_eq!(trimmed.as_str(), "cluster-x");
    }

    #[test]
    fn member_identity_is_distinct_from_range_ownership() {
        // Cluster identity is stable; gaining or losing ranges is a separate
        // per-range role and leaves membership untouched.
        let mut member = data_member("CN=node-a");
        assert!(!member.holds_user_ranges());
        assert_eq!(member.owned_range_count(), 0);

        member.assign_ranges(4);
        assert!(member.holds_user_ranges());
        // The identity is the same as before the assignment.
        assert_eq!(member.identity(), &ident("CN=node-a"));
    }

    #[test]
    fn data_member_count_excludes_witnesses() {
        let catalog = catalog_of([
            data_member("CN=node-a"),
            data_member("CN=node-b"),
            witness("CN=witness"),
        ]);

        assert_eq!(catalog.len(), 3);
        assert_eq!(catalog.data_member_count(), 2);
    }

    #[test]
    fn three_data_members_meet_resilient_baseline() {
        let catalog = catalog_of([
            data_member("CN=node-a"),
            data_member("CN=node-b"),
            data_member("CN=node-c"),
        ]);

        let baseline = catalog.assess_baseline();
        assert_eq!(baseline.recommended_data_members, 3);
        assert!(baseline.meets_baseline());
        assert_eq!(baseline.shortfall(), 0);
    }

    #[test]
    fn two_data_plus_witness_does_not_meet_baseline() {
        // Two data members plus a witness stay below the three-data-member
        // baseline: the witness does not count.
        let catalog = catalog_of([
            data_member("CN=node-a"),
            data_member("CN=node-b"),
            witness("CN=witness"),
        ]);

        let baseline = catalog.assess_baseline();
        assert!(!baseline.meets_baseline());
        assert_eq!(baseline.shortfall(), 1);
    }

    #[test]
    fn admit_is_idempotent_and_preserves_ranges() {
        let node_a = ident("CN=node-a");
        let mut catalog = catalog_of([data_member("CN=node-a")]);
        catalog.member_mut(&node_a).unwrap().assign_ranges(3);

        // Admitting an existing member again must keep its range count.
        let readmitted = catalog.admit(data_member("CN=node-a"));
        assert_eq!(readmitted, AdmissionOutcome::AlreadyMember);
        assert_eq!(catalog.member(&node_a).unwrap().owned_range_count(), 3);

        // A genuinely new member is added.
        let added = catalog.admit(data_member("CN=node-b"));
        assert_eq!(added, AdmissionOutcome::Admitted);
        assert_eq!(catalog.len(), 2);
    }

    #[test]
    fn autodetect_is_limited_to_authorized_members() {
        let catalog = catalog_of([data_member("CN=node-a")]);

        // Authorized member: eligible for autodetect.
        assert!(catalog.is_autodetect_eligible(&ident("CN=node-a")));
        // Reachable peer that never joined: not eligible.
        assert!(!catalog.is_autodetect_eligible(&ident("CN=random-peer")));
        assert_eq!(catalog.autodetect_candidates().count(), 1);
    }
}