Skip to main content

reddb_server/cluster/
join.rs

1//! Authenticated cluster join through seed members (issue #988, ADR 0030).
2//!
3//! Join is the explicit admission flow a candidate runs to *become* an
4//! authorized cluster member. The glossary fixes the steps: *"a candidate
5//! member authenticates against seed members, verifies cluster identity,
6//! downloads global control-plane state, and only then becomes an authorized
7//! cluster member."* Until that completes, a node is just a reachable network
8//! peer — not a member, and not something autodetect will adopt.
9//!
10//! ## The handshake, structurally
11//!
12//! A seed member holds a [`SeedAuthority`]: the cluster's [`ClusterId`], an
13//! operator-provisioned **allowlist** of which identities may join (and as what
14//! kind), and the current [`MembershipCatalog`]. A candidate sends a
15//! [`JoinRequest`] carrying the cluster id it *believes* it is joining, its
16//! authenticated [`NodeIdentity`], and the kind it intends to be. The seed:
17//!
18//! 1. **Verifies cluster identity.** A request that names a different cluster
19//!    is [`JoinRejection::WrongCluster`] — authenticating correctly to the
20//!    wrong cluster is still a rejection.
21//! 2. **Authorizes the peer.** An identity absent from the allowlist is an
22//!    unknown/unauthorized peer: [`JoinRejection::UnauthorizedPeer`]. This is
23//!    what stops "anyone who can open a connection" from joining.
24//! 3. **Checks the declared kind.** A peer allow-listed as a witness that asks
25//!    to join as a data member (or vice versa) is
26//!    [`JoinRejection::KindMismatch`].
27//! 4. **Admits and snapshots.** The candidate is added to the catalog as a
28//!    [joined-empty member](super::membership::ClusterMember::joined_empty) —
29//!    no user ranges — and the seed returns a [`ControlPlaneSnapshot`] of the
30//!    now-current membership for the candidate to adopt.
31//!
32//! Authentication itself is mTLS: the [`NodeIdentity`] in a request is the
33//! validated X.509 subject of the peer certificate. [`JoinRequest::authenticated`]
34//! is the only constructor, so a request cannot exist without a proven
35//! identity — there is no "anonymous join" shape to defend against.
36
37use super::identity::NodeIdentity;
38use super::membership::{
39    AdmissionOutcome, ClusterId, ClusterMember, MemberKind, MembershipCatalog,
40};
41use std::collections::BTreeMap;
42
43/// A candidate's request to join a cluster through a seed member.
44///
45/// The only way to build one is [`JoinRequest::authenticated`], whose
46/// `identity` is the validated certificate subject of the peer — so a request
47/// always carries a proven identity.
48#[derive(Debug, Clone, PartialEq, Eq)]
49pub struct JoinRequest {
50    /// The cluster the candidate believes it is joining. Checked against the
51    /// seed's own cluster id.
52    pub target_cluster: ClusterId,
53    /// The candidate's authenticated cluster member identity.
54    pub identity: NodeIdentity,
55    /// The kind the candidate intends to join as.
56    pub kind: MemberKind,
57}
58
59impl JoinRequest {
60    /// Build a join request from an already-authenticated peer identity (the
61    /// validated mTLS certificate subject).
62    pub fn authenticated(
63        target_cluster: ClusterId,
64        identity: NodeIdentity,
65        kind: MemberKind,
66    ) -> Self {
67        Self {
68            target_cluster,
69            identity,
70            kind,
71        }
72    }
73}
74
75/// Why a join was refused. Each variant maps to one of the seed's checks.
76#[derive(Debug, Clone, PartialEq, Eq)]
77pub enum JoinRejection {
78    /// The request named a different cluster than this seed serves.
79    WrongCluster {
80        expected: ClusterId,
81        presented: ClusterId,
82    },
83    /// The authenticated peer is not on the operator allowlist — an unknown or
84    /// unauthorized peer.
85    UnauthorizedPeer(NodeIdentity),
86    /// The peer is allow-listed but asked to join as the wrong kind.
87    KindMismatch {
88        identity: NodeIdentity,
89        allowed: MemberKind,
90        requested: MemberKind,
91    },
92}
93
94impl std::fmt::Display for JoinRejection {
95    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
96        match self {
97            Self::WrongCluster {
98                expected,
99                presented,
100            } => write!(
101                f,
102                "join targets cluster {presented}, but this seed serves {expected}"
103            ),
104            Self::UnauthorizedPeer(id) => {
105                write!(f, "peer {id} is not an authorized cluster member")
106            }
107            Self::KindMismatch {
108                identity,
109                allowed,
110                requested,
111            } => write!(
112                f,
113                "peer {identity} is allowed as {allowed:?} but requested {requested:?}"
114            ),
115        }
116    }
117}
118
119impl std::error::Error for JoinRejection {}
120
121/// The global control-plane state a freshly admitted member downloads — the
122/// authorized membership it should adopt as its starting view.
123#[derive(Debug, Clone, PartialEq, Eq)]
124pub struct ControlPlaneSnapshot {
125    pub cluster_id: ClusterId,
126    pub members: Vec<ClusterMember>,
127}
128
129/// A successful admission: the outcome (newly admitted vs. already a member)
130/// and the control-plane snapshot the candidate adopts.
131#[derive(Debug, Clone, PartialEq, Eq)]
132pub struct JoinGrant {
133    pub outcome: AdmissionOutcome,
134    pub snapshot: ControlPlaneSnapshot,
135}
136
137/// A seed member's authority to admit join candidates.
138///
139/// It owns the cluster's [`ClusterId`], the operator-provisioned allowlist of
140/// who may join, and the live [`MembershipCatalog`]. [`evaluate_join`] runs the
141/// full handshake and, on success, mutates the catalog to include the new
142/// member.
143///
144/// [`evaluate_join`]: SeedAuthority::evaluate_join
145#[derive(Debug, Clone)]
146pub struct SeedAuthority {
147    allowlist: BTreeMap<NodeIdentity, MemberKind>,
148    catalog: MembershipCatalog,
149}
150
151impl SeedAuthority {
152    /// Build a seed authority over `catalog` with the given operator
153    /// `allowlist` of identities permitted to join (and the kind each is
154    /// permitted to join as). Existing members are implicitly allow-listed.
155    pub fn new(
156        catalog: MembershipCatalog,
157        allowlist: impl IntoIterator<Item = (NodeIdentity, MemberKind)>,
158    ) -> Self {
159        let mut allow: BTreeMap<NodeIdentity, MemberKind> = allowlist.into_iter().collect();
160        // Anyone already in the catalog is, by definition, authorized.
161        for member in catalog.members() {
162            allow
163                .entry(member.identity().clone())
164                .or_insert(member.kind());
165        }
166        Self {
167            allowlist: allow,
168            catalog,
169        }
170    }
171
172    pub fn cluster_id(&self) -> &ClusterId {
173        self.catalog.cluster_id()
174    }
175
176    pub fn catalog(&self) -> &MembershipCatalog {
177        &self.catalog
178    }
179
180    /// Run the join handshake for `request`. On success the candidate is now an
181    /// authorized member of the catalog and the returned [`JoinGrant`] carries
182    /// the control-plane snapshot to adopt; on failure the catalog is
183    /// unchanged and the [`JoinRejection`] says which check failed.
184    pub fn evaluate_join(&mut self, request: JoinRequest) -> Result<JoinGrant, JoinRejection> {
185        // 1. Verify cluster identity.
186        if &request.target_cluster != self.catalog.cluster_id() {
187            return Err(JoinRejection::WrongCluster {
188                expected: self.catalog.cluster_id().clone(),
189                presented: request.target_cluster,
190            });
191        }
192
193        // 2. Authorize the authenticated peer against the allowlist.
194        let allowed_kind = match self.allowlist.get(&request.identity) {
195            Some(kind) => *kind,
196            None => return Err(JoinRejection::UnauthorizedPeer(request.identity)),
197        };
198
199        // 3. The declared kind must match what the peer is allow-listed as.
200        if allowed_kind != request.kind {
201            return Err(JoinRejection::KindMismatch {
202                identity: request.identity,
203                allowed: allowed_kind,
204                requested: request.kind,
205            });
206        }
207
208        // 4. Admit as a joined-empty member and snapshot the control plane.
209        let member = ClusterMember::joined_empty(request.identity, request.kind);
210        let outcome = self.catalog.admit(member);
211        Ok(JoinGrant {
212            outcome,
213            snapshot: self.snapshot(),
214        })
215    }
216
217    fn snapshot(&self) -> ControlPlaneSnapshot {
218        ControlPlaneSnapshot {
219            cluster_id: self.catalog.cluster_id().clone(),
220            members: self.catalog.members().cloned().collect(),
221        }
222    }
223}
224
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `NodeIdentity` from a certificate subject string.
    fn ident(cn: &str) -> NodeIdentity {
        NodeIdentity::from_certificate_subject(cn).unwrap()
    }

    /// The cluster id every seed in these tests serves.
    fn cid() -> ClusterId {
        ClusterId::new("cluster-prod").unwrap()
    }

    /// A two-data-member founding cluster with `node-c` pre-authorized to join.
    fn seed_with_pending_node_c() -> SeedAuthority {
        let catalog = MembershipCatalog::new(
            cid(),
            [
                ClusterMember::joined_empty(ident("CN=node-a"), MemberKind::Data),
                ClusterMember::joined_empty(ident("CN=node-b"), MemberKind::Data),
            ],
        );
        SeedAuthority::new(catalog, [(ident("CN=node-c"), MemberKind::Data)])
    }

    #[test]
    fn successful_join_admits_authorized_data_member() {
        let mut seed = seed_with_pending_node_c();
        assert_eq!(seed.cluster_id(), &cid());
        let req = JoinRequest::authenticated(cid(), ident("CN=node-c"), MemberKind::Data);

        let grant = seed
            .evaluate_join(req)
            .expect("authorized join should succeed");
        assert_eq!(grant.outcome, AdmissionOutcome::Admitted);

        // The candidate is now an authorized member, and the snapshot reflects
        // the full three-data-member cluster it should adopt.
        assert!(seed.catalog().is_authorized(&ident("CN=node-c")));
        assert_eq!(grant.snapshot.cluster_id, cid());
        assert_eq!(grant.snapshot.members.len(), 3);
        assert!(seed.catalog().assess_baseline().meets_baseline());
    }

    #[test]
    fn joined_data_member_starts_with_no_user_ranges() {
        let mut seed = seed_with_pending_node_c();
        let req = JoinRequest::authenticated(cid(), ident("CN=node-c"), MemberKind::Data);
        seed.evaluate_join(req).unwrap();

        let joined = seed.catalog().member(&ident("CN=node-c")).unwrap();
        assert!(!joined.holds_user_ranges());
        assert_eq!(joined.owned_range_count(), 0);
    }

    #[test]
    fn existing_member_is_implicitly_allow_listed() {
        let mut seed = seed_with_pending_node_c();
        // `node-a` was never put on the operator allowlist, but it is already
        // in the catalog, which `SeedAuthority::new` treats as authorized.
        let req = JoinRequest::authenticated(cid(), ident("CN=node-a"), MemberKind::Data);

        let grant = seed
            .evaluate_join(req)
            .expect("an existing member must pass the allowlist check");
        assert_eq!(grant.outcome, AdmissionOutcome::AlreadyMember);
        assert_eq!(grant.snapshot.members.len(), 2);
        assert_eq!(seed.catalog().len(), 2);
    }

    #[test]
    fn unauthorized_peer_is_rejected() {
        let mut seed = seed_with_pending_node_c();
        // `node-x` authenticated fine (it has a NodeIdentity) but is not on the
        // allowlist — an unknown/unauthorized peer.
        let req = JoinRequest::authenticated(cid(), ident("CN=node-x"), MemberKind::Data);

        let err = seed
            .evaluate_join(req)
            .expect_err("unknown peer must be rejected");
        assert_eq!(err, JoinRejection::UnauthorizedPeer(ident("CN=node-x")));
        // The catalog is unchanged and the peer is not autodetect-eligible.
        assert!(!seed.catalog().is_autodetect_eligible(&ident("CN=node-x")));
        assert_eq!(seed.catalog().len(), 2);
    }

    #[test]
    fn wrong_cluster_join_is_rejected() {
        let mut seed = seed_with_pending_node_c();
        let other = ClusterId::new("cluster-staging").unwrap();
        // `node-c` is authorized, but it targets the wrong cluster.
        let req = JoinRequest::authenticated(other.clone(), ident("CN=node-c"), MemberKind::Data);

        let err = seed
            .evaluate_join(req)
            .expect_err("wrong-cluster join must be rejected");
        assert_eq!(
            err,
            JoinRejection::WrongCluster {
                expected: cid(),
                presented: other,
            }
        );
        // A rejected join leaves the catalog unchanged.
        assert!(!seed.catalog().is_authorized(&ident("CN=node-c")));
        assert_eq!(seed.catalog().len(), 2);
    }

    #[test]
    fn kind_mismatch_is_rejected() {
        let mut seed = seed_with_pending_node_c();
        // `node-c` is allow-listed as Data but asks to join as a Witness.
        let req = JoinRequest::authenticated(cid(), ident("CN=node-c"), MemberKind::Witness);

        let err = seed
            .evaluate_join(req)
            .expect_err("kind mismatch must be rejected");
        assert_eq!(
            err,
            JoinRejection::KindMismatch {
                identity: ident("CN=node-c"),
                allowed: MemberKind::Data,
                requested: MemberKind::Witness,
            }
        );
        // A rejected join leaves the catalog unchanged.
        assert!(!seed.catalog().is_authorized(&ident("CN=node-c")));
        assert_eq!(seed.catalog().len(), 2);
    }

    #[test]
    fn rejoin_is_idempotent() {
        let mut seed = seed_with_pending_node_c();
        let req = || JoinRequest::authenticated(cid(), ident("CN=node-c"), MemberKind::Data);

        let first = seed.evaluate_join(req()).unwrap();
        assert_eq!(first.outcome, AdmissionOutcome::Admitted);

        let second = seed.evaluate_join(req()).unwrap();
        assert_eq!(second.outcome, AdmissionOutcome::AlreadyMember);
        assert_eq!(seed.catalog().len(), 3);
    }

    #[test]
    fn autodetect_adopts_only_members_after_join() {
        let mut seed = seed_with_pending_node_c();
        // Before join: node-c is not an autodetect candidate.
        assert!(!seed.catalog().is_autodetect_eligible(&ident("CN=node-c")));

        seed.evaluate_join(JoinRequest::authenticated(
            cid(),
            ident("CN=node-c"),
            MemberKind::Data,
        ))
        .unwrap();

        // After join: it is, and a never-joined peer still is not.
        assert!(seed.catalog().is_autodetect_eligible(&ident("CN=node-c")));
        assert!(!seed.catalog().is_autodetect_eligible(&ident("CN=stranger")));
    }

    #[test]
    fn rejection_messages_describe_the_failed_check() {
        // Expected text is built from the same `Display`/`Debug` impls the
        // messages use, so the test pins the wording, not the id formatting.
        let other = ClusterId::new("cluster-staging").unwrap();
        let wrong_cluster = JoinRejection::WrongCluster {
            expected: cid(),
            presented: other.clone(),
        };
        assert_eq!(
            wrong_cluster.to_string(),
            format!(
                "join targets cluster {}, but this seed serves {}",
                other,
                cid()
            )
        );

        let unauthorized = JoinRejection::UnauthorizedPeer(ident("CN=node-x"));
        assert_eq!(
            unauthorized.to_string(),
            format!(
                "peer {} is not an authorized cluster member",
                ident("CN=node-x")
            )
        );

        let mismatch = JoinRejection::KindMismatch {
            identity: ident("CN=node-c"),
            allowed: MemberKind::Data,
            requested: MemberKind::Witness,
        };
        assert_eq!(
            mismatch.to_string(),
            format!(
                "peer {} is allowed as {:?} but requested {:?}",
                ident("CN=node-c"),
                MemberKind::Data,
                MemberKind::Witness
            )
        );
    }
}