// reddb_server/replication/control_plane.rs
//! Control-plane consensus seam (issue #996, parent #987, ADR 0052).
//!
//! The Cluster Supervisor coordinates membership, leader election, and
//! shard/range ownership through a **Raft-equivalent control-plane consensus
//! layer** — and *only* the control plane. [ADR 0052] fixes the boundary; this
//! module is the small internal abstraction that boundary lives behind, so the
//! follow-up implementation slices (the replicated log, its durable store, its
//! snapshot/compaction) target a named seam instead of picking a consensus
//! library or inventing protocol semantics.
//!
//! ## What goes through this layer — and what never does
//!
//! The control-plane log carries exactly two kinds of entry:
//!
//! * a **membership change** — admission, removal, or role change of a cluster
//!   member; and
//! * an **ownership-catalog transition** — a fenced, versioned move / split /
//!   merge / promote of a shard/range (ADR 0037).
//!
//! [`ControlPlaneEntry`] is *closed* over exactly these two: there is, by
//! construction, **no user-data variant**. User-data writes are never recorded
//! in, ordered by, or gated on the control-plane log — they flow through the
//! data plane (WAL → logical stream → replicas, ADR 0030/0044) under per-range
//! ownership and commit policy. The control plane decides *who may write a
//! range and where it lives*; it does not carry *what* is written. The two logs
//! are physically separate and a user write touches exactly one of them. This
//! is the central line ADR 0030 drew ("the term/epoch and vote-safety ideas
//! without running data payloads through a Raft log") made concrete and
//! type-enforced.
//!
//! ## Relationship to the election core
//!
//! The election half of this protocol already exists: term-based, quorum-gated
//! election with a durable last-vote in [`super::election`] (issue #834), and
//! the vote-only [`super::witness`] profile (issue #836). This seam names the
//! *log* half — append + commit of control-plane entries — and ties it to the
//! same role/term so the whole forms one Raft-equivalent layer. The concrete
//! engine behind [`ControlPlaneConsensus`] (an embedded Raft crate, the
//! election core extended with a replicated log, or another quorum protocol) is
//! an implementation detail; swapping it must not change the boundary above.
//!
//! ## Durable state requirement
//!
//! An implementation must persist, fsync-ordered, before acknowledging a vote
//! grant or reporting an entry committed: the current term, `voted_for` for
//! that term (the existing [`super::election::LastVoteStore`]), and the accepted
//! control-plane log entries plus the highest committed index. Election safety
//! (one leader per term) and log safety (a committed entry is never lost) both
//! depend on it (ADR 0052, safety properties 1–3).
//!
//! [ADR 0052]: ../../../../.red/adr/0052-cluster-supervisor-control-plane-consensus.md

/// Stable identity of a cluster member, matching the election membership id
/// ([`super::election::Member::id`]) and the replica/ack id namespace.
pub type MemberId = String;

/// Position of an entry in the control-plane log. Monotonic per term-history;
/// an entry is durable once its index is at or below the committed index of a
/// quorum.
pub type ControlPlaneLogIndex = u64;

/// This node's role in the control-plane consensus layer.
///
/// Mirrors the Raft-equivalent roles. A node that holds data may take any role;
/// a [witness](super::witness) participates as a [`Follower`](Self::Follower) or
/// [`Candidate`](Self::Candidate) but never leads a data range — its leadership
/// is over control-plane state only.
///
/// The enum is a plain, fieldless value type: it is `Copy`, comparable, and
/// hashable, so it can be stored in sets or used as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ControlPlaneRole {
    /// Currently leading the control-plane log; the normal writer of ownership
    /// catalog transitions (ADR 0037, ADR 0052).
    Leader,
    /// Following the current leader; applies committed entries, votes per the
    /// durable last-vote rule.
    Follower,
    /// Standing in an election for a new term; not yet a leader.
    Candidate,
}

/// The kind of a [`ControlPlaneEntry`], without its payload.
///
/// Exhaustive over everything the control-plane log may carry. The deliberate
/// absence of a user-data kind is the type-level half of ADR 0052's
/// data/control isolation property.
///
/// Fieldless and `Copy`; also hashable so it can serve as a map or set key
/// (for example, when bucketing entries by kind).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ControlPlaneEntryKind {
    /// Admission, removal, or role change of a cluster member.
    MembershipChange,
    /// A fenced, versioned shard/range ownership catalog transition (ADR 0037).
    OwnershipTransition,
}

/// Opaque, already-encoded body of a control-plane entry.
///
/// The seam stays agnostic to *how* a membership change or an ownership
/// transition is encoded: the slice that implements that entry owns its wire
/// shape. The seam only guarantees the body is one of the two control-plane
/// kinds — never user data.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ControlPlanePayload(pub Vec<u8>);

impl ControlPlanePayload {
    /// Borrow the encoded bytes.
    ///
    /// Returns a view of the wrapped buffer; nothing is copied or decoded.
    #[must_use]
    pub fn as_bytes(&self) -> &[u8] {
        &self.0
    }
}

110/// A single entry in the control-plane log.
111///
112/// Closed over exactly the two control-plane concerns (see
113/// [`ControlPlaneEntryKind`]). There is no constructor for a user-data entry,
114/// so the data/control boundary cannot be crossed by appending to this log.
115#[derive(Debug, Clone, PartialEq, Eq)]
116pub enum ControlPlaneEntry {
117 /// Admission, removal, or role change of a cluster member.
118 MembershipChange(ControlPlanePayload),
119 /// A fenced, versioned shard/range ownership catalog transition (ADR 0037),
120 /// normally proposed by the Supervisor leader.
121 OwnershipTransition(ControlPlanePayload),
122}
123
124impl ControlPlaneEntry {
125 /// The kind of this entry, without its payload.
126 #[must_use]
127 pub fn kind(&self) -> ControlPlaneEntryKind {
128 match self {
129 Self::MembershipChange(_) => ControlPlaneEntryKind::MembershipChange,
130 Self::OwnershipTransition(_) => ControlPlaneEntryKind::OwnershipTransition,
131 }
132 }
133
134 /// Borrow the entry's opaque payload.
135 #[must_use]
136 pub fn payload(&self) -> &ControlPlanePayload {
137 match self {
138 Self::MembershipChange(p) | Self::OwnershipTransition(p) => p,
139 }
140 }
141}
142
143/// Why a [`ControlPlaneConsensus::propose`] was refused.
144#[derive(Debug, Clone, PartialEq, Eq)]
145pub enum ProposeRefusal {
146 /// This node is not the current leader. Only the leader is the normal writer
147 /// of control-plane entries (ADR 0052); the caller should route to
148 /// [`ControlPlaneConsensus::leader`] or retry after the next election.
149 NotLeader {
150 /// The leader this node currently believes in, if any.
151 leader: Option<MemberId>,
152 },
153 /// The node lost the control-plane quorum and cannot commit new entries
154 /// until quorum/lease authority is restored (owner self-fence, ADR 0037).
155 NoQuorum,
156}
157
158/// The Cluster Supervisor's control-plane consensus seam.
159///
160/// A follow-up slice implements this over a concrete Raft-equivalent engine.
161/// Callers — the rebalancer, the join/drain flows, the ownership-transition
162/// machinery — depend on this trait and the ADR 0052 boundaries, never on the
163/// engine. The contract is intentionally narrow: read the current
164/// role/term/leader and committed index, and (leader-only) propose a
165/// control-plane entry.
166///
167/// Implementations must uphold ADR 0052's safety properties: one leader per
168/// term, committed entries never lost, no double-vote across restart, and the
169/// data/control isolation that [`ControlPlaneEntry`] already enforces at the
170/// type level.
171pub trait ControlPlaneConsensus {
172 /// This node's current control-plane role.
173 fn role(&self) -> ControlPlaneRole;
174
175 /// The current control-plane term.
176 fn term(&self) -> u64;
177
178 /// The member this node currently believes is leader, if any.
179 fn leader(&self) -> Option<MemberId>;
180
181 /// Highest control-plane log index known committed (durable to a quorum).
182 fn committed_index(&self) -> ControlPlaneLogIndex;
183
184 /// Whether this node is the current leader — the normal writer of
185 /// ownership-catalog transitions (ADR 0052).
186 fn is_leader(&self) -> bool {
187 self.role() == ControlPlaneRole::Leader
188 }
189
190 /// Append a control-plane entry to the replicated log.
191 ///
192 /// Leader-only. Returns the index the entry will occupy once committed by a
193 /// quorum, or a [`ProposeRefusal`] if this node is not the leader or has
194 /// lost quorum. The returned index is *assigned*, not yet committed; callers
195 /// that need durability wait for [`committed_index`](Self::committed_index)
196 /// to reach it.
197 fn propose(&mut self, entry: ControlPlaneEntry)
198 -> Result<ControlPlaneLogIndex, ProposeRefusal>;
199}
200
#[cfg(test)]
mod tests {
    use super::*;

    /// A small fixed payload shared by the tests below.
    fn payload() -> ControlPlanePayload {
        ControlPlanePayload(vec![1, 2, 3])
    }

    #[test]
    fn entry_kind_maps_each_variant() {
        assert_eq!(
            ControlPlaneEntry::MembershipChange(payload()).kind(),
            ControlPlaneEntryKind::MembershipChange,
        );
        assert_eq!(
            ControlPlaneEntry::OwnershipTransition(payload()).kind(),
            ControlPlaneEntryKind::OwnershipTransition,
        );
    }

    #[test]
    fn entry_payload_is_accessible_for_both_kinds() {
        let m = ControlPlaneEntry::MembershipChange(payload());
        let o = ControlPlaneEntry::OwnershipTransition(payload());
        assert_eq!(m.payload().as_bytes(), &[1, 2, 3]);
        assert_eq!(o.payload().as_bytes(), &[1, 2, 3]);
    }

    /// The control-plane entry set is closed over exactly the two control-plane
    /// concerns — there is no user-data variant to match. These exhaustive
    /// matches (no wildcard arm) fail to compile if a variant is ever added to
    /// either the kind enum or the entry enum itself, which is the type-level
    /// enforcement of ADR 0052's data/control isolation.
    #[test]
    fn entry_set_is_closed_over_control_plane_only() {
        let kinds = [
            ControlPlaneEntryKind::MembershipChange,
            ControlPlaneEntryKind::OwnershipTransition,
        ];
        for kind in kinds {
            match kind {
                ControlPlaneEntryKind::MembershipChange
                | ControlPlaneEntryKind::OwnershipTransition => {}
            }
        }

        // Guard the entry enum directly as well, not only via its kind.
        let entries = [
            ControlPlaneEntry::MembershipChange(payload()),
            ControlPlaneEntry::OwnershipTransition(payload()),
        ];
        for entry in &entries {
            match entry {
                ControlPlaneEntry::MembershipChange(_)
                | ControlPlaneEntry::OwnershipTransition(_) => {}
            }
        }
    }

    /// A minimal seam implementation, proving the trait expresses leader-only
    /// `propose` and the role/term/leader/committed reads a slice will rely on.
    struct FakeConsensus {
        role: ControlPlaneRole,
        term: u64,
        leader: Option<MemberId>,
        committed: ControlPlaneLogIndex,
        next_index: ControlPlaneLogIndex,
        has_quorum: bool,
    }

    impl ControlPlaneConsensus for FakeConsensus {
        fn role(&self) -> ControlPlaneRole {
            self.role
        }
        fn term(&self) -> u64 {
            self.term
        }
        fn leader(&self) -> Option<MemberId> {
            self.leader.clone()
        }
        fn committed_index(&self) -> ControlPlaneLogIndex {
            self.committed
        }
        fn propose(
            &mut self,
            _entry: ControlPlaneEntry,
        ) -> Result<ControlPlaneLogIndex, ProposeRefusal> {
            // Leadership is checked before quorum, so a non-leader always gets
            // the leader hint rather than `NoQuorum`.
            if self.role != ControlPlaneRole::Leader {
                return Err(ProposeRefusal::NotLeader {
                    leader: self.leader.clone(),
                });
            }
            if !self.has_quorum {
                return Err(ProposeRefusal::NoQuorum);
            }
            let idx = self.next_index;
            self.next_index += 1;
            Ok(idx)
        }
    }

    #[test]
    fn follower_propose_is_refused_with_leader_hint() {
        let mut node = FakeConsensus {
            role: ControlPlaneRole::Follower,
            term: 7,
            leader: Some("n1".to_string()),
            committed: 42,
            next_index: 43,
            has_quorum: true,
        };
        assert!(!node.is_leader());
        assert_eq!(node.term(), 7);
        assert_eq!(node.committed_index(), 42);
        let refusal = node
            .propose(ControlPlaneEntry::OwnershipTransition(payload()))
            .unwrap_err();
        assert_eq!(
            refusal,
            ProposeRefusal::NotLeader {
                leader: Some("n1".to_string())
            }
        );
        // A refused proposal must not consume a log index.
        assert_eq!(node.next_index, 43);
    }

    #[test]
    fn leader_propose_assigns_increasing_indexes() {
        let mut node = FakeConsensus {
            role: ControlPlaneRole::Leader,
            term: 9,
            leader: Some("self".to_string()),
            committed: 10,
            next_index: 11,
            has_quorum: true,
        };
        assert!(node.is_leader());
        let a = node
            .propose(ControlPlaneEntry::OwnershipTransition(payload()))
            .unwrap();
        let b = node
            .propose(ControlPlaneEntry::MembershipChange(payload()))
            .unwrap();
        assert_eq!(a, 11);
        assert_eq!(b, 12);
    }

    #[test]
    fn leader_without_quorum_self_fences() {
        let mut node = FakeConsensus {
            role: ControlPlaneRole::Leader,
            term: 9,
            leader: Some("self".to_string()),
            committed: 10,
            next_index: 11,
            has_quorum: false,
        };
        let refusal = node
            .propose(ControlPlaneEntry::MembershipChange(payload()))
            .unwrap_err();
        assert_eq!(refusal, ProposeRefusal::NoQuorum);
        // A self-fenced leader must not consume a log index either.
        assert_eq!(node.next_index, 11);
    }
}