nodedb_cluster/multi_raft/conf_change.rs
1// SPDX-License-Identifier: BUSL-1.1
2
3//! Raft configuration-change propose/apply with learner semantics.
4//!
5//! `propose_conf_change` writes a `ConfChange` payload (see
6//! `crate::conf_change::ConfChange`) into the group leader's Raft log as a
7//! regular entry with a special prefix byte. The entry replicates via the
8//! normal `AppendEntries` channel; no new transport is needed.
9//!
10//! `apply_conf_change` is called by the tick loop when a committed entry
11//! is identified as a conf change. It updates both the in-memory
12//! `RaftNode` peer set and the `RoutingTable`:
13//!
14//! - `AddNode` → voter added to `RaftNode.peers` and `routing.members`.
15//! - `RemoveNode` → voter removed from both.
16//! - `AddLearner` → learner added to `RaftNode.learners` and `routing.learners`.
17//! - `PromoteLearner` → learner moved from `learners` to `members` in both;
18//! if the promoted peer is *this* node, also flips the local role from
19//! `Learner` to `Follower`.
20
21use tracing::debug;
22
23use crate::conf_change::{ConfChange, ConfChangeType};
24use crate::error::{ClusterError, Result};
25
26use super::core::MultiRaft;
27
28impl MultiRaft {
29 /// Propose a configuration change to a Raft group.
30 ///
31 /// The change is serialized into the group's Raft log as a
32 /// regular entry with a distinguishing prefix byte. It
33 /// replicates through the normal `AppendEntries` path and is
34 /// applied by every follower replica when the entry commits
35 /// (see `apply_conf_change`).
36 ///
37 /// # Single-voter vs. multi-voter groups
38 ///
39 /// Single-voter groups commit inside `node.propose` itself
40 /// (see `nodedb_raft::node::RaftNode::propose` single-voter
41 /// branch). In that case the commit has already happened by
42 /// the time we return, so we safely apply the change inline:
43 /// any caller that reads routing immediately after the
44 /// propose sees the final state.
45 ///
46 /// Multi-voter groups commit asynchronously once enough
47 /// followers have replicated the entry. The apply then
48 /// happens on the tick loop after it observes the updated
49 /// `commit_index`. We MUST NOT inline-apply in that case —
50 /// if the leader steps down before replication completes, a
51 /// new leader may truncate the log entry and the local state
52 /// would be permanently ahead of the committed state with no
53 /// rollback path. Callers that need to wait for the apply
54 /// should poll the routing table (see
55 /// `raft_loop::join::wait_for_routing_contains_learner`).
56 ///
57 /// Returns `(group_id, log_index)` on success.
58 pub fn propose_conf_change(
59 &mut self,
60 group_id: u64,
61 change: &ConfChange,
62 ) -> Result<(u64, u64)> {
63 let (log_index, committed_immediately) = {
64 let node = self
65 .groups
66 .get_mut(&group_id)
67 .ok_or(ClusterError::GroupNotFound { group_id })?;
68 let data = change.to_entry_data();
69 let log_index = node.propose(data)?;
70 // A single-voter group self-commits inside `propose`:
71 // its `commit_index` is bumped to the new `log_index`
72 // before we return. Detecting this is the one safe
73 // trigger for an inline apply.
74 let committed_immediately = node.commit_index() >= log_index;
75 (log_index, committed_immediately)
76 };
77
78 if committed_immediately {
79 self.apply_conf_change(group_id, change)?;
80 }
81 Ok((group_id, log_index))
82 }
83
84 /// Apply a committed configuration change to this node's view of the
85 /// given Raft group.
86 ///
87 /// This is called from the tick loop for every committed entry
88 /// detected as a conf-change (via `ConfChange::from_entry_data`). It
89 /// must be idempotent with respect to no-op changes so replaying the
90 /// log after a crash does not double-apply.
91 pub fn apply_conf_change(&mut self, group_id: u64, change: &ConfChange) -> Result<()> {
92 let self_node_id = self.node_id;
93
94 let node = self
95 .groups
96 .get_mut(&group_id)
97 .ok_or(ClusterError::GroupNotFound { group_id })?;
98
99 match change.change_type {
100 ConfChangeType::AddNode => {
101 // Direct voter add (used for legacy or bootstrap paths).
102 node.add_peer(change.node_id);
103 if let Some(info) = self.routing.group_info(group_id)
104 && !info.members.contains(&change.node_id)
105 {
106 let mut new_members = info.members.clone();
107 new_members.push(change.node_id);
108 self.routing.set_group_members(group_id, new_members);
109 }
110 }
111 ConfChangeType::RemoveNode => {
112 node.remove_peer(change.node_id);
113 if let Some(info) = self.routing.group_info(group_id) {
114 let new_members: Vec<u64> = info
115 .members
116 .iter()
117 .copied()
118 .filter(|&id| id != change.node_id)
119 .collect();
120 self.routing.set_group_members(group_id, new_members);
121 }
122 }
123 ConfChangeType::AddLearner => {
124 // Non-voting add: peer enters learners on both the
125 // RaftNode and the routing table. Voting quorum does not
126 // change.
127 node.add_learner(change.node_id);
128 self.routing.add_group_learner(group_id, change.node_id);
129 }
130 ConfChangeType::PromoteLearner => {
131 // Learner → voter. RaftNode and routing both update.
132 // If this is our own promotion, we also need to flip the
133 // local role from `Learner` to `Follower` so subsequent
134 // ticks run election timeouts normally.
135 let promoted = node.promote_learner(change.node_id);
136 if promoted {
137 self.routing.promote_group_learner(group_id, change.node_id);
138 }
139 if change.node_id == self_node_id {
140 node.promote_self_to_voter();
141 }
142 }
143 }
144
145 debug!(
146 node = self.node_id,
147 group = group_id,
148 change_type = ?change.change_type,
149 target_node = change.node_id,
150 voters = ?self.groups.get(&group_id).map(|n| n.voters().to_vec()),
151 learners = ?self.groups.get(&group_id).map(|n| n.learners().to_vec()),
152 "applied conf change"
153 );
154
155 Ok(())
156 }
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use crate::routing::RoutingTable;
    use nodedb_raft::NodeRole;

    use super::super::core::MultiRaft;

    /// Build a `MultiRaft` for `node_id` backed by a fresh temporary
    /// directory, with one peerless group per id in `group_ids`.
    ///
    /// The `TempDir` guard is returned alongside the `MultiRaft` so the
    /// directory stays alive for as long as the caller holds the guard and is
    /// removed when the guard drops, instead of being leaked on disk.
    /// Callers should bind it as `let (_dir, mut mr) = new_mr(..)`: with that
    /// binding order the `MultiRaft` is dropped before the directory.
    fn new_mr(node_id: u64, group_ids: &[u64]) -> (tempfile::TempDir, MultiRaft) {
        let dir = tempfile::tempdir().unwrap();
        let rt = RoutingTable::uniform(group_ids.len() as u64, &[node_id], 1);
        let mut mr = MultiRaft::new(node_id, rt, dir.path().to_path_buf());
        for &gid in group_ids {
            mr.add_group(gid, vec![]).unwrap();
        }
        (dir, mr)
    }

    #[test]
    fn apply_add_learner_updates_routing_and_raftnode() {
        let (_dir, mut mr) = new_mr(1, &[0]);
        let change = ConfChange {
            change_type: ConfChangeType::AddLearner,
            node_id: 2,
        };
        mr.apply_conf_change(0, &change).unwrap();

        // RaftNode: learner tracked, voters unchanged.
        let node = mr.groups.get(&0).unwrap();
        assert_eq!(node.learners(), &[2]);
        assert!(node.voters().is_empty());

        // Routing: learners populated, members untouched (still just self).
        let info = mr.routing.group_info(0).unwrap();
        assert_eq!(info.learners, vec![2]);
        assert_eq!(info.members, vec![1]);
    }

    #[test]
    fn apply_promote_learner_moves_peer_to_voters() {
        let (_dir, mut mr) = new_mr(1, &[0]);
        mr.apply_conf_change(
            0,
            &ConfChange {
                change_type: ConfChangeType::AddLearner,
                node_id: 2,
            },
        )
        .unwrap();
        mr.apply_conf_change(
            0,
            &ConfChange {
                change_type: ConfChangeType::PromoteLearner,
                node_id: 2,
            },
        )
        .unwrap();

        // RaftNode: peer 2 is now a voter and no longer a learner.
        let node = mr.groups.get(&0).unwrap();
        assert_eq!(node.voters(), &[2]);
        assert!(node.learners().is_empty());

        // Routing: learner list emptied, peer 2 listed as a member.
        let info = mr.routing.group_info(0).unwrap();
        assert_eq!(info.learners, Vec::<u64>::new());
        assert!(info.members.contains(&2));
    }

    #[test]
    fn apply_promote_self_flips_role() {
        // This node (id 2) joins group 0 as a learner; node 1 is the voter.
        // This mirrors the state just before a `PromoteLearner(self)` entry
        // would be applied.
        let dir = tempfile::tempdir().unwrap();
        let rt = RoutingTable::uniform(1, &[1, 2], 1);
        let mut mr = MultiRaft::new(2, rt, dir.path().to_path_buf());
        mr.add_group_as_learner(0, vec![1], vec![]).unwrap();

        // In the real flow this node would appear in the learner list via an
        // `AddLearner` entry applied from the log. Per the original author's
        // note, `add_learner` guards against the node's own id, so this call
        // is expected to be a no-op and the "self is a learner" state cannot
        // be staged cleanly from here.
        mr.groups.get_mut(&0).unwrap().add_learner(2);

        // Because of that, the role flip that `apply_conf_change` triggers
        // for a self-promotion is verified directly rather than through
        // `apply_conf_change`.
        let node = mr.groups.get_mut(&0).unwrap();
        assert_eq!(node.role(), NodeRole::Learner);
        node.promote_self_to_voter();
        assert_eq!(node.role(), NodeRole::Follower);
    }
}
255}