fleetfs_raft/confchange/changer.rs
1// Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0.
2
3use crate::eraftpb::{ConfChangeSingle, ConfChangeType};
4use crate::tracker::{Configuration, ProgressMap, ProgressTracker};
5use crate::{Error, Result};
6
7/// Change log for progress map.
8pub enum MapChangeType {
9 Add,
10 Remove,
11}
12
13/// Changes made by `Changer`.
14pub type MapChange = Vec<(u64, MapChangeType)>;
15
16/// A map that stores updates instead of apply them directly.
17pub struct IncrChangeMap<'a> {
18 changes: MapChange,
19 base: &'a ProgressMap,
20}
21
22impl IncrChangeMap<'_> {
23 pub fn into_changes(self) -> MapChange {
24 self.changes
25 }
26
27 fn contains(&self, id: u64) -> bool {
28 match self.changes.iter().rfind(|(i, _)| *i == id) {
29 Some((_, MapChangeType::Remove)) => false,
30 Some((_, MapChangeType::Add)) => true,
31 None => self.base.contains_key(&id),
32 }
33 }
34}
35
36/// Changer facilitates configuration changes. It exposes methods to handle
37/// simple and joint consensus while performing the proper validation that allows
38/// refusing invalid configuration changes before they affect the active
39/// configuration.
40pub struct Changer<'a> {
41 tracker: &'a ProgressTracker,
42}
43
44impl Changer<'_> {
45 /// Creates a changer.
46 pub fn new(tracker: &ProgressTracker) -> Changer {
47 Changer { tracker }
48 }
49
50 /// Verifies that the outgoing (=right) majority config of the joint
51 /// config is empty and initializes it with a copy of the incoming (=left)
52 /// majority config. That is, it transitions from
53 /// ```text
54 /// (1 2 3)&&()
55 /// ```
56 /// to
57 /// ```text
58 /// (1 2 3)&&(1 2 3)
59 /// ```.
60 ///
61 /// The supplied changes are then applied to the incoming majority config,
62 /// resulting in a joint configuration that in terms of the Raft thesis[1]
63 /// (Section 4.3) corresponds to `C_{new,old}`.
64 ///
65 /// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
66 pub fn enter_joint(
67 &self,
68 auto_leave: bool,
69 ccs: &[ConfChangeSingle],
70 ) -> Result<(Configuration, MapChange)> {
71 if super::joint(self.tracker.conf()) {
72 return Err(Error::ConfChangeError("config is already joint".to_owned()));
73 }
74 let (mut cfg, mut prs) = self.check_and_copy()?;
75 if cfg.voters().incoming.is_empty() {
76 // We allow adding nodes to an empty config for convenience (testing and
77 // bootstrap), but you can't enter a joint state.
78 return Err(Error::ConfChangeError(
79 "can't make a zero-voter config joint".to_owned(),
80 ));
81 }
82 cfg.voters
83 .outgoing
84 .extend(cfg.voters.incoming.iter().cloned());
85 self.apply(&mut cfg, &mut prs, ccs)?;
86 cfg.auto_leave = auto_leave;
87 check_invariants(&cfg, &prs)?;
88 Ok((cfg, prs.into_changes()))
89 }
90
91 /// Transitions out of a joint configuration. It is an error to call this method if
92 /// the configuration is not joint, i.e. if the outgoing majority config is empty.
93 ///
94 /// The outgoing majority config of the joint configuration will be removed, that is,
95 /// the incoming config is promoted as the sole decision maker. In the notation of
96 /// the Raft thesis[1] (Section 4.3), this method transitions from `C_{new,old}` into
97 /// `C_new`.
98 ///
99 /// At the same time, any staged learners (LearnersNext) the addition of which was
100 /// held back by an overlapping voter in the former outgoing config will be inserted
101 /// into Learners.
102 ///
103 /// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
104 pub fn leave_joint(&self) -> Result<(Configuration, MapChange)> {
105 if !super::joint(self.tracker.conf()) {
106 return Err(Error::ConfChangeError(
107 "can't leave a non-joint config".to_owned(),
108 ));
109 }
110 let (mut cfg, mut prs) = self.check_and_copy()?;
111 if cfg.voters().outgoing.is_empty() {
112 return Err(Error::ConfChangeError(format!(
113 "configuration is not joint: {:?}",
114 cfg
115 )));
116 }
117 cfg.learners.extend(cfg.learners_next.drain());
118
119 for id in &*cfg.voters.outgoing {
120 if !cfg.voters.incoming.contains(id) && !cfg.learners.contains(id) {
121 prs.changes.push((*id, MapChangeType::Remove));
122 }
123 }
124
125 cfg.voters.outgoing.clear();
126 cfg.auto_leave = false;
127 check_invariants(&cfg, &prs)?;
128 Ok((cfg, prs.into_changes()))
129 }
130
131 /// Carries out a series of configuration changes that (in aggregate) mutates the
132 /// incoming majority config `Voters[0]` by at most one. This method will return an
133 /// error if that is not the case, if the resulting quorum is zero, or if the
134 /// configuration is in a joint state (i.e. if there is an outgoing configuration).
135 pub fn simple(&mut self, ccs: &[ConfChangeSingle]) -> Result<(Configuration, MapChange)> {
136 if super::joint(self.tracker.conf()) {
137 return Err(Error::ConfChangeError(
138 "can't apply simple config change in joint config".to_owned(),
139 ));
140 }
141 let (mut cfg, mut prs) = self.check_and_copy()?;
142 self.apply(&mut cfg, &mut prs, ccs)?;
143
144 if cfg
145 .voters
146 .incoming
147 .symmetric_difference(&self.tracker.conf().voters.incoming)
148 .count()
149 > 1
150 {
151 return Err(Error::ConfChangeError(
152 "more than one voter changed without entering joint config".to_owned(),
153 ));
154 }
155 check_invariants(&cfg, &prs)?;
156 Ok((cfg, prs.into_changes()))
157 }
158
159 /// Applies a change to the configuration. By convention, changes to voters are always
160 /// made to the incoming majority config. Outgoing is either empty or preserves the
161 /// outgoing majority configuration while in a joint state.
162 fn apply(
163 &self,
164 cfg: &mut Configuration,
165 prs: &mut IncrChangeMap,
166 ccs: &[ConfChangeSingle],
167 ) -> Result<()> {
168 for cc in ccs {
169 if cc.node_id == 0 {
170 // Replaces the NodeID with zero if it decides (downstream of
171 // raft) to not apply a change, so we have to have explicit code
172 // here to ignore these.
173 continue;
174 }
175 match cc.get_change_type() {
176 ConfChangeType::AddNode => self.make_voter(cfg, prs, cc.node_id),
177 ConfChangeType::AddLearnerNode => self.make_learner(cfg, prs, cc.node_id),
178 ConfChangeType::RemoveNode => self.remove(cfg, prs, cc.node_id),
179 }
180 }
181 if cfg.voters().incoming.is_empty() {
182 return Err(Error::ConfChangeError("removed all voters".to_owned()));
183 }
184 Ok(())
185 }
186
187 /// Adds or promotes the given ID to be a voter in the incoming majority config.
188 fn make_voter(&self, cfg: &mut Configuration, prs: &mut IncrChangeMap, id: u64) {
189 if !prs.contains(id) {
190 self.init_progress(cfg, prs, id, false);
191 return;
192 }
193
194 cfg.voters.incoming.insert(id);
195 cfg.learners.remove(&id);
196 cfg.learners_next.remove(&id);
197 }
198
199 /// Makes the given ID a learner or stages it to be a learner once an active joint
200 /// configuration is exited.
201 ///
202 /// The former happens when the peer is not a part of the outgoing config, in which
203 /// case we either add a new learner or demote a voter in the incoming config.
204 ///
205 /// The latter case occurs when the configuration is joint and the peer is a voter
206 /// in the outgoing config. In that case, we do not want to add the peer as a learner
207 /// because then we'd have to track a peer as a voter and learner simultaneously.
208 /// Instead, we add the learner to LearnersNext, so that it will be added to Learners
209 /// the moment the outgoing config is removed by LeaveJoint().
210 fn make_learner(&self, cfg: &mut Configuration, prs: &mut IncrChangeMap, id: u64) {
211 if !prs.contains(id) {
212 self.init_progress(cfg, prs, id, true);
213 return;
214 }
215
216 if cfg.learners.contains(&id) {
217 return;
218 }
219
220 cfg.voters.incoming.remove(&id);
221 cfg.learners.remove(&id);
222 cfg.learners_next.remove(&id);
223
224 // Use LearnersNext if we can't add the learner to Learners directly, i.e.
225 // if the peer is still tracked as a voter in the outgoing config. It will
226 // be turned into a learner in LeaveJoint().
227 //
228 // Otherwise, add a regular learner right away.
229 if cfg.voters().outgoing.contains(&id) {
230 cfg.learners_next.insert(id);
231 } else {
232 cfg.learners.insert(id);
233 }
234 }
235
236 /// Removes this peer as a voter or learner from the incoming config.
237 fn remove(&self, cfg: &mut Configuration, prs: &mut IncrChangeMap, id: u64) {
238 if !prs.contains(id) {
239 return;
240 }
241
242 cfg.voters.incoming.remove(&id);
243 cfg.learners.remove(&id);
244 cfg.learners_next.remove(&id);
245
246 // If the peer is still a voter in the outgoing config, keep the Progress.
247 if !cfg.voters.outgoing.contains(&id) {
248 prs.changes.push((id, MapChangeType::Remove));
249 }
250 }
251
252 /// Initializes a new progress for the given node or learner.
253 fn init_progress(
254 &self,
255 cfg: &mut Configuration,
256 prs: &mut IncrChangeMap,
257 id: u64,
258 is_learner: bool,
259 ) {
260 if !is_learner {
261 cfg.voters.incoming.insert(id);
262 } else {
263 cfg.learners.insert(id);
264 }
265 prs.changes.push((id, MapChangeType::Add));
266 }
267
268 /// Copies the tracker's config. It returns an error if checkInvariants does.
269 ///
270 /// Unlike Etcd, we don't copy progress as we don't need to mutate the `is_learner`
271 /// flags. Additions and Removals should be done after everything is checked OK.
272 fn check_and_copy(&self) -> Result<(Configuration, IncrChangeMap)> {
273 let prs = IncrChangeMap {
274 changes: vec![],
275 base: self.tracker.progress(),
276 };
277 check_invariants(self.tracker.conf(), &prs)?;
278 Ok((self.tracker.conf().clone(), prs))
279 }
280}
281
282/// Makes sure that the config and progress are compatible with each other.
283/// This is used to check both what the Changer is initialized with, as well
284/// as what it returns.
285fn check_invariants(cfg: &Configuration, prs: &IncrChangeMap) -> Result<()> {
286 // NB: intentionally allow the empty config. In production we'll never see a
287 // non-empty config (we prevent it from being created) but we will need to
288 // be able to *create* an initial config, for example during bootstrap (or
289 // during tests). Instead of having to hand-code this, we allow
290 // transitioning from an empty config into any other legal and non-empty
291 // config.
292 for id in cfg.voters().ids().iter() {
293 if !prs.contains(id) {
294 return Err(Error::ConfChangeError(format!(
295 "no progress for voter {}",
296 id
297 )));
298 }
299 }
300 for id in &cfg.learners {
301 if !prs.contains(*id) {
302 return Err(Error::ConfChangeError(format!(
303 "no progress for learner {}",
304 id
305 )));
306 }
307 // Conversely Learners and Voters doesn't intersect at all.
308 if cfg.voters().outgoing.contains(id) {
309 return Err(Error::ConfChangeError(format!(
310 "{} is in learners and outgoing voters",
311 id
312 )));
313 }
314 if cfg.voters().incoming.contains(id) {
315 return Err(Error::ConfChangeError(format!(
316 "{} is in learners and incoming voters",
317 id
318 )));
319 }
320 }
321 for id in &cfg.learners_next {
322 if !prs.contains(*id) {
323 return Err(Error::ConfChangeError(format!(
324 "no progress for learner(next) {}",
325 id
326 )));
327 }
328
329 // Any staged learner was staged because it could not be directly added due
330 // to a conflicting voter in the outgoing config.
331 if !cfg.voters().outgoing.contains(id) {
332 return Err(Error::ConfChangeError(format!(
333 "{} is in learners_next and outgoing voters",
334 id
335 )));
336 }
337 }
338
339 if !super::joint(cfg) {
340 // Etcd enforces outgoing and learner_next to be nil map. But there is no nil
341 // in rust. We just check empty for simplicity.
342 if !cfg.learners_next().is_empty() {
343 return Err(Error::ConfChangeError(
344 "learners_next must be empty when not joint".to_owned(),
345 ));
346 }
347 if cfg.auto_leave {
348 return Err(Error::ConfChangeError(
349 "auto_leave must be false when not joint".to_owned(),
350 ));
351 }
352 }
353
354 Ok(())
355}