fleetfs_raft/confchange/
changer.rs

1// Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0.
2
3use crate::eraftpb::{ConfChangeSingle, ConfChangeType};
4use crate::tracker::{Configuration, ProgressMap, ProgressTracker};
5use crate::{Error, Result};
6
7/// Change log for progress map.
8pub enum MapChangeType {
9    Add,
10    Remove,
11}
12
13/// Changes made by `Changer`.
14pub type MapChange = Vec<(u64, MapChangeType)>;
15
16/// A map that stores updates instead of apply them directly.
17pub struct IncrChangeMap<'a> {
18    changes: MapChange,
19    base: &'a ProgressMap,
20}
21
22impl IncrChangeMap<'_> {
23    pub fn into_changes(self) -> MapChange {
24        self.changes
25    }
26
27    fn contains(&self, id: u64) -> bool {
28        match self.changes.iter().rfind(|(i, _)| *i == id) {
29            Some((_, MapChangeType::Remove)) => false,
30            Some((_, MapChangeType::Add)) => true,
31            None => self.base.contains_key(&id),
32        }
33    }
34}
35
36/// Changer facilitates configuration changes. It exposes methods to handle
37/// simple and joint consensus while performing the proper validation that allows
38/// refusing invalid configuration changes before they affect the active
39/// configuration.
40pub struct Changer<'a> {
41    tracker: &'a ProgressTracker,
42}
43
44impl Changer<'_> {
45    /// Creates a changer.
46    pub fn new(tracker: &ProgressTracker) -> Changer {
47        Changer { tracker }
48    }
49
50    /// Verifies that the outgoing (=right) majority config of the joint
51    /// config is empty and initializes it with a copy of the incoming (=left)
52    /// majority config. That is, it transitions from
53    /// ```text
54    ///     (1 2 3)&&()
55    /// ```
56    /// to
57    /// ```text
58    ///     (1 2 3)&&(1 2 3)
59    /// ```.
60    ///
61    /// The supplied changes are then applied to the incoming majority config,
62    /// resulting in a joint configuration that in terms of the Raft thesis[1]
63    /// (Section 4.3) corresponds to `C_{new,old}`.
64    ///
65    /// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
66    pub fn enter_joint(
67        &self,
68        auto_leave: bool,
69        ccs: &[ConfChangeSingle],
70    ) -> Result<(Configuration, MapChange)> {
71        if super::joint(self.tracker.conf()) {
72            return Err(Error::ConfChangeError("config is already joint".to_owned()));
73        }
74        let (mut cfg, mut prs) = self.check_and_copy()?;
75        if cfg.voters().incoming.is_empty() {
76            // We allow adding nodes to an empty config for convenience (testing and
77            // bootstrap), but you can't enter a joint state.
78            return Err(Error::ConfChangeError(
79                "can't make a zero-voter config joint".to_owned(),
80            ));
81        }
82        cfg.voters
83            .outgoing
84            .extend(cfg.voters.incoming.iter().cloned());
85        self.apply(&mut cfg, &mut prs, ccs)?;
86        cfg.auto_leave = auto_leave;
87        check_invariants(&cfg, &prs)?;
88        Ok((cfg, prs.into_changes()))
89    }
90
91    /// Transitions out of a joint configuration. It is an error to call this method if
92    /// the configuration is not joint, i.e. if the outgoing majority config is empty.
93    ///
94    /// The outgoing majority config of the joint configuration will be removed, that is,
95    /// the incoming config is promoted as the sole decision maker. In the notation of
96    /// the Raft thesis[1] (Section 4.3), this method transitions from `C_{new,old}` into
97    /// `C_new`.
98    ///
99    /// At the same time, any staged learners (LearnersNext) the addition of which was
100    /// held back by an overlapping voter in the former outgoing config will be inserted
101    /// into Learners.
102    ///
103    /// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
104    pub fn leave_joint(&self) -> Result<(Configuration, MapChange)> {
105        if !super::joint(self.tracker.conf()) {
106            return Err(Error::ConfChangeError(
107                "can't leave a non-joint config".to_owned(),
108            ));
109        }
110        let (mut cfg, mut prs) = self.check_and_copy()?;
111        if cfg.voters().outgoing.is_empty() {
112            return Err(Error::ConfChangeError(format!(
113                "configuration is not joint: {:?}",
114                cfg
115            )));
116        }
117        cfg.learners.extend(cfg.learners_next.drain());
118
119        for id in &*cfg.voters.outgoing {
120            if !cfg.voters.incoming.contains(id) && !cfg.learners.contains(id) {
121                prs.changes.push((*id, MapChangeType::Remove));
122            }
123        }
124
125        cfg.voters.outgoing.clear();
126        cfg.auto_leave = false;
127        check_invariants(&cfg, &prs)?;
128        Ok((cfg, prs.into_changes()))
129    }
130
131    /// Carries out a series of configuration changes that (in aggregate) mutates the
132    /// incoming majority config `Voters[0]` by at most one. This method will return an
133    /// error if that is not the case, if the resulting quorum is zero, or if the
134    /// configuration is in a joint state (i.e. if there is an outgoing configuration).
135    pub fn simple(&mut self, ccs: &[ConfChangeSingle]) -> Result<(Configuration, MapChange)> {
136        if super::joint(self.tracker.conf()) {
137            return Err(Error::ConfChangeError(
138                "can't apply simple config change in joint config".to_owned(),
139            ));
140        }
141        let (mut cfg, mut prs) = self.check_and_copy()?;
142        self.apply(&mut cfg, &mut prs, ccs)?;
143
144        if cfg
145            .voters
146            .incoming
147            .symmetric_difference(&self.tracker.conf().voters.incoming)
148            .count()
149            > 1
150        {
151            return Err(Error::ConfChangeError(
152                "more than one voter changed without entering joint config".to_owned(),
153            ));
154        }
155        check_invariants(&cfg, &prs)?;
156        Ok((cfg, prs.into_changes()))
157    }
158
159    /// Applies a change to the configuration. By convention, changes to voters are always
160    /// made to the incoming majority config. Outgoing is either empty or preserves the
161    /// outgoing majority configuration while in a joint state.
162    fn apply(
163        &self,
164        cfg: &mut Configuration,
165        prs: &mut IncrChangeMap,
166        ccs: &[ConfChangeSingle],
167    ) -> Result<()> {
168        for cc in ccs {
169            if cc.node_id == 0 {
170                // Replaces the NodeID with zero if it decides (downstream of
171                // raft) to not apply a change, so we have to have explicit code
172                // here to ignore these.
173                continue;
174            }
175            match cc.get_change_type() {
176                ConfChangeType::AddNode => self.make_voter(cfg, prs, cc.node_id),
177                ConfChangeType::AddLearnerNode => self.make_learner(cfg, prs, cc.node_id),
178                ConfChangeType::RemoveNode => self.remove(cfg, prs, cc.node_id),
179            }
180        }
181        if cfg.voters().incoming.is_empty() {
182            return Err(Error::ConfChangeError("removed all voters".to_owned()));
183        }
184        Ok(())
185    }
186
187    /// Adds or promotes the given ID to be a voter in the incoming majority config.
188    fn make_voter(&self, cfg: &mut Configuration, prs: &mut IncrChangeMap, id: u64) {
189        if !prs.contains(id) {
190            self.init_progress(cfg, prs, id, false);
191            return;
192        }
193
194        cfg.voters.incoming.insert(id);
195        cfg.learners.remove(&id);
196        cfg.learners_next.remove(&id);
197    }
198
199    /// Makes the given ID a learner or stages it to be a learner once an active joint
200    /// configuration is exited.
201    ///
202    /// The former happens when the peer is not a part of the outgoing config, in which
203    /// case we either add a new learner or demote a voter in the incoming config.
204    ///
205    /// The latter case occurs when the configuration is joint and the peer is a voter
206    /// in the outgoing config. In that case, we do not want to add the peer as a learner
207    /// because then we'd have to track a peer as a voter and learner simultaneously.
208    /// Instead, we add the learner to LearnersNext, so that it will be added to Learners
209    /// the moment the outgoing config is removed by LeaveJoint().
210    fn make_learner(&self, cfg: &mut Configuration, prs: &mut IncrChangeMap, id: u64) {
211        if !prs.contains(id) {
212            self.init_progress(cfg, prs, id, true);
213            return;
214        }
215
216        if cfg.learners.contains(&id) {
217            return;
218        }
219
220        cfg.voters.incoming.remove(&id);
221        cfg.learners.remove(&id);
222        cfg.learners_next.remove(&id);
223
224        // Use LearnersNext if we can't add the learner to Learners directly, i.e.
225        // if the peer is still tracked as a voter in the outgoing config. It will
226        // be turned into a learner in LeaveJoint().
227        //
228        // Otherwise, add a regular learner right away.
229        if cfg.voters().outgoing.contains(&id) {
230            cfg.learners_next.insert(id);
231        } else {
232            cfg.learners.insert(id);
233        }
234    }
235
236    /// Removes this peer as a voter or learner from the incoming config.
237    fn remove(&self, cfg: &mut Configuration, prs: &mut IncrChangeMap, id: u64) {
238        if !prs.contains(id) {
239            return;
240        }
241
242        cfg.voters.incoming.remove(&id);
243        cfg.learners.remove(&id);
244        cfg.learners_next.remove(&id);
245
246        // If the peer is still a voter in the outgoing config, keep the Progress.
247        if !cfg.voters.outgoing.contains(&id) {
248            prs.changes.push((id, MapChangeType::Remove));
249        }
250    }
251
252    /// Initializes a new progress for the given node or learner.
253    fn init_progress(
254        &self,
255        cfg: &mut Configuration,
256        prs: &mut IncrChangeMap,
257        id: u64,
258        is_learner: bool,
259    ) {
260        if !is_learner {
261            cfg.voters.incoming.insert(id);
262        } else {
263            cfg.learners.insert(id);
264        }
265        prs.changes.push((id, MapChangeType::Add));
266    }
267
268    /// Copies the tracker's config. It returns an error if checkInvariants does.
269    ///
270    /// Unlike Etcd, we don't copy progress as we don't need to mutate the `is_learner`
271    /// flags. Additions and Removals should be done after everything is checked OK.
272    fn check_and_copy(&self) -> Result<(Configuration, IncrChangeMap)> {
273        let prs = IncrChangeMap {
274            changes: vec![],
275            base: self.tracker.progress(),
276        };
277        check_invariants(self.tracker.conf(), &prs)?;
278        Ok((self.tracker.conf().clone(), prs))
279    }
280}
281
282/// Makes sure that the config and progress are compatible with each other.
283/// This is used to check both what the Changer is initialized with, as well
284/// as what it returns.
285fn check_invariants(cfg: &Configuration, prs: &IncrChangeMap) -> Result<()> {
286    // NB: intentionally allow the empty config. In production we'll never see a
287    // non-empty config (we prevent it from being created) but we will need to
288    // be able to *create* an initial config, for example during bootstrap (or
289    // during tests). Instead of having to hand-code this, we allow
290    // transitioning from an empty config into any other legal and non-empty
291    // config.
292    for id in cfg.voters().ids().iter() {
293        if !prs.contains(id) {
294            return Err(Error::ConfChangeError(format!(
295                "no progress for voter {}",
296                id
297            )));
298        }
299    }
300    for id in &cfg.learners {
301        if !prs.contains(*id) {
302            return Err(Error::ConfChangeError(format!(
303                "no progress for learner {}",
304                id
305            )));
306        }
307        // Conversely Learners and Voters doesn't intersect at all.
308        if cfg.voters().outgoing.contains(id) {
309            return Err(Error::ConfChangeError(format!(
310                "{} is in learners and outgoing voters",
311                id
312            )));
313        }
314        if cfg.voters().incoming.contains(id) {
315            return Err(Error::ConfChangeError(format!(
316                "{} is in learners and incoming voters",
317                id
318            )));
319        }
320    }
321    for id in &cfg.learners_next {
322        if !prs.contains(*id) {
323            return Err(Error::ConfChangeError(format!(
324                "no progress for learner(next) {}",
325                id
326            )));
327        }
328
329        // Any staged learner was staged because it could not be directly added due
330        // to a conflicting voter in the outgoing config.
331        if !cfg.voters().outgoing.contains(id) {
332            return Err(Error::ConfChangeError(format!(
333                "{} is in learners_next and outgoing voters",
334                id
335            )));
336        }
337    }
338
339    if !super::joint(cfg) {
340        // Etcd enforces outgoing and learner_next to be nil map. But there is no nil
341        // in rust. We just check empty for simplicity.
342        if !cfg.learners_next().is_empty() {
343            return Err(Error::ConfChangeError(
344                "learners_next must be empty when not joint".to_owned(),
345            ));
346        }
347        if cfg.auto_leave {
348            return Err(Error::ConfChangeError(
349                "auto_leave must be false when not joint".to_owned(),
350            ));
351        }
352    }
353
354    Ok(())
355}