ember_cluster/election.rs
1//! Automatic failover election state machine.
2//!
3//! When gossip confirms a primary has failed, its replicas compete to replace
4//! it. Each replica starts a timed election: it broadcasts a `VoteRequest`
5//! via gossip and waits for primary nodes to respond with `VoteGranted`.
6//!
7//! The first replica to collect votes from a majority of the alive primaries
8//! wins the election and promotes itself via `CLUSTER FAILOVER FORCE`.
9//!
10//! # Fairness
11//!
12//! Replicas that are more up-to-date (higher replication offset) are
13//! preferred by waiting less before broadcasting their request. This is
14//! achieved by the caller applying a stagger delay before calling
15//! `queue_vote_request`.
16
17use std::collections::HashSet;
18
19use crate::NodeId;
20
21/// State for an in-progress automatic failover election.
22///
23/// Created when a replica detects its primary has failed. Discarded when
24/// the election succeeds (promotion triggered) or times out.
25pub struct Election {
26 /// Config epoch this election is contesting.
27 pub epoch: u64,
28 /// Node IDs of primaries that have voted for us.
29 votes: HashSet<NodeId>,
30 /// Whether promotion has already been triggered for this election.
31 promoted: bool,
32}
33
34impl Election {
35 /// Creates a new election for the given epoch.
36 pub fn new(epoch: u64) -> Self {
37 Self {
38 epoch,
39 votes: HashSet::new(),
40 promoted: false,
41 }
42 }
43
44 /// Records a vote from `from`. Returns `true` if quorum is newly reached.
45 ///
46 /// `total_primaries` is the number of alive primaries (excluding the
47 /// failed one) at the time the election started. Quorum is a simple
48 /// majority: `total_primaries / 2 + 1`.
49 ///
50 /// Once quorum is reached this returns `true` exactly once; subsequent
51 /// calls return `false` so the caller promotes exactly once.
52 pub fn record_vote(&mut self, from: NodeId, total_primaries: usize) -> bool {
53 if self.promoted {
54 return false;
55 }
56 self.votes.insert(from);
57 self.promoted = self.votes.len() >= Self::quorum(total_primaries);
58 self.promoted
59 }
60
61 /// Minimum votes required for a majority.
62 pub fn quorum(total_primaries: usize) -> usize {
63 total_primaries / 2 + 1
64 }
65
66 /// Returns `true` if this election has already succeeded.
67 pub fn is_promoted(&self) -> bool {
68 self.promoted
69 }
70}
71
72#[cfg(test)]
73mod tests {
74 use super::*;
75
76 #[test]
77 fn quorum_single_primary() {
78 assert_eq!(Election::quorum(1), 1);
79 }
80
81 #[test]
82 fn quorum_three_primaries() {
83 assert_eq!(Election::quorum(3), 2);
84 }
85
86 #[test]
87 fn quorum_five_primaries() {
88 assert_eq!(Election::quorum(5), 3);
89 }
90
91 #[test]
92 fn record_vote_reaches_quorum() {
93 let mut e = Election::new(1);
94 let n1 = NodeId::new();
95 let n2 = NodeId::new();
96 // 3 primaries → need 2 votes
97 assert!(!e.record_vote(n1, 3), "first vote should not reach quorum");
98 assert!(e.record_vote(n2, 3), "second vote should reach quorum");
99 assert!(e.is_promoted());
100 }
101
102 #[test]
103 fn record_vote_deduplicates() {
104 let mut e = Election::new(1);
105 let n1 = NodeId::new();
106 // 3 primaries need 2 votes; same voter counted only once
107 assert!(!e.record_vote(n1, 3));
108 assert!(!e.record_vote(n1, 3), "duplicate vote must not count");
109 assert!(!e.is_promoted());
110 }
111
112 #[test]
113 fn no_votes_after_promotion() {
114 let mut e = Election::new(1);
115 let n1 = NodeId::new();
116 // single primary — one vote is enough
117 assert!(e.record_vote(n1, 1));
118 // further votes return false
119 assert!(!e.record_vote(NodeId::new(), 1));
120 }
121
122 #[test]
123 fn single_node_quorum() {
124 let mut e = Election::new(42);
125 assert!(e.record_vote(NodeId::new(), 1));
126 assert!(e.is_promoted());
127 }
128}