Skip to main content

atomr_cluster/
sbr.rs

//! Split-brain resolvers.
//!
//! Six downing strategies are implemented, following the catalog commonly
//! offered by cluster split-brain resolvers:
//! * KeepMajority
//! * StaticQuorum
//! * KeepOldest
//! * KeepReferee
//! * DownAll
//! * LeaseMajority
9
10use crate::member::{Member, MemberStatus};
11
/// What the resolver recommends the cluster do with the given side.
///
/// "This side" is the side of the partition evaluating the strategy, i.e. the
/// `reachable` members handed to [`DowningStrategy::decide`]; the other side is
/// the `unreachable` members. How a decision is carried out is up to the caller.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum DowningDecision {
    /// Down the unreachable members; this side survives.
    DownUnreachable,
    /// Down every member, on both sides of the partition.
    DownAll,
    /// Down this side; returned when the strategy favours the other side.
    DownSelf,
    /// Take no downing action.
    Stay,
}
21
/// A split-brain downing policy.
///
/// Implementors are required to be `Send + Sync`, so a strategy can be moved
/// to and shared between threads.
pub trait DowningStrategy: Send + Sync {
    /// Returns the recommended [`DowningDecision`] for the side that can see
    /// the `reachable` members and cannot see the `unreachable` ones.
    fn decide(&self, reachable: &[&Member], unreachable: &[&Member]) -> DowningDecision;
}
25
26/// KeepMajority: the side with strictly more up members survives.
27#[derive(Debug, Clone, Copy, Default)]
28pub struct KeepMajorityStrategy;
29
30impl DowningStrategy for KeepMajorityStrategy {
31    fn decide(&self, r: &[&Member], u: &[&Member]) -> DowningDecision {
32        let up = |ms: &[&Member]| ms.iter().filter(|m| m.status == MemberStatus::Up).count();
33        let rn = up(r);
34        let un = up(u);
35        if rn > un {
36            DowningDecision::DownUnreachable
37        } else if rn < un {
38            DowningDecision::DownSelf
39        } else {
40            DowningDecision::DownAll
41        }
42    }
43}
44
45/// StaticQuorum: requires at least `quorum_size` reachable members to survive.
46#[derive(Debug, Clone, Copy)]
47pub struct StaticQuorumStrategy {
48    pub quorum_size: usize,
49}
50
51impl DowningStrategy for StaticQuorumStrategy {
52    fn decide(&self, r: &[&Member], _: &[&Member]) -> DowningDecision {
53        if r.len() >= self.quorum_size {
54            DowningDecision::DownUnreachable
55        } else {
56            DowningDecision::DownSelf
57        }
58    }
59}
60
61/// KeepOldest: the side containing the oldest (lowest `up_number`) up member survives.
62#[derive(Debug, Clone, Copy, Default)]
63pub struct KeepOldestStrategy {
64    pub down_if_alone: bool,
65}
66
67impl DowningStrategy for KeepOldestStrategy {
68    fn decide(&self, r: &[&Member], u: &[&Member]) -> DowningDecision {
69        fn oldest<'a>(ms: &[&'a Member]) -> Option<&'a Member> {
70            ms.iter().min_by_key(|m| m.up_number).copied()
71        }
72        let rolds = oldest(r);
73        let uolds = oldest(u);
74        match (rolds, uolds) {
75            (Some(ro), Some(uo)) => {
76                if ro.up_number <= uo.up_number {
77                    if r.len() == 1 && self.down_if_alone {
78                        DowningDecision::DownAll
79                    } else {
80                        DowningDecision::DownUnreachable
81                    }
82                } else {
83                    DowningDecision::DownSelf
84                }
85            }
86            (Some(_), None) => DowningDecision::DownUnreachable,
87            (None, Some(_)) => DowningDecision::DownSelf,
88            (None, None) => DowningDecision::Stay,
89        }
90    }
91}
92
93/// KeepReferee: the side containing the designated `referee` member survives.
94#[derive(Debug, Clone)]
95pub struct KeepReferee {
96    pub referee: String,
97    pub down_all_if_less_than: usize,
98}
99
100impl DowningStrategy for KeepReferee {
101    fn decide(&self, r: &[&Member], _u: &[&Member]) -> DowningDecision {
102        let has_referee = r.iter().any(|m| m.address.to_string() == self.referee);
103        if !has_referee {
104            return DowningDecision::DownSelf;
105        }
106        if r.len() < self.down_all_if_less_than {
107            DowningDecision::DownAll
108        } else {
109            DowningDecision::DownUnreachable
110        }
111    }
112}
113
114/// DownAll: unconditionally downs every member on both sides of the
115/// partition. Used when the operator prefers cluster-wide restart over
116/// any chance of split-brain (matches "down-all-when-unstable" in
117/// related industry SBR catalogs).
118///
119/// Returns [`DowningDecision::DownAll`] whenever there is any
120/// unreachable member; [`DowningDecision::Stay`] when the partition is
121/// healthy. The reachable/unreachable inputs are inspected only to
122/// distinguish those two cases.
123#[derive(Debug, Clone, Copy, Default)]
124pub struct DownAllStrategy;
125
126impl DowningStrategy for DownAllStrategy {
127    fn decide(&self, _r: &[&Member], u: &[&Member]) -> DowningDecision {
128        if u.is_empty() {
129            DowningDecision::Stay
130        } else {
131            DowningDecision::DownAll
132        }
133    }
134}
135
136/// LeaseMajority: majority decision gated by an external lease. In-memory
137/// simulation of whether a lease was acquired.
138#[derive(Debug, Clone, Copy, Default)]
139pub struct LeaseMajorityStrategy {
140    pub lease_acquired: bool,
141}
142
143impl DowningStrategy for LeaseMajorityStrategy {
144    fn decide(&self, r: &[&Member], u: &[&Member]) -> DowningDecision {
145        let m = KeepMajorityStrategy.decide(r, u);
146        match m {
147            DowningDecision::DownAll if self.lease_acquired => DowningDecision::DownUnreachable,
148            other => other,
149        }
150    }
151}
152
153/// Facade that holds any of the strategies behind a trait object.
154pub struct SplitBrainResolver {
155    pub strategy: Box<dyn DowningStrategy>,
156}
157
158impl SplitBrainResolver {
159    pub fn new(strategy: Box<dyn DowningStrategy>) -> Self {
160        Self { strategy }
161    }
162    pub fn decide(&self, r: &[&Member], u: &[&Member]) -> DowningDecision {
163        self.strategy.decide(r, u)
164    }
165}
166
#[cfg(test)]
mod tests {
    use super::*;
    use atomr_core::actor::Address;

    /// Builds an `Up` member addressed `N{n}` whose `up_number` is `n`.
    fn up(n: i32) -> Member {
        let mut member = Member::new(Address::local(format!("N{n}")), vec![]);
        member.status = MemberStatus::Up;
        member.up_number = n;
        member
    }

    /// Builds one `Up` member per entry of `numbers`, in order.
    fn side(numbers: &[i32]) -> Vec<Member> {
        numbers.iter().copied().map(up).collect()
    }

    /// Borrows every member of a side, as the strategies expect.
    fn refs(members: &[Member]) -> Vec<&Member> {
        members.iter().collect()
    }

    #[test]
    fn keep_majority_prefers_larger_side() {
        let (reachable, unreachable) = (side(&[1, 2, 3]), side(&[4]));
        let decision = KeepMajorityStrategy.decide(&refs(&reachable), &refs(&unreachable));
        assert_eq!(decision, DowningDecision::DownUnreachable);
    }

    #[test]
    fn static_quorum_enforces_size() {
        let (reachable, unreachable) = (side(&[1]), side(&[2]));
        let strategy = StaticQuorumStrategy { quorum_size: 2 };
        let decision = strategy.decide(&refs(&reachable), &refs(&unreachable));
        assert_eq!(decision, DowningDecision::DownSelf);
    }

    #[test]
    fn keep_oldest_picks_lowest_up_number() {
        let (reachable, unreachable) = (side(&[1]), side(&[2, 3]));
        let strategy = KeepOldestStrategy::default();
        let decision = strategy.decide(&refs(&reachable), &refs(&unreachable));
        assert_eq!(decision, DowningDecision::DownUnreachable);
    }

    #[test]
    fn down_all_strategy_downs_every_member_when_partitioned() {
        let (reachable, unreachable) = (side(&[1, 2]), side(&[3]));
        let decision = DownAllStrategy.decide(&refs(&reachable), &refs(&unreachable));
        assert_eq!(decision, DowningDecision::DownAll);
    }

    #[test]
    fn down_all_strategy_stays_when_no_unreachable() {
        let (reachable, unreachable) = (side(&[1, 2, 3]), side(&[]));
        let decision = DownAllStrategy.decide(&refs(&reachable), &refs(&unreachable));
        assert_eq!(decision, DowningDecision::Stay);
    }

    #[test]
    fn down_all_strategy_downs_even_with_majority_reachable() {
        // Side sizes are irrelevant to DownAll, in contrast to KeepMajority:
        // a single unreachable member is enough to down everyone.
        let (reachable, unreachable) = (side(&[1, 2, 3, 4]), side(&[5]));
        let decision = DownAllStrategy.decide(&refs(&reachable), &refs(&unreachable));
        assert_eq!(decision, DowningDecision::DownAll);
    }
}