nodedb_cluster/
lifecycle_state.rs1use std::sync::{Arc, RwLock};
26
27use serde::{Deserialize, Serialize};
28use tracing::info;
29
30use crate::readiness;
31
32#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
34#[serde(tag = "phase", rename_all = "snake_case")]
35pub enum ClusterLifecycleState {
36 Starting,
38 Restarting,
40 Bootstrapping,
42 Joining {
44 attempt: u32,
47 },
48 Ready {
51 nodes: usize,
53 },
54 Failed {
58 reason: String,
60 },
61}
62
63impl ClusterLifecycleState {
64 pub fn label(&self) -> &'static str {
68 match self {
69 Self::Starting => "starting",
70 Self::Restarting => "restarting",
71 Self::Bootstrapping => "bootstrapping",
72 Self::Joining { .. } => "joining",
73 Self::Ready { .. } => "ready",
74 Self::Failed { .. } => "failed",
75 }
76 }
77
78 pub fn is_ready(&self) -> bool {
80 matches!(self, Self::Ready { .. })
81 }
82
83 pub fn all_labels() -> &'static [&'static str] {
86 &[
87 "starting",
88 "restarting",
89 "bootstrapping",
90 "joining",
91 "ready",
92 "failed",
93 ]
94 }
95}
96
97#[derive(Debug, Clone)]
113pub struct ClusterLifecycleTracker {
114 inner: Arc<RwLock<ClusterLifecycleState>>,
115}
116
117impl ClusterLifecycleTracker {
118 pub fn new() -> Self {
120 Self {
121 inner: Arc::new(RwLock::new(ClusterLifecycleState::Starting)),
122 }
123 }
124
125 pub fn current(&self) -> ClusterLifecycleState {
128 self.inner.read().unwrap_or_else(|p| p.into_inner()).clone()
129 }
130
131 pub fn is_ready(&self) -> bool {
133 self.current().is_ready()
134 }
135
136 pub fn to_restarting(&self) {
137 self.transition(ClusterLifecycleState::Restarting, "restart");
138 }
139
140 pub fn to_bootstrapping(&self) {
141 self.transition(
142 ClusterLifecycleState::Bootstrapping,
143 "bootstrapping new cluster",
144 );
145 }
146
147 pub fn to_joining(&self, attempt: u32) {
148 let detail = format!("joining cluster (attempt {attempt})");
149 self.transition(ClusterLifecycleState::Joining { attempt }, &detail);
150 }
151
152 pub fn to_ready(&self, nodes: usize) {
153 let detail = format!("ready ({nodes} nodes)");
154 self.transition(ClusterLifecycleState::Ready { nodes }, &detail);
155 }
156
157 pub fn to_failed(&self, reason: impl Into<String>) {
158 let reason = reason.into();
159 let detail = format!("failed: {reason}");
160 self.transition(ClusterLifecycleState::Failed { reason }, &detail);
161 }
162
163 fn transition(&self, new: ClusterLifecycleState, human: &str) {
166 let prev = {
167 let mut guard = self.inner.write().unwrap_or_else(|p| p.into_inner());
168 std::mem::replace(&mut *guard, new.clone())
169 };
170 info!(
171 prev = prev.label(),
172 new = new.label(),
173 detail = human,
174 "cluster lifecycle transition"
175 );
176 readiness::notify_status(human);
177 }
178}
179
180impl Default for ClusterLifecycleTracker {
181 fn default() -> Self {
182 Self::new()
183 }
184}
185
#[cfg(test)]
mod tests {
    use super::*;

    /// A new tracker reports `Starting` and is not ready.
    #[test]
    fn initial_state_is_starting() {
        let t = ClusterLifecycleTracker::new();
        assert_eq!(t.current(), ClusterLifecycleState::Starting);
        assert!(!t.is_ready());
    }

    /// Repeated join attempts overwrite each other, then `Ready` wins.
    #[test]
    fn transition_sequence_logs_and_updates() {
        let t = ClusterLifecycleTracker::new();
        t.to_joining(0);
        assert_eq!(t.current(), ClusterLifecycleState::Joining { attempt: 0 });
        t.to_joining(1);
        assert_eq!(t.current(), ClusterLifecycleState::Joining { attempt: 1 });
        t.to_ready(3);
        assert_eq!(t.current(), ClusterLifecycleState::Ready { nodes: 3 });
        assert!(t.is_ready());
    }

    #[test]
    fn bootstrapping_then_ready() {
        let t = ClusterLifecycleTracker::new();
        t.to_bootstrapping();
        assert_eq!(t.current(), ClusterLifecycleState::Bootstrapping);
        t.to_ready(1);
        assert!(t.is_ready());
    }

    #[test]
    fn restarting_path() {
        let t = ClusterLifecycleTracker::new();
        t.to_restarting();
        assert_eq!(t.current(), ClusterLifecycleState::Restarting);
        t.to_ready(3);
        assert!(t.is_ready());
    }

    /// The tracker enforces no transition ordering, so `Failed` can be left
    /// again by any later transition.
    #[test]
    fn failed_is_not_terminal_by_contract() {
        let t = ClusterLifecycleTracker::new();
        t.to_joining(5);
        t.to_failed("timeout");
        // Pin the stored reason too, not just the variant.
        assert_eq!(
            t.current(),
            ClusterLifecycleState::Failed {
                reason: "timeout".into()
            }
        );
        assert!(!t.is_ready());
        t.to_ready(3);
        assert_eq!(t.current(), ClusterLifecycleState::Ready { nodes: 3 });
    }

    #[test]
    fn labels_are_stable() {
        assert_eq!(ClusterLifecycleState::Starting.label(), "starting");
        assert_eq!(ClusterLifecycleState::Restarting.label(), "restarting");
        assert_eq!(
            ClusterLifecycleState::Bootstrapping.label(),
            "bootstrapping"
        );
        assert_eq!(
            ClusterLifecycleState::Joining { attempt: 0 }.label(),
            "joining"
        );
        assert_eq!(ClusterLifecycleState::Ready { nodes: 3 }.label(), "ready");
        assert_eq!(
            ClusterLifecycleState::Failed { reason: "x".into() }.label(),
            "failed"
        );
    }

    /// `all_labels()` and the set of variant labels must agree in both
    /// directions.
    #[test]
    fn all_labels_matches_variants() {
        let variants = [
            ClusterLifecycleState::Starting,
            ClusterLifecycleState::Restarting,
            ClusterLifecycleState::Bootstrapping,
            ClusterLifecycleState::Joining { attempt: 0 },
            ClusterLifecycleState::Ready { nodes: 0 },
            ClusterLifecycleState::Failed { reason: "x".into() },
        ];
        let labels = ClusterLifecycleState::all_labels();

        // Equal lengths plus the containment check below rule out stale or
        // extra entries in `all_labels()`, which containment alone would miss.
        assert_eq!(
            labels.len(),
            variants.len(),
            "all_labels() has a different number of entries than the variants listed here"
        );
        for variant in &variants {
            assert!(
                labels.contains(&variant.label()),
                "label {} missing from all_labels()",
                variant.label()
            );
        }
    }

    /// Clones share one underlying state: a transition made through one
    /// handle is visible through the other.
    #[test]
    fn tracker_is_cheap_to_clone() {
        let a = ClusterLifecycleTracker::new();
        let b = a.clone();
        a.to_bootstrapping();
        assert_eq!(b.current(), ClusterLifecycleState::Bootstrapping);
    }
}