Skip to main content

hyperi_rustlib/health/
registry.rs

// Project:   hyperi-rustlib
// File:      src/health/registry.rs
// Purpose:   Global health registry singleton for component health tracking
// Language:  Rust
//
// License:   BUSL-1.1
// Copyright: (c) 2026 HYPERI PTY LIMITED
8
//! Global health registry for unified service health state.
//!
//! Modules register health check callbacks at construction. The registry
//! aggregates component status to determine overall service health.
//!
//! # Design
//!
//! - Global singleton via `OnceLock` (consistent with config registry pattern)
//! - Components register a closure that returns their current [`HealthStatus`]
//! - [`is_healthy`](HealthRegistry::is_healthy) requires ALL components healthy
//! - [`is_ready`](HealthRegistry::is_ready) requires NO components unhealthy
//!   (degraded is acceptable for readiness)
//! - Empty registry is considered healthy (vacuously true)
22
23use std::sync::{Arc, Mutex, OnceLock};
24
/// Operational state reported by a single registered component.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HealthStatus {
    /// The component works without any known impairment.
    Healthy,
    /// The component still works, but in a reduced capacity (for example a
    /// half-open circuit, elevated latency, or an active fallback).
    Degraded,
    /// The component does not work. Service should not receive traffic.
    Unhealthy,
}

impl HealthStatus {
    /// Lowercase label for this status, as emitted in JSON and endpoint
    /// output.
    ///
    /// Returns one of `"healthy"`, `"degraded"` or `"unhealthy"`.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Unhealthy => "unhealthy",
            Self::Degraded => "degraded",
            Self::Healthy => "healthy",
        }
    }
}
48
49/// Health check callback -- returns current component status.
50type HealthCheck = Arc<dyn Fn() -> HealthStatus + Send + Sync>;
51
52/// A registered health check entry.
53struct HealthEntry {
54    name: String,
55    check: HealthCheck,
56}
57
58/// Global health registry singleton.
59///
60/// Modules register health check callbacks at construction. The registry
61/// aggregates all component statuses to determine overall service health.
62///
63/// # Thread Safety
64///
65/// The registry uses `Mutex<Vec<_>>` for registration (infrequent, at
66/// init time) and read access (health checks). For the typical DFE app
67/// with 3-8 registered components, lock contention is negligible.
68pub struct HealthRegistry {
69    components: Mutex<Vec<HealthEntry>>,
70}
71
72/// Global singleton instance.
73static REGISTRY: OnceLock<HealthRegistry> = OnceLock::new();
74
75impl HealthRegistry {
76    /// Create a new empty registry.
77    fn new() -> Self {
78        Self {
79            components: Mutex::new(Vec::new()),
80        }
81    }
82
83    /// Get or initialise the global registry.
84    fn global() -> &'static Self {
85        REGISTRY.get_or_init(Self::new)
86    }
87
88    /// Register a health check callback.
89    ///
90    /// Called by modules at construction time. The callback is invoked
91    /// each time health is queried, so it should be cheap (e.g., read
92    /// an `AtomicBool` or check a cached state).
93    ///
94    /// # Duplicate Names
95    ///
96    /// Multiple components may register with the same name. Each
97    /// registration is independent -- the registry does not deduplicate.
98    pub fn register(
99        name: impl Into<String>,
100        check: impl Fn() -> HealthStatus + Send + Sync + 'static,
101    ) {
102        let registry = Self::global();
103        if let Ok(mut components) = registry.components.lock() {
104            components.push(HealthEntry {
105                name: name.into(),
106                check: Arc::new(check),
107            });
108        }
109    }
110
111    /// Snapshot every registered check, drop the registry lock, then run
112    /// the checks. A check callback that re-enters
113    /// [`HealthRegistry::register`] (or any other path that takes the
114    /// registry mutex) would otherwise deadlock against itself; this
115    /// keeps the lock critical-section to a clone of the `Arc<dyn Fn>`
116    /// handles.
117    fn snapshot_checks() -> Vec<HealthCheck> {
118        let registry = Self::global();
119        registry
120            .components
121            .lock()
122            .ok()
123            .map(|components| components.iter().map(|c| Arc::clone(&c.check)).collect())
124            .unwrap_or_default()
125    }
126
127    /// Check if ALL components are healthy.
128    ///
129    /// Returns `true` if the registry is empty (vacuously true) or
130    /// every registered component reports [`HealthStatus::Healthy`].
131    #[must_use]
132    pub fn is_healthy() -> bool {
133        Self::snapshot_checks()
134            .iter()
135            .all(|check| check() == HealthStatus::Healthy)
136    }
137
138    /// Check if the service is ready to receive traffic.
139    ///
140    /// Ready means no component is [`HealthStatus::Unhealthy`]. Degraded
141    /// components are acceptable -- the service can still serve requests,
142    /// just with reduced capability.
143    ///
144    /// Returns `true` if the registry is empty (vacuously true).
145    #[must_use]
146    pub fn is_ready() -> bool {
147        Self::snapshot_checks()
148            .iter()
149            .all(|check| check() != HealthStatus::Unhealthy)
150    }
151
152    /// Get per-component health status.
153    ///
154    /// Returns a snapshot of all registered components and their current
155    /// status. Useful for detailed health endpoints.
156    #[must_use]
157    pub fn components() -> Vec<(String, HealthStatus)> {
158        // Snapshot (name, check) pairs under the lock; run checks after
159        // releasing it. See `snapshot_checks` for the reentrancy rationale.
160        let snapshot: Vec<(String, HealthCheck)> = {
161            let registry = Self::global();
162            let Ok(components) = registry.components.lock() else {
163                return Vec::new();
164            };
165            components
166                .iter()
167                .map(|c| (c.name.clone(), Arc::clone(&c.check)))
168                .collect()
169        };
170        snapshot
171            .into_iter()
172            .map(|(name, check)| (name, check()))
173            .collect()
174    }
175
176    /// Get a JSON representation of the health state.
177    ///
178    /// Suitable for a `/health/detailed` endpoint response.
179    #[cfg(feature = "serde_json")]
180    #[must_use]
181    pub fn to_json() -> serde_json::Value {
182        let components = Self::components();
183        let overall = if Self::is_healthy() {
184            "healthy"
185        } else if Self::is_ready() {
186            "degraded"
187        } else {
188            "unhealthy"
189        };
190
191        serde_json::json!({
192            "status": overall,
193            "components": components.iter().map(|(name, status)| {
194                serde_json::json!({
195                    "name": name,
196                    "status": status.as_str(),
197                })
198            }).collect::<Vec<_>>()
199        })
200    }
201
202    /// Clear all registered components (for testing only).
203    #[cfg(test)]
204    pub(crate) fn reset() {
205        let registry = Self::global();
206        if let Ok(mut components) = registry.components.lock() {
207            components.clear();
208        }
209    }
210}
211
#[cfg(test)]
mod tests {
    use std::sync::atomic::{AtomicU8, Ordering};

    use super::*;

    /// Tests share global statics -- serialise them.
    static TEST_LOCK: Mutex<()> = Mutex::new(());

    /// Take the serialisation lock for the rest of the calling test and
    /// start from an empty registry.
    ///
    /// A poisoned `TEST_LOCK` is recovered instead of unwrapped: the lock
    /// guards no data, and unwrapping would turn one failing test into a
    /// cascade of unrelated `PoisonError` failures in every later test.
    macro_rules! serial_test {
        () => {
            let _guard = TEST_LOCK
                .lock()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            HealthRegistry::reset();
        };
    }

    /// With nothing registered, every aggregate is vacuously true.
    #[test]
    fn empty_registry_is_healthy() {
        serial_test!();

        assert!(HealthRegistry::is_healthy());
        assert!(HealthRegistry::is_ready());
        assert!(HealthRegistry::components().is_empty());
    }

    /// All-healthy components are reported healthy, in registration order.
    #[test]
    fn register_and_check_healthy() {
        serial_test!();

        HealthRegistry::register("transport", || HealthStatus::Healthy);
        HealthRegistry::register("database", || HealthStatus::Healthy);

        assert!(HealthRegistry::is_healthy());
        assert!(HealthRegistry::is_ready());

        let components = HealthRegistry::components();
        assert_eq!(components.len(), 2);
        assert_eq!(components[0].0, "transport");
        assert_eq!(components[0].1, HealthStatus::Healthy);
        assert_eq!(components[1].0, "database");
        assert_eq!(components[1].1, HealthStatus::Healthy);
    }

    /// One unhealthy component fails both health and readiness.
    #[test]
    fn unhealthy_component_fails_check() {
        serial_test!();

        HealthRegistry::register("transport", || HealthStatus::Healthy);
        HealthRegistry::register("database", || HealthStatus::Unhealthy);

        assert!(!HealthRegistry::is_healthy());
        assert!(!HealthRegistry::is_ready());
    }

    /// A degraded component fails health but is acceptable for readiness.
    #[test]
    fn degraded_is_ready_but_not_healthy() {
        serial_test!();

        HealthRegistry::register("transport", || HealthStatus::Healthy);
        HealthRegistry::register("circuit_breaker", || HealthStatus::Degraded);

        assert!(!HealthRegistry::is_healthy());
        assert!(HealthRegistry::is_ready());
    }

    /// Callbacks are re-evaluated on every query, so state changes show up.
    #[test]
    fn dynamic_health_check_reflects_state_changes() {
        serial_test!();

        // Simulate a component whose health changes at runtime
        let state = Arc::new(AtomicU8::new(0)); // 0=healthy, 1=degraded, 2=unhealthy
        let state_clone = Arc::clone(&state);

        HealthRegistry::register("dynamic", move || {
            match state_clone.load(Ordering::Relaxed) {
                0 => HealthStatus::Healthy,
                1 => HealthStatus::Degraded,
                _ => HealthStatus::Unhealthy,
            }
        });

        // Initially healthy
        assert!(HealthRegistry::is_healthy());
        assert!(HealthRegistry::is_ready());

        // Transition to degraded
        state.store(1, Ordering::Relaxed);
        assert!(!HealthRegistry::is_healthy());
        assert!(HealthRegistry::is_ready());

        // Transition to unhealthy
        state.store(2, Ordering::Relaxed);
        assert!(!HealthRegistry::is_healthy());
        assert!(!HealthRegistry::is_ready());

        // Recovery back to healthy
        state.store(0, Ordering::Relaxed);
        assert!(HealthRegistry::is_healthy());
        assert!(HealthRegistry::is_ready());
    }

    /// The string labels are part of the endpoint output format.
    #[test]
    fn health_status_as_str() {
        assert_eq!(HealthStatus::Healthy.as_str(), "healthy");
        assert_eq!(HealthStatus::Degraded.as_str(), "degraded");
        assert_eq!(HealthStatus::Unhealthy.as_str(), "unhealthy");
    }

    /// The JSON document lists every component and the aggregate status.
    #[test]
    #[cfg(feature = "serde_json")]
    fn to_json_includes_all_components() {
        serial_test!();

        HealthRegistry::register("kafka", || HealthStatus::Healthy);
        HealthRegistry::register("clickhouse", || HealthStatus::Degraded);

        let json = HealthRegistry::to_json();

        assert_eq!(json["status"], "degraded");

        let components = json["components"].as_array().unwrap();
        assert_eq!(components.len(), 2);

        assert_eq!(components[0]["name"], "kafka");
        assert_eq!(components[0]["status"], "healthy");

        assert_eq!(components[1]["name"], "clickhouse");
        assert_eq!(components[1]["status"], "degraded");
    }

    /// An empty registry serialises as healthy with no components.
    #[test]
    #[cfg(feature = "serde_json")]
    fn to_json_empty_registry() {
        serial_test!();

        let json = HealthRegistry::to_json();
        assert_eq!(json["status"], "healthy");
        assert!(json["components"].as_array().unwrap().is_empty());
    }

    /// An unhealthy component makes the aggregate JSON status unhealthy.
    #[test]
    #[cfg(feature = "serde_json")]
    fn to_json_unhealthy_status() {
        serial_test!();

        HealthRegistry::register("broken", || HealthStatus::Unhealthy);

        let json = HealthRegistry::to_json();
        assert_eq!(json["status"], "unhealthy");
    }
}