Skip to main content

hyperi_rustlib/health/
registry.rs

1// Project:   hyperi-rustlib
2// File:      src/health/registry.rs
3// Purpose:   Global health registry singleton for component health tracking
4// Language:  Rust
5//
6// License:   BUSL-1.1
7// Copyright: (c) 2026 HYPERI PTY LIMITED
8
9//! Global health registry for unified service health state.
10//!
11//! Modules register health check callbacks at construction. The registry
12//! aggregates component status to determine overall service health.
13//!
14//! # Design
15//!
16//! - Global singleton via `OnceLock` (consistent with config registry pattern)
17//! - Components register a closure that returns their current [`HealthStatus`]
18//! - [`is_healthy`](HealthRegistry::is_healthy) requires ALL components healthy
19//! - [`is_ready`](HealthRegistry::is_ready) requires NO components unhealthy
20//!   (degraded is acceptable for readiness)
21//! - Empty registry is considered healthy (vacuously true)
22
23use std::sync::{Arc, Mutex, OnceLock};
24
/// The condition a registered component reports about itself.
///
/// Three levels are distinguished: fully working, working with reduced
/// capability, and not working at all.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HealthStatus {
    /// The component works without any known impairment.
    Healthy,
    /// The component still works, but in an impaired mode (for example a
    /// half-open circuit, elevated latency, or an active fallback).
    Degraded,
    /// The component does not work. The service should not be sent traffic.
    Unhealthy,
}

impl HealthStatus {
    /// Lower-case name of the status, as used in JSON and endpoint output.
    ///
    /// Returns one of `"healthy"`, `"degraded"` or `"unhealthy"`.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match self {
            HealthStatus::Healthy => "healthy",
            HealthStatus::Degraded => "degraded",
            HealthStatus::Unhealthy => "unhealthy",
        }
    }
}
48
49/// Health check callback -- returns current component status.
50type HealthCheck = Arc<dyn Fn() -> HealthStatus + Send + Sync>;
51
52/// A registered health check entry.
53struct HealthEntry {
54    name: String,
55    check: HealthCheck,
56}
57
58/// Global health registry singleton.
59///
60/// Modules register health check callbacks at construction. The registry
61/// aggregates all component statuses to determine overall service health.
62///
63/// # Thread Safety
64///
65/// The registry uses `Mutex<Vec<_>>` for registration (infrequent, at
66/// init time) and read access (health checks). For the typical DFE app
67/// with 3-8 registered components, lock contention is negligible.
68pub struct HealthRegistry {
69    components: Mutex<Vec<HealthEntry>>,
70}
71
72/// Global singleton instance.
73static REGISTRY: OnceLock<HealthRegistry> = OnceLock::new();
74
75impl HealthRegistry {
76    /// Create a new empty registry.
77    fn new() -> Self {
78        Self {
79            components: Mutex::new(Vec::new()),
80        }
81    }
82
83    /// Get or initialise the global registry.
84    fn global() -> &'static Self {
85        REGISTRY.get_or_init(Self::new)
86    }
87
88    /// Register a health check callback.
89    ///
90    /// Called by modules at construction time. The callback is invoked
91    /// each time health is queried, so it should be cheap (e.g., read
92    /// an `AtomicBool` or check a cached state).
93    ///
94    /// # Duplicate Names
95    ///
96    /// Multiple components may register with the same name. Each
97    /// registration is independent -- the registry does not deduplicate.
98    pub fn register(
99        name: impl Into<String>,
100        check: impl Fn() -> HealthStatus + Send + Sync + 'static,
101    ) {
102        let registry = Self::global();
103        if let Ok(mut components) = registry.components.lock() {
104            components.push(HealthEntry {
105                name: name.into(),
106                check: Arc::new(check),
107            });
108        }
109    }
110
111    /// Snapshot every registered check, drop the registry lock, then run
112    /// the checks. A check callback that re-enters
113    /// [`HealthRegistry::register`] (or any other path that takes the
114    /// registry mutex) would otherwise deadlock against itself; this
115    /// keeps the lock critical-section to a clone of the `Arc<dyn Fn>`
116    /// handles.
117    fn snapshot_checks() -> Vec<HealthCheck> {
118        let registry = Self::global();
119        registry
120            .components
121            .lock()
122            .ok()
123            .map(|components| components.iter().map(|c| Arc::clone(&c.check)).collect())
124            .unwrap_or_default()
125    }
126
127    /// Check if ALL components are healthy.
128    ///
129    /// Returns `true` if the registry is empty (vacuously true) or
130    /// every registered component reports [`HealthStatus::Healthy`].
131    #[must_use]
132    pub fn is_healthy() -> bool {
133        Self::snapshot_checks()
134            .iter()
135            .all(|check| check() == HealthStatus::Healthy)
136    }
137
138    /// Check if the service is ready to receive traffic.
139    ///
140    /// Ready means no component is [`HealthStatus::Unhealthy`]. Degraded
141    /// components are acceptable -- the service can still serve requests,
142    /// just with reduced capability.
143    ///
144    /// Returns `true` if the registry is empty (vacuously true).
145    #[must_use]
146    pub fn is_ready() -> bool {
147        Self::snapshot_checks()
148            .iter()
149            .all(|check| check() != HealthStatus::Unhealthy)
150    }
151
152    /// Get per-component health status.
153    ///
154    /// Returns a snapshot of all registered components and their current
155    /// status. Useful for detailed health endpoints.
156    #[must_use]
157    pub fn components() -> Vec<(String, HealthStatus)> {
158        // Snapshot (name, check) pairs under the lock; run checks after
159        // releasing it. See `snapshot_checks` for the reentrancy rationale.
160        let snapshot: Vec<(String, HealthCheck)> = {
161            let registry = Self::global();
162            let Ok(components) = registry.components.lock() else {
163                return Vec::new();
164            };
165            components
166                .iter()
167                .map(|c| (c.name.clone(), Arc::clone(&c.check)))
168                .collect()
169        };
170        snapshot
171            .into_iter()
172            .map(|(name, check)| (name, check()))
173            .collect()
174    }
175
176    /// Get a JSON representation of the health state.
177    ///
178    /// Suitable for a `/health/detailed` endpoint response.
179    #[cfg(feature = "serde_json")]
180    #[must_use]
181    pub fn to_json() -> serde_json::Value {
182        let components = Self::components();
183        let overall = if Self::is_healthy() {
184            "healthy"
185        } else if Self::is_ready() {
186            "degraded"
187        } else {
188            "unhealthy"
189        };
190
191        serde_json::json!({
192            "status": overall,
193            "components": components.iter().map(|(name, status)| {
194                serde_json::json!({
195                    "name": name,
196                    "status": status.as_str(),
197                })
198            }).collect::<Vec<_>>()
199        })
200    }
201
202    /// Clear all registered components (for testing only).
203    #[cfg(test)]
204    pub(crate) fn reset() {
205        let registry = Self::global();
206        if let Ok(mut components) = registry.components.lock() {
207            components.clear();
208        }
209    }
210}
211
#[cfg(test)]
mod tests {
    use std::sync::atomic::{AtomicU8, Ordering};

    use super::*;

    /// The registry is a process-wide static, so tests that touch it must
    /// never run concurrently.
    static TEST_LOCK: Mutex<()> = Mutex::new(());

    /// Take the shared test lock, wipe the global registry, and hand the
    /// guard back so the caller's test body runs while still holding it.
    fn serial_test_guard() -> std::sync::MutexGuard<'static, ()> {
        let guard = match TEST_LOCK.lock() {
            Ok(guard) => guard,
            // A failed test poisons the lock; the unit payload cannot be
            // left in a bad state, so just keep going.
            Err(poisoned) => poisoned.into_inner(),
        };
        HealthRegistry::reset();
        guard
    }

    #[test]
    fn empty_registry_is_healthy() {
        let _guard = serial_test_guard();

        assert!(HealthRegistry::is_healthy());
        assert!(HealthRegistry::is_ready());
        assert!(HealthRegistry::components().is_empty());
    }

    #[test]
    fn register_and_check_healthy() {
        let _guard = serial_test_guard();

        for name in ["transport", "database"] {
            HealthRegistry::register(name, || HealthStatus::Healthy);
        }

        assert!(HealthRegistry::is_healthy());
        assert!(HealthRegistry::is_ready());

        // Registration order is preserved in the per-component listing.
        let expected = vec![
            (String::from("transport"), HealthStatus::Healthy),
            (String::from("database"), HealthStatus::Healthy),
        ];
        assert_eq!(HealthRegistry::components(), expected);
    }

    #[test]
    fn unhealthy_component_fails_check() {
        let _guard = serial_test_guard();

        HealthRegistry::register("transport", || HealthStatus::Healthy);
        HealthRegistry::register("database", || HealthStatus::Unhealthy);

        assert!(!HealthRegistry::is_healthy());
        assert!(!HealthRegistry::is_ready());
    }

    #[test]
    fn degraded_is_ready_but_not_healthy() {
        let _guard = serial_test_guard();

        HealthRegistry::register("transport", || HealthStatus::Healthy);
        HealthRegistry::register("circuit_breaker", || HealthStatus::Degraded);

        assert!(!HealthRegistry::is_healthy());
        assert!(HealthRegistry::is_ready());
    }

    #[test]
    fn dynamic_health_check_reflects_state_changes() {
        let _guard = serial_test_guard();

        // A component whose status is driven by a shared code:
        // 0 = healthy, 1 = degraded, anything else = unhealthy.
        let state = Arc::new(AtomicU8::new(0));
        let observed = Arc::clone(&state);

        HealthRegistry::register("dynamic", move || {
            let code = observed.load(Ordering::Relaxed);
            match code {
                0 => HealthStatus::Healthy,
                1 => HealthStatus::Degraded,
                _ => HealthStatus::Unhealthy,
            }
        });

        // Starts out healthy.
        assert!(HealthRegistry::is_healthy());
        assert!(HealthRegistry::is_ready());

        // Degraded: still ready, no longer healthy.
        state.store(1, Ordering::Relaxed);
        assert!(!HealthRegistry::is_healthy());
        assert!(HealthRegistry::is_ready());

        // Unhealthy: neither healthy nor ready.
        state.store(2, Ordering::Relaxed);
        assert!(!HealthRegistry::is_healthy());
        assert!(!HealthRegistry::is_ready());

        // Recovery brings both back.
        state.store(0, Ordering::Relaxed);
        assert!(HealthRegistry::is_healthy());
        assert!(HealthRegistry::is_ready());
    }

    #[test]
    fn health_status_as_str() {
        let expectations = [
            (HealthStatus::Healthy, "healthy"),
            (HealthStatus::Degraded, "degraded"),
            (HealthStatus::Unhealthy, "unhealthy"),
        ];
        for (status, text) in expectations {
            assert_eq!(status.as_str(), text);
        }
    }

    #[test]
    #[cfg(feature = "serde_json")]
    fn to_json_includes_all_components() {
        let _guard = serial_test_guard();

        HealthRegistry::register("kafka", || HealthStatus::Healthy);
        HealthRegistry::register("clickhouse", || HealthStatus::Degraded);

        let json = HealthRegistry::to_json();

        // One degraded component makes the overall status degraded.
        assert_eq!(json["status"], "degraded");

        let listed = json["components"].as_array().unwrap();
        assert_eq!(listed.len(), 2);

        let kafka = &listed[0];
        assert_eq!(kafka["name"], "kafka");
        assert_eq!(kafka["status"], "healthy");

        let clickhouse = &listed[1];
        assert_eq!(clickhouse["name"], "clickhouse");
        assert_eq!(clickhouse["status"], "degraded");
    }

    #[test]
    #[cfg(feature = "serde_json")]
    fn to_json_empty_registry() {
        let _guard = serial_test_guard();

        let json = HealthRegistry::to_json();
        assert_eq!(json["status"], "healthy");
        assert!(json["components"].as_array().unwrap().is_empty());
    }

    #[test]
    #[cfg(feature = "serde_json")]
    fn to_json_unhealthy_status() {
        let _guard = serial_test_guard();

        HealthRegistry::register("broken", || HealthStatus::Unhealthy);

        let json = HealthRegistry::to_json();
        assert_eq!(json["status"], "unhealthy");
    }
}