mecha10_runtime/
health.rs

1//! Health checking infrastructure
2
3use std::collections::HashMap;
4use std::sync::Arc;
5use std::time::Duration;
6use tokio::sync::RwLock;
7
/// Health state reported for a single node.
///
/// The two non-healthy variants carry a `reason` string explaining the
/// condition. Equality compares both the variant and, where present, the
/// reason text; it is a total equivalence, so the type is `Eq` as well as
/// `PartialEq`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum HealthStatus {
    /// Node is healthy and operating normally.
    Healthy,

    /// Node is degraded but still functional.
    Degraded {
        /// Text describing why the node is considered degraded.
        reason: String,
    },

    /// Node is unhealthy and may need a restart.
    Unhealthy {
        /// Text describing why the node is considered unhealthy.
        reason: String,
    },
}
20
21impl HealthStatus {
22    /// Check if status is healthy
23    pub fn is_healthy(&self) -> bool {
24        matches!(self, HealthStatus::Healthy)
25    }
26
27    /// Check if status is degraded
28    pub fn is_degraded(&self) -> bool {
29        matches!(self, HealthStatus::Degraded { .. })
30    }
31
32    /// Check if status is unhealthy
33    pub fn is_unhealthy(&self) -> bool {
34        matches!(self, HealthStatus::Unhealthy { .. })
35    }
36
37    /// Get reason if not healthy
38    pub fn reason(&self) -> Option<&str> {
39        match self {
40            HealthStatus::Healthy => None,
41            HealthStatus::Degraded { reason } => Some(reason),
42            HealthStatus::Unhealthy { reason } => Some(reason),
43        }
44    }
45}
46
47/// Health checker for monitoring node health
48pub struct HealthChecker {
49    checks: Arc<RwLock<HashMap<String, HealthStatus>>>,
50    interval: Duration,
51}
52
53impl HealthChecker {
54    /// Create a new health checker with specified interval
55    pub fn new(interval: Duration) -> Self {
56        Self {
57            checks: Arc::new(RwLock::new(HashMap::new())),
58            interval,
59        }
60    }
61
62    /// Register a node for health checking
63    pub async fn register(&self, name: String, initial_status: HealthStatus) {
64        let mut checks = self.checks.write().await;
65        checks.insert(name, initial_status);
66    }
67
68    /// Update health status for a node
69    pub async fn update(&self, name: &str, status: HealthStatus) {
70        let mut checks = self.checks.write().await;
71        checks.insert(name.to_string(), status);
72    }
73
74    /// Get health status for a specific node
75    pub async fn check_one(&self, name: &str) -> Option<HealthStatus> {
76        let checks = self.checks.read().await;
77        checks.get(name).cloned()
78    }
79
80    /// Get health status for all nodes
81    pub async fn check_all(&self) -> HashMap<String, HealthStatus> {
82        let checks = self.checks.read().await;
83        checks.clone()
84    }
85
86    /// Get the check interval
87    pub fn interval(&self) -> Duration {
88        self.interval
89    }
90
91    /// Remove a node from health checking
92    pub async fn unregister(&self, name: &str) {
93        let mut checks = self.checks.write().await;
94        checks.remove(name);
95    }
96}