sentinel_agent_protocol/v2/
health.rs

1//! Health reporting for Protocol v2.
2
3use serde::{Deserialize, Serialize};
4
5/// Health status reported by agent.
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct HealthStatus {
8    pub agent_id: String,
9    pub state: HealthState,
10    pub message: Option<String>,
11    pub load: Option<LoadMetrics>,
12    pub resources: Option<ResourceMetrics>,
13    pub valid_until_ms: Option<u64>,
14    pub timestamp_ms: u64,
15}
16
17impl HealthStatus {
18    pub fn healthy(agent_id: impl Into<String>) -> Self {
19        Self {
20            agent_id: agent_id.into(),
21            state: HealthState::Healthy,
22            message: None,
23            load: None,
24            resources: None,
25            valid_until_ms: None,
26            timestamp_ms: now_ms(),
27        }
28    }
29
30    pub fn degraded(agent_id: impl Into<String>, disabled: Vec<String>, multiplier: f32) -> Self {
31        Self {
32            agent_id: agent_id.into(),
33            state: HealthState::Degraded { disabled_features: disabled, timeout_multiplier: multiplier },
34            message: None,
35            load: None,
36            resources: None,
37            valid_until_ms: None,
38            timestamp_ms: now_ms(),
39        }
40    }
41
42    pub fn unhealthy(agent_id: impl Into<String>, reason: impl Into<String>, recoverable: bool) -> Self {
43        Self {
44            agent_id: agent_id.into(),
45            state: HealthState::Unhealthy { reason: reason.into(), recoverable },
46            message: None,
47            load: None,
48            resources: None,
49            valid_until_ms: None,
50            timestamp_ms: now_ms(),
51        }
52    }
53
54    pub fn is_healthy(&self) -> bool {
55        matches!(self.state, HealthState::Healthy)
56    }
57}
58
59/// Health state.
60#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
61#[serde(rename_all = "snake_case", tag = "status")]
62pub enum HealthState {
63    Healthy,
64    Degraded { disabled_features: Vec<String>, timeout_multiplier: f32 },
65    Draining { eta_ms: Option<u64> },
66    Unhealthy { reason: String, recoverable: bool },
67}
68
69/// Load metrics.
70#[derive(Debug, Clone, Default, Serialize, Deserialize)]
71pub struct LoadMetrics {
72    pub in_flight: u32,
73    pub queue_depth: u32,
74    pub avg_latency_ms: f32,
75    pub p50_latency_ms: f32,
76    pub p95_latency_ms: f32,
77    pub p99_latency_ms: f32,
78    pub requests_processed: u64,
79    pub requests_rejected: u64,
80    pub requests_timed_out: u64,
81}
82
83/// Resource metrics.
84#[derive(Debug, Clone, Default, Serialize, Deserialize)]
85pub struct ResourceMetrics {
86    pub cpu_percent: Option<f32>,
87    pub memory_bytes: Option<u64>,
88    pub memory_limit: Option<u64>,
89    pub active_threads: Option<u32>,
90    pub open_fds: Option<u32>,
91    pub fd_limit: Option<u32>,
92    pub connections: Option<u32>,
93}
94
/// Returns the current wall-clock time as whole milliseconds since the Unix epoch.
///
/// Never fails: if the system clock reads earlier than the epoch
/// (`duration_since` returns `Err`), the result is `0`.
fn now_ms() -> u64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |elapsed| {
            // `as_millis` yields a `u128`; clamp before narrowing so an
            // out-of-range value saturates at `u64::MAX` instead of wrapping.
            elapsed.as_millis().min(u128::from(u64::MAX)) as u64
        })
}
101
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks each `HealthStatus` constructor: the resulting state (including
    /// its payload), the `is_healthy` verdict, and the defaulted optional fields.
    #[test]
    fn test_health_status_builders() {
        let healthy = HealthStatus::healthy("test-agent");
        assert!(healthy.is_healthy());
        assert_eq!(healthy.agent_id, "test-agent");
        assert_eq!(healthy.state, HealthState::Healthy);
        assert!(healthy.message.is_none());
        assert!(healthy.load.is_none());
        assert!(healthy.resources.is_none());
        assert!(healthy.valid_until_ms.is_none());

        let degraded = HealthStatus::degraded("test-agent", vec!["cache".to_string()], 2.0);
        assert!(!degraded.is_healthy());
        assert_eq!(
            degraded.state,
            HealthState::Degraded {
                disabled_features: vec!["cache".to_string()],
                timeout_multiplier: 2.0,
            }
        );

        let unhealthy = HealthStatus::unhealthy("test-agent", "OOM", true);
        assert!(!unhealthy.is_healthy());
        assert_eq!(
            unhealthy.state,
            HealthState::Unhealthy {
                reason: "OOM".to_string(),
                recoverable: true,
            }
        );
    }
}