// Source: grapsus_agent_protocol/v2/health.rs

//! Health reporting for Protocol v2.
2
3use serde::{Deserialize, Serialize};
4
5/// Health status reported by agent.
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct HealthStatus {
8    pub agent_id: String,
9    pub state: HealthState,
10    pub message: Option<String>,
11    pub load: Option<LoadMetrics>,
12    pub resources: Option<ResourceMetrics>,
13    pub valid_until_ms: Option<u64>,
14    pub timestamp_ms: u64,
15}
16
17impl HealthStatus {
18    pub fn healthy(agent_id: impl Into<String>) -> Self {
19        Self {
20            agent_id: agent_id.into(),
21            state: HealthState::Healthy,
22            message: None,
23            load: None,
24            resources: None,
25            valid_until_ms: None,
26            timestamp_ms: now_ms(),
27        }
28    }
29
30    pub fn degraded(agent_id: impl Into<String>, disabled: Vec<String>, multiplier: f32) -> Self {
31        Self {
32            agent_id: agent_id.into(),
33            state: HealthState::Degraded {
34                disabled_features: disabled,
35                timeout_multiplier: multiplier,
36            },
37            message: None,
38            load: None,
39            resources: None,
40            valid_until_ms: None,
41            timestamp_ms: now_ms(),
42        }
43    }
44
45    pub fn unhealthy(
46        agent_id: impl Into<String>,
47        reason: impl Into<String>,
48        recoverable: bool,
49    ) -> Self {
50        Self {
51            agent_id: agent_id.into(),
52            state: HealthState::Unhealthy {
53                reason: reason.into(),
54                recoverable,
55            },
56            message: None,
57            load: None,
58            resources: None,
59            valid_until_ms: None,
60            timestamp_ms: now_ms(),
61        }
62    }
63
64    pub fn is_healthy(&self) -> bool {
65        matches!(self.state, HealthState::Healthy)
66    }
67}
68
69/// Health state.
70#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
71#[serde(rename_all = "snake_case", tag = "status")]
72pub enum HealthState {
73    Healthy,
74    Degraded {
75        disabled_features: Vec<String>,
76        timeout_multiplier: f32,
77    },
78    Draining {
79        eta_ms: Option<u64>,
80    },
81    Unhealthy {
82        reason: String,
83        recoverable: bool,
84    },
85}
86
87/// Load metrics.
88#[derive(Debug, Clone, Default, Serialize, Deserialize)]
89pub struct LoadMetrics {
90    pub in_flight: u32,
91    pub queue_depth: u32,
92    pub avg_latency_ms: f32,
93    pub p50_latency_ms: f32,
94    pub p95_latency_ms: f32,
95    pub p99_latency_ms: f32,
96    pub requests_processed: u64,
97    pub requests_rejected: u64,
98    pub requests_timed_out: u64,
99}
100
101/// Resource metrics.
102#[derive(Debug, Clone, Default, Serialize, Deserialize)]
103pub struct ResourceMetrics {
104    pub cpu_percent: Option<f32>,
105    pub memory_bytes: Option<u64>,
106    pub memory_limit: Option<u64>,
107    pub active_threads: Option<u32>,
108    pub open_fds: Option<u32>,
109    pub fd_limit: Option<u32>,
110    pub connections: Option<u32>,
111}
112
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// Returns `0` when the system clock is set before the epoch. If the
/// millisecond count does not fit in a `u64` the result saturates at
/// `u64::MAX` rather than silently wrapping.
fn now_ms() -> u64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|elapsed| {
            // `as_millis` yields a `u128`; clamp to the `u64` range first so
            // the narrowing cast below can never truncate.
            elapsed.as_millis().min(u128::from(u64::MAX)) as u64
        })
        .unwrap_or(0)
}
119
#[cfg(test)]
mod tests {
    use super::*;

    /// `healthy` and `unhealthy` produce the expected state and health flag.
    #[test]
    fn test_health_status_builders() {
        let healthy = HealthStatus::healthy("test-agent");
        assert!(healthy.is_healthy());
        assert_eq!(healthy.agent_id, "test-agent");
        assert_eq!(healthy.state, HealthState::Healthy);

        let unhealthy = HealthStatus::unhealthy("test-agent", "OOM", true);
        assert!(!unhealthy.is_healthy());
        assert_eq!(
            unhealthy.state,
            HealthState::Unhealthy {
                reason: "OOM".to_string(),
                recoverable: true,
            }
        );
    }

    /// `degraded` stores its arguments verbatim and is not considered healthy.
    #[test]
    fn test_degraded_builder() {
        let degraded = HealthStatus::degraded("test-agent", vec!["cache".to_string()], 2.0);
        assert!(!degraded.is_healthy());
        assert_eq!(
            degraded.state,
            HealthState::Degraded {
                disabled_features: vec!["cache".to_string()],
                timeout_multiplier: 2.0,
            }
        );
    }

    /// Builders leave every optional field unset.
    #[test]
    fn test_builders_leave_optional_fields_unset() {
        let status = HealthStatus::healthy("test-agent");
        assert!(status.message.is_none());
        assert!(status.load.is_none());
        assert!(status.resources.is_none());
        assert!(status.valid_until_ms.is_none());
    }
}