1use std::time::Instant;
20use serde::{Deserialize, Serialize};
21
/// Health level of a single component or of the service as a whole.
///
/// Serialized by serde as a lowercase string (`rename_all = "lowercase"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum HealthStatus {
    /// Operating normally.
    Healthy,
    /// Impaired, but still counted as operational by `is_operational`.
    Degraded,
    /// Not operational; `http_status_code` maps this to 503.
    Unhealthy,
}
33
34impl HealthStatus {
35 pub fn http_status_code(&self) -> u16 {
37 match self {
38 HealthStatus::Healthy => 200,
39 HealthStatus::Degraded => 200, HealthStatus::Unhealthy => 503,
41 }
42 }
43
44 pub fn is_operational(&self) -> bool {
46 matches!(self, HealthStatus::Healthy | HealthStatus::Degraded)
47 }
48}
49
/// Result of one individual health check.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComponentHealth {
    /// Identifier of the check (this file uses e.g. `"process"` and `"latency"`).
    pub name: String,
    /// Outcome of the check.
    pub status: HealthStatus,
    /// Optional human-readable detail; left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,
    /// Optional timing in milliseconds; left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_time_ms: Option<u64>,
}
64
/// Full health report returned by the liveness and readiness probes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthResponse {
    /// Overall health level of the report.
    pub status: HealthStatus,
    /// Version string; `HealthChecker` fills this from `CARGO_PKG_VERSION`.
    pub version: String,
    /// Whole seconds elapsed since the `HealthChecker` was constructed.
    pub uptime_secs: u64,
    /// Individual check results that make up the report.
    pub checks: Vec<ComponentHealth>,
    /// Time the report was built; `HealthChecker` writes the current UTC time in RFC 3339 form.
    pub timestamp: String,
}
79
/// Thresholds used by `HealthChecker` to classify readiness metrics.
#[derive(Debug, Clone)]
pub struct HealthCheckConfig {
    /// Success rate (a fraction; it is multiplied by 100 for display) at or above which
    /// the success-rate check is `Healthy`.
    pub success_rate_healthy: f64,
    /// Success rate at or above which the check is `Degraded`; anything lower is `Unhealthy`.
    pub success_rate_degraded: f64,
    /// Average latency in milliseconds at or below which the latency check is `Healthy`.
    pub max_latency_healthy_ms: u64,
    /// Average latency in milliseconds at or below which the check is `Degraded`;
    /// anything higher is `Unhealthy`.
    pub max_latency_degraded_ms: u64,
    /// Number of open circuits at or below which the circuit check is `Healthy`.
    pub max_open_circuits_healthy: usize,
    /// Number of open circuits at or below which the check is `Degraded`;
    /// anything higher is `Unhealthy`.
    pub max_open_circuits_degraded: usize,
}
96
97impl Default for HealthCheckConfig {
98 fn default() -> Self {
99 Self {
100 success_rate_healthy: 0.95,
101 success_rate_degraded: 0.80,
102 max_latency_healthy_ms: 5000,
103 max_latency_degraded_ms: 15000,
104 max_open_circuits_healthy: 2,
105 max_open_circuits_degraded: 10,
106 }
107 }
108}
109
110pub struct HealthChecker {
112 config: HealthCheckConfig,
113 started_at: Instant,
114 version: String,
115}
116
117impl HealthChecker {
118 pub fn new(config: HealthCheckConfig) -> Self {
120 Self {
121 config,
122 started_at: Instant::now(),
123 version: env!("CARGO_PKG_VERSION").to_string(),
124 }
125 }
126
127 pub fn default_config() -> Self {
129 Self::new(HealthCheckConfig::default())
130 }
131
132 pub fn liveness(&self) -> HealthResponse {
135 HealthResponse {
136 status: HealthStatus::Healthy,
137 version: self.version.clone(),
138 uptime_secs: self.started_at.elapsed().as_secs(),
139 checks: vec![ComponentHealth {
140 name: "process".to_string(),
141 status: HealthStatus::Healthy,
142 message: Some("Process is running".to_string()),
143 response_time_ms: None,
144 }],
145 timestamp: chrono::Utc::now().to_rfc3339(),
146 }
147 }
148
149 pub fn readiness(&self, metrics: &HealthMetrics) -> HealthResponse {
151 let mut checks = Vec::new();
152 let mut overall_status = HealthStatus::Healthy;
153
154 let success_check = self.check_success_rate(metrics.success_rate);
156 if success_check.status == HealthStatus::Unhealthy {
157 overall_status = HealthStatus::Unhealthy;
158 } else if success_check.status == HealthStatus::Degraded && overall_status == HealthStatus::Healthy {
159 overall_status = HealthStatus::Degraded;
160 }
161 checks.push(success_check);
162
163 let latency_check = self.check_latency(metrics.avg_latency_ms);
165 if latency_check.status == HealthStatus::Unhealthy {
166 overall_status = HealthStatus::Unhealthy;
167 } else if latency_check.status == HealthStatus::Degraded && overall_status == HealthStatus::Healthy {
168 overall_status = HealthStatus::Degraded;
169 }
170 checks.push(latency_check);
171
172 let circuit_check = self.check_circuits(metrics.open_circuits);
174 if circuit_check.status == HealthStatus::Unhealthy {
175 overall_status = HealthStatus::Unhealthy;
176 } else if circuit_check.status == HealthStatus::Degraded && overall_status == HealthStatus::Healthy {
177 overall_status = HealthStatus::Degraded;
178 }
179 checks.push(circuit_check);
180
181 if let Some(memory_mb) = metrics.memory_mb {
183 checks.push(ComponentHealth {
184 name: "memory".to_string(),
185 status: HealthStatus::Healthy,
186 message: Some(format!("{} MB used", memory_mb)),
187 response_time_ms: None,
188 });
189 }
190
191 HealthResponse {
192 status: overall_status,
193 version: self.version.clone(),
194 uptime_secs: self.started_at.elapsed().as_secs(),
195 checks,
196 timestamp: chrono::Utc::now().to_rfc3339(),
197 }
198 }
199
200 fn check_success_rate(&self, rate: f64) -> ComponentHealth {
201 let (status, message) = if rate >= self.config.success_rate_healthy {
202 (HealthStatus::Healthy, format!("{:.1}% success rate", rate * 100.0))
203 } else if rate >= self.config.success_rate_degraded {
204 (HealthStatus::Degraded, format!("{:.1}% success rate (degraded)", rate * 100.0))
205 } else {
206 (HealthStatus::Unhealthy, format!("{:.1}% success rate (critical)", rate * 100.0))
207 };
208
209 ComponentHealth {
210 name: "success_rate".to_string(),
211 status,
212 message: Some(message),
213 response_time_ms: None,
214 }
215 }
216
217 fn check_latency(&self, latency_ms: f64) -> ComponentHealth {
218 let (status, message) = if latency_ms <= self.config.max_latency_healthy_ms as f64 {
219 (HealthStatus::Healthy, format!("{:.0}ms avg latency", latency_ms))
220 } else if latency_ms <= self.config.max_latency_degraded_ms as f64 {
221 (HealthStatus::Degraded, format!("{:.0}ms avg latency (high)", latency_ms))
222 } else {
223 (HealthStatus::Unhealthy, format!("{:.0}ms avg latency (critical)", latency_ms))
224 };
225
226 ComponentHealth {
227 name: "latency".to_string(),
228 status,
229 message: Some(message),
230 response_time_ms: Some(latency_ms as u64),
231 }
232 }
233
234 fn check_circuits(&self, open_circuits: usize) -> ComponentHealth {
235 let (status, message) = if open_circuits <= self.config.max_open_circuits_healthy {
236 (HealthStatus::Healthy, format!("{} open circuits", open_circuits))
237 } else if open_circuits <= self.config.max_open_circuits_degraded {
238 (HealthStatus::Degraded, format!("{} open circuits (elevated)", open_circuits))
239 } else {
240 (HealthStatus::Unhealthy, format!("{} open circuits (critical)", open_circuits))
241 };
242
243 ComponentHealth {
244 name: "circuit_breakers".to_string(),
245 status,
246 message: Some(message),
247 response_time_ms: None,
248 }
249 }
250}
251
/// Snapshot of runtime metrics fed into `HealthChecker::readiness`.
///
/// The derived `Default` zeroes every numeric field, so a default snapshot has
/// a success rate of `0.0`, which the default thresholds classify as `Unhealthy`.
#[derive(Debug, Clone, Default)]
pub struct HealthMetrics {
    /// Success rate as a fraction; compared against the success-rate thresholds.
    pub success_rate: f64,
    /// Average latency in milliseconds; compared against the latency thresholds.
    pub avg_latency_ms: f64,
    /// Number of currently open circuits; compared against the circuit thresholds.
    pub open_circuits: usize,
    /// Memory in use, in MB; when present it is reported as an informational check.
    pub memory_mb: Option<u64>,
    /// Number of in-flight requests; not read by the readiness logic in this file.
    pub active_requests: usize,
}
266
267impl HealthResponse {
268 pub fn to_json(&self) -> String {
270 serde_json::to_string_pretty(self).unwrap_or_else(|_| "{}".to_string())
271 }
272
273 pub fn to_json_compact(&self) -> String {
275 serde_json::to_string(self).unwrap_or_else(|_| "{}".to_string())
276 }
277}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// Evaluates readiness with the default thresholds and returns the overall status.
    fn readiness_status(metrics: HealthMetrics) -> HealthStatus {
        HealthChecker::default_config().readiness(&metrics).status
    }

    #[test]
    fn test_liveness_always_healthy() {
        let report = HealthChecker::default_config().liveness();
        assert_eq!(report.status, HealthStatus::Healthy);
    }

    #[test]
    fn test_readiness_healthy() {
        // Every metric sits comfortably inside the healthy band.
        let status = readiness_status(HealthMetrics {
            success_rate: 0.99,
            avg_latency_ms: 100.0,
            open_circuits: 0,
            memory_mb: Some(256),
            active_requests: 5,
        });
        assert_eq!(status, HealthStatus::Healthy);
    }

    #[test]
    fn test_readiness_degraded() {
        // Every metric sits between the healthy and degraded thresholds.
        let status = readiness_status(HealthMetrics {
            success_rate: 0.85,
            avg_latency_ms: 8000.0,
            open_circuits: 5,
            memory_mb: None,
            active_requests: 10,
        });
        assert_eq!(status, HealthStatus::Degraded);
    }

    #[test]
    fn test_readiness_unhealthy() {
        // Every metric is beyond the degraded thresholds.
        let status = readiness_status(HealthMetrics {
            success_rate: 0.50,
            avg_latency_ms: 20000.0,
            open_circuits: 20,
            memory_mb: None,
            active_requests: 0,
        });
        assert_eq!(status, HealthStatus::Unhealthy);
    }

    #[test]
    fn test_json_output() {
        let json = HealthChecker::default_config().liveness().to_json();

        for key in ["\"status\"", "\"version\"", "\"uptime_secs\""].iter() {
            assert!(json.contains(key));
        }
    }

    #[test]
    fn test_http_status_codes() {
        let expectations = [
            (HealthStatus::Healthy, 200),
            (HealthStatus::Degraded, 200),
            (HealthStatus::Unhealthy, 503),
        ];

        for &(status, expected) in expectations.iter() {
            assert_eq!(status.http_status_code(), expected);
        }
    }
}