use std::time::Instant;
use serde::{Deserialize, Serialize};
/// Health classification used both for individual checks and for the overall
/// report.
///
/// Serialized by serde using lowercase variant names (`rename_all = "lowercase"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum HealthStatus {
    /// Counted as operational; mapped to HTTP 200 by `http_status_code`.
    Healthy,
    /// Still counted as operational and also mapped to HTTP 200, but at least
    /// one check fell outside its healthy threshold.
    Degraded,
    /// Not operational; mapped to HTTP 503 by `http_status_code`.
    Unhealthy,
}
impl HealthStatus {
pub fn http_status_code(&self) -> u16 {
match self {
HealthStatus::Healthy => 200,
HealthStatus::Degraded => 200, HealthStatus::Unhealthy => 503,
}
}
pub fn is_operational(&self) -> bool {
matches!(self, HealthStatus::Healthy | HealthStatus::Degraded)
}
}
/// Result of a single health check, as listed in a [`HealthResponse`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComponentHealth {
    /// Identifier of the check (this file emits names such as `process`,
    /// `success_rate`, `latency`, `circuit_breakers` and `memory`).
    pub name: String,
    /// Outcome of this individual check.
    pub status: HealthStatus,
    /// Optional human-readable detail; left out of the serialized output when
    /// `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,
    /// Optional timing in milliseconds; left out of the serialized output when
    /// `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_time_ms: Option<u64>,
}
/// Full health report returned by the liveness and readiness evaluations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthResponse {
    /// Overall status of the report.
    pub status: HealthStatus,
    /// Version string of the reporting service.
    pub version: String,
    /// Whole seconds elapsed since the checker was created.
    pub uptime_secs: u64,
    /// Individual check results that make up the report.
    pub checks: Vec<ComponentHealth>,
    /// Time at which the report was produced, stored as a string (the checker
    /// in this file fills it with an RFC 3339 UTC timestamp).
    pub timestamp: String,
}
/// Thresholds that decide whether each readiness metric is classified as
/// healthy, degraded or unhealthy.
///
/// For every metric there is a "healthy" bound and a looser "degraded" bound;
/// a value that satisfies neither is classified as unhealthy.
#[derive(Debug, Clone)]
pub struct HealthCheckConfig {
    /// Success rate at or above which the service is healthy. Expressed as a
    /// fraction (reports multiply it by 100 to print a percentage).
    pub success_rate_healthy: f64,
    /// Success rate at or above which the service is at least degraded.
    pub success_rate_degraded: f64,
    /// Average latency, in milliseconds, at or below which the service is
    /// healthy.
    pub max_latency_healthy_ms: u64,
    /// Average latency, in milliseconds, at or below which the service is at
    /// least degraded.
    pub max_latency_degraded_ms: u64,
    /// Number of open circuits at or below which the service is healthy.
    pub max_open_circuits_healthy: usize,
    /// Number of open circuits at or below which the service is at least
    /// degraded.
    pub max_open_circuits_degraded: usize,
}
impl Default for HealthCheckConfig {
fn default() -> Self {
Self {
success_rate_healthy: 0.95,
success_rate_degraded: 0.80,
max_latency_healthy_ms: 5000,
max_latency_degraded_ms: 15000,
max_open_circuits_healthy: 2,
max_open_circuits_degraded: 10,
}
}
}
/// Produces liveness and readiness reports for the service.
///
/// Holds the classification thresholds, the instant used as the start of the
/// uptime clock, and the version string included in every report.
#[derive(Debug)]
pub struct HealthChecker {
    /// Thresholds used to classify readiness metrics.
    config: HealthCheckConfig,
    /// Reference point from which `uptime_secs` is measured.
    started_at: Instant,
    /// Version string copied into every report.
    version: String,
}
impl HealthChecker {
pub fn new(config: HealthCheckConfig) -> Self {
Self {
config,
started_at: Instant::now(),
version: env!("CARGO_PKG_VERSION").to_string(),
}
}
pub fn default_config() -> Self {
Self::new(HealthCheckConfig::default())
}
pub fn liveness(&self) -> HealthResponse {
HealthResponse {
status: HealthStatus::Healthy,
version: self.version.clone(),
uptime_secs: self.started_at.elapsed().as_secs(),
checks: vec![ComponentHealth {
name: "process".to_string(),
status: HealthStatus::Healthy,
message: Some("Process is running".to_string()),
response_time_ms: None,
}],
timestamp: chrono::Utc::now().to_rfc3339(),
}
}
pub fn readiness(&self, metrics: &HealthMetrics) -> HealthResponse {
let mut checks = Vec::new();
let mut overall_status = HealthStatus::Healthy;
let success_check = self.check_success_rate(metrics.success_rate);
if success_check.status == HealthStatus::Unhealthy {
overall_status = HealthStatus::Unhealthy;
} else if success_check.status == HealthStatus::Degraded && overall_status == HealthStatus::Healthy {
overall_status = HealthStatus::Degraded;
}
checks.push(success_check);
let latency_check = self.check_latency(metrics.avg_latency_ms);
if latency_check.status == HealthStatus::Unhealthy {
overall_status = HealthStatus::Unhealthy;
} else if latency_check.status == HealthStatus::Degraded && overall_status == HealthStatus::Healthy {
overall_status = HealthStatus::Degraded;
}
checks.push(latency_check);
let circuit_check = self.check_circuits(metrics.open_circuits);
if circuit_check.status == HealthStatus::Unhealthy {
overall_status = HealthStatus::Unhealthy;
} else if circuit_check.status == HealthStatus::Degraded && overall_status == HealthStatus::Healthy {
overall_status = HealthStatus::Degraded;
}
checks.push(circuit_check);
if let Some(memory_mb) = metrics.memory_mb {
checks.push(ComponentHealth {
name: "memory".to_string(),
status: HealthStatus::Healthy,
message: Some(format!("{} MB used", memory_mb)),
response_time_ms: None,
});
}
HealthResponse {
status: overall_status,
version: self.version.clone(),
uptime_secs: self.started_at.elapsed().as_secs(),
checks,
timestamp: chrono::Utc::now().to_rfc3339(),
}
}
fn check_success_rate(&self, rate: f64) -> ComponentHealth {
let (status, message) = if rate >= self.config.success_rate_healthy {
(HealthStatus::Healthy, format!("{:.1}% success rate", rate * 100.0))
} else if rate >= self.config.success_rate_degraded {
(HealthStatus::Degraded, format!("{:.1}% success rate (degraded)", rate * 100.0))
} else {
(HealthStatus::Unhealthy, format!("{:.1}% success rate (critical)", rate * 100.0))
};
ComponentHealth {
name: "success_rate".to_string(),
status,
message: Some(message),
response_time_ms: None,
}
}
fn check_latency(&self, latency_ms: f64) -> ComponentHealth {
let (status, message) = if latency_ms <= self.config.max_latency_healthy_ms as f64 {
(HealthStatus::Healthy, format!("{:.0}ms avg latency", latency_ms))
} else if latency_ms <= self.config.max_latency_degraded_ms as f64 {
(HealthStatus::Degraded, format!("{:.0}ms avg latency (high)", latency_ms))
} else {
(HealthStatus::Unhealthy, format!("{:.0}ms avg latency (critical)", latency_ms))
};
ComponentHealth {
name: "latency".to_string(),
status,
message: Some(message),
response_time_ms: Some(latency_ms as u64),
}
}
fn check_circuits(&self, open_circuits: usize) -> ComponentHealth {
let (status, message) = if open_circuits <= self.config.max_open_circuits_healthy {
(HealthStatus::Healthy, format!("{} open circuits", open_circuits))
} else if open_circuits <= self.config.max_open_circuits_degraded {
(HealthStatus::Degraded, format!("{} open circuits (elevated)", open_circuits))
} else {
(HealthStatus::Unhealthy, format!("{} open circuits (critical)", open_circuits))
};
ComponentHealth {
name: "circuit_breakers".to_string(),
status,
message: Some(message),
response_time_ms: None,
}
}
}
/// Snapshot of runtime metrics supplied by the caller to the readiness
/// evaluation.
///
/// The derived `Default` yields zero for every numeric field and `None` for
/// `memory_mb`.
#[derive(Debug, Clone, Default)]
pub struct HealthMetrics {
    /// Success rate as a fraction; compared against the configured success
    /// rate thresholds and printed as a percentage.
    pub success_rate: f64,
    /// Average latency in milliseconds; compared against the configured
    /// latency thresholds.
    pub avg_latency_ms: f64,
    /// Number of open circuits; compared against the configured circuit
    /// thresholds.
    pub open_circuits: usize,
    /// Memory in use, in megabytes. When present it is reported as an
    /// informational check that does not affect the overall status.
    pub memory_mb: Option<u64>,
    /// Number of in-flight requests. Not consulted by the readiness
    /// evaluation in this file.
    pub active_requests: usize,
}
impl HealthResponse {
    /// Renders the report as pretty-printed JSON.
    ///
    /// If serialization fails, the literal string `{}` is returned instead of
    /// an error.
    pub fn to_json(&self) -> String {
        match serde_json::to_string_pretty(self) {
            Ok(rendered) => rendered,
            Err(_) => String::from("{}"),
        }
    }

    /// Renders the report as single-line JSON.
    ///
    /// If serialization fails, the literal string `{}` is returned instead of
    /// an error.
    pub fn to_json_compact(&self) -> String {
        match serde_json::to_string(self) {
            Ok(rendered) => rendered,
            Err(_) => String::from("{}"),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Evaluates readiness with the default thresholds and returns the
    /// overall status of the resulting report.
    fn readiness_status_for(metrics: HealthMetrics) -> HealthStatus {
        HealthChecker::default_config().readiness(&metrics).status
    }

    /// Liveness does not depend on metrics and must always be healthy.
    #[test]
    fn test_liveness_always_healthy() {
        let report = HealthChecker::default_config().liveness();
        assert_eq!(report.status, HealthStatus::Healthy);
    }

    /// All metrics inside the healthy thresholds.
    #[test]
    fn test_readiness_healthy() {
        let status = readiness_status_for(HealthMetrics {
            success_rate: 0.99,
            avg_latency_ms: 100.0,
            open_circuits: 0,
            memory_mb: Some(256),
            active_requests: 5,
        });
        assert_eq!(status, HealthStatus::Healthy);
    }

    /// All metrics between the healthy and degraded thresholds.
    #[test]
    fn test_readiness_degraded() {
        let status = readiness_status_for(HealthMetrics {
            success_rate: 0.85,
            avg_latency_ms: 8000.0,
            open_circuits: 5,
            memory_mb: None,
            active_requests: 10,
        });
        assert_eq!(status, HealthStatus::Degraded);
    }

    /// All metrics beyond the degraded thresholds.
    #[test]
    fn test_readiness_unhealthy() {
        let status = readiness_status_for(HealthMetrics {
            success_rate: 0.50,
            avg_latency_ms: 20000.0,
            open_circuits: 20,
            memory_mb: None,
            active_requests: 0,
        });
        assert_eq!(status, HealthStatus::Unhealthy);
    }

    /// The pretty JSON rendering exposes the top-level report keys.
    #[test]
    fn test_json_output() {
        let rendered = HealthChecker::default_config().liveness().to_json();
        for key in &["\"status\"", "\"version\"", "\"uptime_secs\""] {
            assert!(rendered.contains(*key));
        }
    }

    /// Each status maps to its expected HTTP status code.
    #[test]
    fn test_http_status_codes() {
        let expected = [
            (HealthStatus::Healthy, 200),
            (HealthStatus::Degraded, 200),
            (HealthStatus::Unhealthy, 503),
        ];
        for (status, code) in expected.iter() {
            assert_eq!(status.http_status_code(), *code);
        }
    }
}