use crate::utils::config::HealthConfig;
use crate::utils::metrics::MetricRegistry;
use crate::utils::metrics::OpenCratesMetrics;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use std::time::{SystemTime, UNIX_EPOCH};
/// Health state reported for a single check or for the system as a whole.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum HealthStatus {
/// The check passed. Aggregated reports in this file are `Healthy` only
/// when every individual result is `Healthy`.
Healthy,
/// The check failed. Also used in this file for lookups of a check name
/// that is not registered, and as the aggregate status whenever any
/// individual result is not `Healthy`.
Unhealthy,
/// The check could not be fully evaluated. `HealthManager::check_health`
/// reports this when a check does not finish within the manager's timeout.
Degraded,
}
/// Outcome of running one health check.
#[derive(Debug, Serialize, Deserialize)]
pub struct CheckResult {
/// Name identifying the check this result belongs to. Results synthesised
/// by `HealthManager` (timeout, unknown check) carry the name the caller
/// asked for; otherwise the value is whatever the check itself reports.
pub name: String,
/// Status reported for the check.
pub status: HealthStatus,
/// Optional human-readable detail; `None` when there is nothing to add.
/// This file uses `"timeout"` and `"check not found"` for synthesised results.
pub message: Option<String>,
}
/// Aggregated health report: one overall status plus the individual results
/// it was derived from.
#[derive(Debug, Serialize, Deserialize)]
pub struct HealthInfo {
/// Status summarising all entries in `checks`.
pub overall_status: HealthStatus,
/// Individual check results included in this report.
pub checks: Vec<CheckResult>,
/// Time the report was produced, in whole seconds since the Unix epoch.
pub timestamp: u64,
}
/// A single asynchronous health probe that can be registered with a
/// `HealthManager`.
///
/// Implementors must be `Send + Sync` (they are stored behind `Arc` and
/// shared) and `Debug` (the manager derives `Debug`).
#[async_trait]
pub trait HealthCheck: Send + Sync + std::fmt::Debug {
/// Runs the probe and returns its result.
async fn check(&self) -> CheckResult;
/// Returns the name of this check.
fn name(&self) -> &str;
}
/// Builds a [`HealthInfo`] report for the built-in components.
///
/// The report always contains two entries, `"database"` and `"ai_service"`,
/// both hard-coded to [`HealthStatus::Healthy`]; nothing is actually probed
/// by this function.
///
/// # Returns
///
/// A report whose `overall_status` is `Healthy` when every entry is
/// `Healthy` and `Unhealthy` otherwise, and whose `timestamp` is the current
/// time in whole seconds since the Unix epoch. If the system clock is set
/// before the epoch the timestamp is `0` instead of panicking.
pub async fn check_health() -> HealthInfo {
    let checks = vec![
        CheckResult {
            name: "database".to_string(),
            status: HealthStatus::Healthy,
            message: None,
        },
        CheckResult {
            name: "ai_service".to_string(),
            status: HealthStatus::Healthy,
            message: None,
        },
    ];

    let overall_status = if checks
        .iter()
        .all(|entry| entry.status == HealthStatus::Healthy)
    {
        HealthStatus::Healthy
    } else {
        HealthStatus::Unhealthy
    };

    // `duration_since` fails when the clock is earlier than the epoch; a
    // health endpoint should not panic over that, so fall back to 0.
    let timestamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_secs());

    HealthInfo {
        overall_status,
        checks,
        timestamp,
    }
}
/// Registry of named health checks together with the settings used to run them.
#[derive(Debug)]
pub struct HealthManager {
/// Registered checks, keyed by the name they were registered under.
checks: HashMap<String, Arc<dyn HealthCheck>>,
/// String-to-string map exposed read-only through `dependencies()`.
/// `new` initialises it empty and nothing in the visible `impl` inserts
/// into it. NOTE(review): the meaning of key and value is not shown here —
/// confirm against callers.
dependencies: HashMap<String, String>,
/// Metric registry handed to `new`; exposed read-only through `registry()`.
registry: MetricRegistry,
/// Maximum time a single check may take when it is run under the timeout
/// (see `check_health`); `new` sets it to five seconds.
timeout: Duration,
}
impl HealthManager {
    /// Creates a manager with no registered checks, an empty dependency map
    /// and a five-second per-check timeout.
    #[must_use]
    pub fn new(registry: MetricRegistry) -> Self {
        Self {
            checks: HashMap::new(),
            dependencies: HashMap::new(),
            timeout: Duration::from_secs(5),
            registry,
        }
    }

    /// Returns the dependency map held by this manager.
    #[must_use]
    pub fn dependencies(&self) -> &HashMap<String, String> {
        &self.dependencies
    }

    /// Returns the metric registry this manager was created with.
    #[must_use]
    pub fn registry(&self) -> &MetricRegistry {
        &self.registry
    }

    /// Registers `check` under `name`, replacing any check previously
    /// registered under the same name.
    pub async fn register_check(&mut self, name: &str, check: Arc<dyn HealthCheck>) {
        self.checks.insert(name.to_string(), check);
    }

    /// Runs the registered checks one after another and summarises them.
    ///
    /// Returns [`HealthStatus::Unhealthy`] as soon as one check reports
    /// anything other than `Healthy`; remaining checks are not run. Returns
    /// [`HealthStatus::Healthy`] when every check is healthy or when no
    /// checks are registered.
    ///
    /// Each check is bounded by the manager's timeout, so a check that never
    /// completes counts as not healthy instead of stalling the caller.
    pub async fn overall_health(&self) -> HealthStatus {
        for (name, check) in &self.checks {
            let result = self.run_with_timeout(name, check.as_ref()).await;
            if result.status != HealthStatus::Healthy {
                return HealthStatus::Unhealthy;
            }
        }
        HealthStatus::Healthy
    }

    /// Runs the check registered under `name`, bounded by the manager's timeout.
    ///
    /// # Returns
    ///
    /// * the check's own result when it finishes in time;
    /// * a `Degraded` result with message `"timeout"` when it does not;
    /// * an `Unhealthy` result with message `"check not found"` when no
    ///   check is registered under `name`.
    pub async fn check_health(&self, name: &str) -> CheckResult {
        match self.checks.get(name) {
            Some(check) => self.run_with_timeout(name, check.as_ref()).await,
            None => Self::not_found(name),
        }
    }

    /// Runs every registered check sequentially and returns a full report.
    ///
    /// Each check is bounded by the manager's timeout; one that does not
    /// finish in time appears in the report as `Degraded` with message
    /// `"timeout"`. `overall_status` is `Healthy` only when every result is
    /// `Healthy` (including the case of no registered checks). The order of
    /// `checks` follows the iteration order of the underlying `HashMap`.
    ///
    /// `timestamp` is the current time in whole seconds since the Unix
    /// epoch, or `0` if the system clock is set before the epoch.
    pub async fn health_info(&self) -> HealthInfo {
        let mut checks = Vec::with_capacity(self.checks.len());
        for (name, check) in &self.checks {
            checks.push(self.run_with_timeout(name, check.as_ref()).await);
        }

        let overall_status = if checks
            .iter()
            .all(|entry| entry.status == HealthStatus::Healthy)
        {
            HealthStatus::Healthy
        } else {
            HealthStatus::Unhealthy
        };

        HealthInfo {
            overall_status,
            checks,
            timestamp: Self::unix_timestamp_secs(),
        }
    }

    /// Runs the check registered under `name` with **no** timeout applied.
    ///
    /// Returns the check's own result, or an `Unhealthy` result with message
    /// `"check not found"` when no check is registered under `name`. Use
    /// [`HealthManager::check_health`] when the call must be time-bounded.
    pub async fn run_check(&self, name: &str) -> CheckResult {
        match self.checks.get(name) {
            Some(check) => check.check().await,
            None => Self::not_found(name),
        }
    }

    /// Creates a manager backed by a fresh `MetricRegistry`.
    ///
    /// Both `_metrics` and `_config` are currently unused; the resulting
    /// manager is identical to `HealthManager::new(MetricRegistry::new())`.
    ///
    /// # Errors
    ///
    /// The current implementation never returns an error.
    pub async fn new_with_config(
        _metrics: Arc<OpenCratesMetrics>,
        _config: &HealthConfig,
    ) -> Result<Self, anyhow::Error> {
        let metric_registry = MetricRegistry::new();
        Ok(Self::new(metric_registry))
    }

    /// Runs `check` under `self.timeout`, mapping an elapsed timeout to a
    /// `Degraded` result named after `name`.
    async fn run_with_timeout(&self, name: &str, check: &dyn HealthCheck) -> CheckResult {
        match tokio::time::timeout(self.timeout, check.check()).await {
            Ok(result) => result,
            Err(_) => CheckResult {
                name: name.to_string(),
                status: HealthStatus::Degraded,
                message: Some("timeout".to_string()),
            },
        }
    }

    /// Builds the result returned when no check is registered under `name`.
    fn not_found(name: &str) -> CheckResult {
        CheckResult {
            name: name.to_string(),
            status: HealthStatus::Unhealthy,
            message: Some("check not found".to_string()),
        }
    }

    /// Current time in whole seconds since the Unix epoch, or `0` when the
    /// system clock is set before the epoch (avoids panicking in a health path).
    fn unix_timestamp_secs() -> u64 {
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map_or(0, |elapsed| elapsed.as_secs())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;
    use std::time::Instant;
    use tokio::time::sleep;

    /// Timeout installed on managers in the timeout tests so they do not
    /// have to wait for the five-second production default.
    const TEST_TIMEOUT: Duration = Duration::from_millis(50);

    /// How long the slow mock check sleeps; far longer than `TEST_TIMEOUT`.
    const SLOW_CHECK_DELAY: Duration = Duration::from_secs(10);

    /// Configurable check that reports a fixed status, optionally after a delay.
    #[derive(Debug)]
    pub struct MockHealthCheck {
        name: String,
        status: HealthStatus,
        delay: Option<Duration>,
    }

    impl MockHealthCheck {
        /// Creates a mock named `"mock_check"` that immediately reports `status`.
        pub fn new(status: HealthStatus) -> Self {
            Self {
                name: "mock_check".to_string(),
                status,
                delay: None,
            }
        }

        /// Makes the mock sleep for `delay` before reporting.
        pub fn with_delay(mut self, delay: Duration) -> Self {
            self.delay = Some(delay);
            self
        }

        /// Overrides the name the mock reports.
        pub fn with_name(mut self, name: &str) -> Self {
            self.name = name.to_string();
            self
        }
    }

    #[async_trait]
    impl HealthCheck for MockHealthCheck {
        async fn check(&self) -> CheckResult {
            if let Some(delay) = self.delay {
                sleep(delay).await;
            }
            CheckResult {
                name: self.name.clone(),
                status: self.status.clone(),
                message: None,
            }
        }

        fn name(&self) -> &str {
            &self.name
        }
    }

    /// Builds a manager with a short timeout and one check, registered as
    /// `"slow_check"`, that sleeps well past that timeout.
    async fn manager_with_slow_check() -> HealthManager {
        let mut health_manager = HealthManager::new(MetricRegistry::new());
        // `timeout` is private, but this child module may access it.
        health_manager.timeout = TEST_TIMEOUT;
        let check =
            Arc::new(MockHealthCheck::new(HealthStatus::Healthy).with_delay(SLOW_CHECK_DELAY));
        health_manager.register_check("slow_check", check).await;
        health_manager
    }

    #[tokio::test]
    async fn test_health_manager_basic() {
        let registry = MetricRegistry::new();
        let mut health_manager = HealthManager::new(registry);
        let check = Arc::new(MockHealthCheck::new(HealthStatus::Healthy));
        health_manager.register_check("test_check", check).await;
        let status = health_manager.overall_health().await;
        assert_eq!(status, HealthStatus::Healthy);
    }

    /// A check that exceeds the timeout is reported as `Degraded` with a
    /// `"timeout"` message.
    #[tokio::test]
    async fn test_health_manager_timeout() {
        let health_manager = manager_with_slow_check().await;
        let status = health_manager.check_health("slow_check").await;
        assert_eq!(status.status, HealthStatus::Degraded);
        assert!(status
            .message
            .as_ref()
            .is_some_and(|msg| msg.contains("timeout")));
    }

    #[tokio::test]
    async fn test_health_info() {
        let registry = MetricRegistry::new();
        let mut health_manager = HealthManager::new(registry);
        let check = Arc::new(MockHealthCheck::new(HealthStatus::Healthy).with_name("test_check"));
        health_manager.register_check("test_check", check).await;
        let info = health_manager.health_info().await;
        assert_eq!(info.overall_status, HealthStatus::Healthy);
        assert_eq!(info.checks.len(), 1);
        assert_eq!(info.checks[0].name, "test_check");
    }

    /// One unhealthy check makes the overall status `Unhealthy`, regardless
    /// of the order in which the checks are visited.
    #[tokio::test]
    async fn test_circuit_breaker() {
        let registry = MetricRegistry::new();
        let mut health_manager = HealthManager::new(registry);
        let check1 = Arc::new(MockHealthCheck::new(HealthStatus::Healthy).with_name("check1"));
        let check2 = Arc::new(MockHealthCheck::new(HealthStatus::Unhealthy).with_name("check2"));
        health_manager.register_check("check1", check1).await;
        health_manager.register_check("check2", check2).await;
        let result = health_manager.overall_health().await;
        assert_eq!(result, HealthStatus::Unhealthy);
    }

    /// The timeout fallback is named after the registration key (not the
    /// mock's own name) and is returned without waiting for the slow check.
    #[tokio::test]
    async fn test_health_check_timeout() {
        let health_manager = manager_with_slow_check().await;
        let started = Instant::now();
        let status = health_manager.check_health("slow_check").await;
        assert!(started.elapsed() < SLOW_CHECK_DELAY);
        assert_eq!(status.name, "slow_check");
        assert_eq!(status.status, HealthStatus::Degraded);
        assert!(status
            .message
            .as_ref()
            .is_some_and(|msg| msg.contains("timeout")));
    }

    /// Looking up an unregistered name yields an `Unhealthy` result from
    /// both `check_health` and `run_check`.
    #[tokio::test]
    async fn test_unknown_check_is_unhealthy() {
        let health_manager = HealthManager::new(MetricRegistry::new());
        let results = [
            health_manager.check_health("missing").await,
            health_manager.run_check("missing").await,
        ];
        for result in results {
            assert_eq!(result.name, "missing");
            assert_eq!(result.status, HealthStatus::Unhealthy);
            assert_eq!(result.message.as_deref(), Some("check not found"));
        }
    }
}