#![allow(dead_code)]
use crate::storage::StorageLayer;
use crate::utils::error::Result;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::RwLock;
use tracing::{debug, error};
/// Runs health probes against the storage layer and caches the most recent results.
///
/// Every piece of mutable state is held behind an `Arc<RwLock<..>>`, and the manual
/// `Clone` impl in this file clones those `Arc`s, so clones of a checker share the same
/// cached results and the same `active` flag.
#[derive(Debug)]
pub struct HealthChecker {
/// Storage layer whose backends are probed by the `check_*` methods.
storage: Arc<StorageLayer>,
/// Per-component results of the latest `check_all` run, keyed by component name.
component_health: Arc<RwLock<HashMap<String, ComponentHealth>>>,
/// Aggregate status produced by the latest `check_all` run.
overall_health: Arc<RwLock<HealthStatus>>,
/// Set by `start`, cleared by `stop`; the background loop exits once it reads `false`.
active: Arc<RwLock<bool>>,
}
/// Aggregate health snapshot covering every component that was checked.
#[derive(Debug, Clone, serde::Serialize)]
pub struct HealthStatus {
/// `true` only when every checked component reported itself healthy.
pub overall_healthy: bool,
/// UTC timestamp taken when this snapshot was assembled.
pub last_check: chrono::DateTime<chrono::Utc>,
/// Individual component results, keyed by component name.
pub components: HashMap<String, ComponentHealth>,
/// Uptime in whole seconds as filled in by whoever builds the snapshot
/// (0 in the initial snapshot created by `HealthChecker::new`).
pub uptime_seconds: u64,
/// Component counts and the healthy percentage derived from `components`.
pub summary: HealthSummary,
}
/// Result of probing a single component.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ComponentHealth {
/// Component identifier (for example `"storage"` or `"database"`).
pub name: String,
/// Whether the probe considered the component healthy.
pub healthy: bool,
/// Short status label; the probes in this file use `"healthy"`, `"degraded"`,
/// `"unhealthy"` and `"not_configured"`.
pub status: String,
/// UTC timestamp taken when the result was built.
pub last_check: chrono::DateTime<chrono::Utc>,
/// Time the probe took, in milliseconds.
pub response_time_ms: u64,
/// Error text when the probe failed, otherwise `None`.
pub error: Option<String>,
/// Optional extra details about the component (for example pool statistics).
pub metadata: HashMap<String, serde_json::Value>,
}
/// Counts summarising a set of component results.
#[derive(Debug, Clone, serde::Serialize)]
pub struct HealthSummary {
/// Number of components that were checked.
pub total_components: usize,
/// Number of checked components that reported healthy.
pub healthy_components: usize,
/// Number of checked components that did not report healthy.
pub unhealthy_components: usize,
/// Healthy components as a percentage of the total (0.0 to 100.0).
pub health_percentage: f64,
}
/// Settings describing a single health check.
///
/// NOTE(review): apart from the unit tests, no code visible in this file reads this type;
/// the field descriptions below are inferred from the field names and types only —
/// confirm against the consumer.
#[derive(Debug, Clone)]
pub struct HealthCheckConfig {
/// Name of the check (presumably the component name).
pub name: String,
/// Presumably how often the check should run.
pub interval: Duration,
/// Presumably the maximum time a single check may take.
pub timeout: Duration,
/// Presumably the number of retries before a check counts as failed.
pub retries: u32,
/// Presumably marks the check as critical for overall health.
pub critical: bool,
}
impl HealthChecker {
/// Builds an inactive checker around `storage`.
///
/// The cached status starts out healthy with no components, zero uptime and a health
/// percentage of 100; no probe is executed here.
///
/// # Errors
/// There is no failing path at present; the call always returns `Ok`.
pub async fn new(storage: Arc<StorageLayer>) -> Result<Self> {
    let empty_summary = HealthSummary {
        total_components: 0,
        healthy_components: 0,
        unhealthy_components: 0,
        health_percentage: 100.0,
    };
    let initial_status = HealthStatus {
        overall_healthy: true,
        last_check: chrono::Utc::now(),
        components: HashMap::new(),
        uptime_seconds: 0,
        summary: empty_summary,
    };
    let checker = Self {
        storage,
        component_health: Arc::new(RwLock::new(HashMap::new())),
        overall_health: Arc::new(RwLock::new(initial_status)),
        active: Arc::new(RwLock::new(false)),
    };
    Ok(checker)
}
/// Marks the checker as active and launches the periodic background check task.
///
/// Calling `start` while the checker is already active is a no-op: previously every
/// call spawned another background loop, so a double `start` ran duplicate checks.
///
/// # Errors
/// There is no failing path at present; the call always returns `Ok`.
pub async fn start(&self) -> Result<()> {
    debug!("Starting health checker");
    {
        // Test-and-set under a single write lock so concurrent `start` calls cannot
        // both observe "inactive" and each spawn a loop.
        let mut active = self.active.write().await;
        if *active {
            debug!("Health checker already active; not spawning another check task");
            return Ok(());
        }
        *active = true;
    }
    // NOTE(review): a `stop` followed by `start` before the old loop's next tick can
    // still leave the old loop running next to the new one, because the old loop only
    // re-reads the flag once per tick.
    self.start_health_check_tasks().await;
    Ok(())
}
/// Clears the active flag.
///
/// The background loop reads the flag once per tick, so it exits on its next tick
/// rather than immediately.
///
/// # Errors
/// There is no failing path at present; the call always returns `Ok`.
pub async fn stop(&self) -> Result<()> {
    debug!("Stopping health checker");
    let mut running = self.active.write().await;
    *running = false;
    Ok(())
}
/// Returns a copy of the most recently cached aggregate status without running a probe.
///
/// # Errors
/// There is no failing path at present; the call always returns `Ok`.
pub async fn get_status(&self) -> Result<HealthStatus> {
    Ok(self.overall_health.read().await.clone())
}
/// Probes every backend, aggregates the results, caches them and returns the new status.
///
/// Storage, database, Redis and file storage are always probed; the vector database is
/// probed only when the storage layer has one configured. The aggregate is healthy only
/// when every probed component is healthy.
///
/// `uptime_seconds` is measured from the first `check_all` call made in this process
/// (the anchor is shared by all checker instances). Previously it was measured from the
/// start of the current call, so it only ever reported the duration of the check itself.
///
/// # Errors
/// There is no failing path at present; probe failures are reported inside the returned
/// status rather than as an `Err`.
pub async fn check_all(&self) -> Result<HealthStatus> {
    // Monotonic anchor for the uptime figure, fixed by the first check in the process.
    static FIRST_CHECK: std::sync::OnceLock<Instant> = std::sync::OnceLock::new();

    debug!("Running comprehensive health check");
    let uptime_anchor = *FIRST_CHECK.get_or_init(Instant::now);

    let mut components = HashMap::new();
    let storage_health = self.check_storage().await;
    components.insert("storage".to_string(), storage_health);
    let database_health = self.check_database().await;
    components.insert("database".to_string(), database_health);
    let redis_health = self.check_redis().await;
    components.insert("redis".to_string(), redis_health);
    let file_storage_health = self.check_file_storage().await;
    components.insert("file_storage".to_string(), file_storage_health);
    if self.storage.vector().is_some() {
        let vector_health = self.check_vector_database().await;
        components.insert("vector_database".to_string(), vector_health);
    }

    let healthy_components = components.values().filter(|c| c.healthy).count();
    let total_components = components.len();
    let overall_healthy = healthy_components == total_components;
    // An empty set counts as fully healthy (matching the initial status built by `new`)
    // instead of producing NaN from 0/0.
    let health_percentage = if total_components == 0 {
        100.0
    } else {
        (healthy_components as f64 / total_components as f64) * 100.0
    };

    let health_status = HealthStatus {
        overall_healthy,
        last_check: chrono::Utc::now(),
        components: components.clone(),
        uptime_seconds: uptime_anchor.elapsed().as_secs(),
        summary: HealthSummary {
            total_components,
            healthy_components,
            unhealthy_components: total_components - healthy_components,
            health_percentage,
        },
    };

    // Each write guard is a temporary dropped at the end of its statement, so the two
    // caches are never locked at the same time.
    *self.overall_health.write().await = health_status.clone();
    *self.component_health.write().await = components;

    Ok(health_status)
}
/// Probes the storage layer as a whole through `self.storage.health_check()`.
///
/// When the call succeeds, `healthy` mirrors the report's `overall` flag (labelled
/// `"healthy"` or `"degraded"`) and `metadata` holds the top-level fields of the
/// serialized report; it is empty if the report does not serialize to a JSON object.
/// When the call fails, the component is `"unhealthy"` and carries the error text.
async fn check_storage(&self) -> ComponentHealth {
    let started = Instant::now();
    let report = match self.storage.health_check().await {
        Ok(report) => report,
        Err(err) => {
            return ComponentHealth {
                name: "storage".to_string(),
                healthy: false,
                status: "unhealthy".to_string(),
                last_check: chrono::Utc::now(),
                response_time_ms: started.elapsed().as_millis() as u64,
                error: Some(err.to_string()),
                metadata: HashMap::new(),
            };
        }
    };

    let label = if report.overall { "healthy" } else { "degraded" };
    ComponentHealth {
        name: "storage".to_string(),
        healthy: report.overall,
        status: label.to_string(),
        last_check: chrono::Utc::now(),
        response_time_ms: started.elapsed().as_millis() as u64,
        error: None,
        // A serialization failure falls back to `Value::Null`, which yields no entries.
        metadata: match serde_json::to_value(&report).unwrap_or_default() {
            serde_json::Value::Object(fields) => fields.into_iter().collect(),
            _ => HashMap::new(),
        },
    }
}
/// Probes the database through `self.storage.db().health_check()`.
///
/// On success the result carries the pool's `size` and `idle` statistics as
/// `"pool_size"` and `"idle_connections"` metadata; on failure it is `"unhealthy"`
/// and carries the error text.
async fn check_database(&self) -> ComponentHealth {
    let started = Instant::now();
    let probe = self.storage.db().health_check().await;
    match probe {
        Err(err) => ComponentHealth {
            name: "database".to_string(),
            healthy: false,
            status: "unhealthy".to_string(),
            last_check: chrono::Utc::now(),
            response_time_ms: started.elapsed().as_millis() as u64,
            error: Some(err.to_string()),
            metadata: HashMap::new(),
        },
        Ok(()) => {
            let pool = self.storage.db().stats();
            let mut details = HashMap::new();
            details.insert(
                String::from("pool_size"),
                serde_json::Value::Number(pool.size.into()),
            );
            details.insert(
                String::from("idle_connections"),
                serde_json::Value::Number(pool.idle.into()),
            );
            ComponentHealth {
                name: "database".to_string(),
                healthy: true,
                status: "healthy".to_string(),
                last_check: chrono::Utc::now(),
                response_time_ms: started.elapsed().as_millis() as u64,
                error: None,
                metadata: details,
            }
        }
    }
}
/// Probes Redis through `self.storage.redis().health_check()`.
///
/// The result is `"healthy"` when the call succeeds and `"unhealthy"` with the error
/// text when it fails; no metadata is attached in either case.
async fn check_redis(&self) -> ComponentHealth {
    let started = Instant::now();
    // Only the failure (if any) matters; success carries no payload.
    let failure = self.storage.redis().health_check().await.err();
    let label = if failure.is_none() { "healthy" } else { "unhealthy" };
    ComponentHealth {
        name: "redis".to_string(),
        healthy: failure.is_none(),
        status: label.to_string(),
        last_check: chrono::Utc::now(),
        response_time_ms: started.elapsed().as_millis() as u64,
        error: failure.map(|err| err.to_string()),
        metadata: HashMap::new(),
    }
}
/// Probes file storage through `self.storage.files().health_check()`.
///
/// The result is `"healthy"` when the call succeeds and `"unhealthy"` with the error
/// text when it fails; no metadata is attached in either case.
async fn check_file_storage(&self) -> ComponentHealth {
    let started = Instant::now();
    let (healthy, label, error) = match self.storage.files().health_check().await {
        Ok(()) => (true, "healthy", None),
        Err(err) => (false, "unhealthy", Some(err.to_string())),
    };
    ComponentHealth {
        name: "file_storage".to_string(),
        healthy,
        status: label.to_string(),
        last_check: chrono::Utc::now(),
        response_time_ms: started.elapsed().as_millis() as u64,
        error,
        metadata: HashMap::new(),
    }
}
/// Probes the vector store, if the storage layer has one.
///
/// Without a configured vector store the component is reported as healthy with the
/// status `"not_configured"` and a response time of 0. Otherwise the result is
/// `"healthy"` or `"unhealthy"` (with the error text) depending on the store's
/// `health_check()` outcome.
async fn check_vector_database(&self) -> ComponentHealth {
    let started = Instant::now();
    let store = match self.storage.vector() {
        Some(store) => store,
        None => {
            // Absence of the optional backend is not treated as a failure.
            return ComponentHealth {
                name: "vector_database".to_string(),
                healthy: true,
                status: "not_configured".to_string(),
                last_check: chrono::Utc::now(),
                response_time_ms: 0,
                error: None,
                metadata: HashMap::new(),
            };
        }
    };

    let outcome = store.health_check().await;
    let healthy = outcome.is_ok();
    ComponentHealth {
        name: "vector_database".to_string(),
        healthy,
        status: if healthy { "healthy" } else { "unhealthy" }.to_string(),
        last_check: chrono::Utc::now(),
        response_time_ms: started.elapsed().as_millis() as u64,
        error: outcome.err().map(|err| err.to_string()),
        metadata: HashMap::new(),
    }
}
/// Probes an external provider with an HTTP GET to `provider_url` (10 second timeout).
///
/// A success (2xx) response is `"healthy"`. Any other response is `"degraded"` with
/// `"HTTP <status>"` as the error. Both cases record the numeric status code under the
/// `"status_code"` metadata key. A request that fails outright is `"unhealthy"` with
/// the error text and no metadata.
async fn check_provider(&self, provider_name: &str, provider_url: &str) -> ComponentHealth {
    let started = Instant::now();
    let request = reqwest::Client::new()
        .get(provider_url)
        .timeout(Duration::from_secs(10));
    let response = match request.send().await {
        Ok(response) => response,
        Err(err) => {
            return ComponentHealth {
                name: provider_name.to_string(),
                healthy: false,
                status: "unhealthy".to_string(),
                last_check: chrono::Utc::now(),
                response_time_ms: started.elapsed().as_millis() as u64,
                error: Some(err.to_string()),
                metadata: HashMap::new(),
            };
        }
    };

    let code = response.status();
    let healthy = code.is_success();
    let label = if healthy { "healthy" } else { "degraded" };
    ComponentHealth {
        name: provider_name.to_string(),
        healthy,
        status: label.to_string(),
        last_check: chrono::Utc::now(),
        response_time_ms: started.elapsed().as_millis() as u64,
        error: (!healthy).then(|| format!("HTTP {}", code)),
        metadata: {
            let mut details = HashMap::new();
            details.insert(
                "status_code".to_string(),
                serde_json::Value::Number(code.as_u16().into()),
            );
            details
        },
    }
}
async fn start_health_check_tasks(&self) {
let health_checker = self.clone();
tokio::spawn(async move {
let mut interval = tokio::time::interval(Duration::from_secs(30));
loop {
interval.tick().await;
if !*health_checker.active.read().await {
break;
}
if let Err(e) = health_checker.check_all().await {
error!("Health check failed: {}", e);
}
}
});
}
/// Returns a copy of the cached result for `component_name`, or `None` when no result
/// with that name has been cached.
pub async fn get_component_health(&self, component_name: &str) -> Option<ComponentHealth> {
    self.component_health
        .read()
        .await
        .get(component_name)
        .cloned()
}
/// Reports whether the cached result for `component_name` is healthy.
///
/// A component with no cached result is treated as not healthy.
pub async fn is_component_healthy(&self, component_name: &str) -> bool {
    let snapshot = self.component_health.read().await;
    snapshot
        .get(component_name)
        .map_or(false, |entry| entry.healthy)
}
/// Returns copies of every cached component result that is not healthy.
///
/// The order follows the cache's map iteration order and is therefore unspecified.
pub async fn get_unhealthy_components(&self) -> Vec<ComponentHealth> {
    let snapshot = self.component_health.read().await;
    let mut unhealthy = Vec::new();
    for entry in snapshot.values() {
        if !entry.healthy {
            unhealthy.push(entry.clone());
        }
    }
    unhealthy
}
}
impl Clone for HealthChecker {
fn clone(&self) -> Self {
Self {
storage: self.storage.clone(),
component_health: self.component_health.clone(),
overall_health: self.overall_health.clone(),
active: self.active.clone(),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A hand-built `ComponentHealth` exposes the values it was constructed with.
    #[test]
    fn test_component_health_creation() {
        let component = ComponentHealth {
            name: String::from("test_component"),
            healthy: true,
            status: String::from("healthy"),
            last_check: chrono::Utc::now(),
            response_time_ms: 50,
            error: None,
            metadata: HashMap::new(),
        };

        assert!(component.healthy);
        assert_eq!(component.name, "test_component");
        assert_eq!(component.response_time_ms, 50);
    }

    /// A hand-built `HealthSummary` exposes the values it was constructed with.
    #[test]
    fn test_health_summary_calculation() {
        let four_of_five = HealthSummary {
            total_components: 5,
            healthy_components: 4,
            unhealthy_components: 1,
            health_percentage: 80.0,
        };

        assert_eq!(four_of_five.total_components, 5);
        assert_eq!(four_of_five.healthy_components, 4);
        assert_eq!(four_of_five.health_percentage, 80.0);
    }

    /// A hand-built `HealthCheckConfig` exposes the values it was constructed with.
    #[test]
    fn test_health_check_config() {
        let db_check = HealthCheckConfig {
            name: String::from("database"),
            interval: Duration::from_secs(30),
            timeout: Duration::from_secs(5),
            retries: 3,
            critical: true,
        };

        assert_eq!(db_check.name, "database");
        assert!(db_check.critical);
        assert_eq!(db_check.retries, 3);
    }
}