use crate::telemetry::ErrorCode;
#[cfg(feature = "std")]
use std::collections::HashMap;
#[cfg(feature = "std")]
use std::sync::{Arc, Mutex, OnceLock};
#[cfg(feature = "std")]
use std::time::{Duration, Instant};
#[cfg(not(feature = "std"))]
use alloc::{
collections::BTreeMap as HashMap,
string::{String, ToString},
vec::Vec,
};
/// Health classification used for individual checks and for aggregated reports.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HealthStatus {
    /// Fully working.
    Healthy,
    /// Working with a reported problem; still counted as operational.
    Degraded,
    /// Not working.
    Unhealthy,
    /// The state could not be determined.
    Unknown,
}

impl HealthStatus {
    /// Returns `true` only for [`HealthStatus::Healthy`].
    pub fn is_healthy(&self) -> bool {
        *self == Self::Healthy
    }

    /// Returns `true` for `Healthy` and `Degraded`, `false` for `Unhealthy` and `Unknown`.
    pub fn is_operational(&self) -> bool {
        match self {
            Self::Healthy | Self::Degraded => true,
            Self::Unhealthy | Self::Unknown => false,
        }
    }

    /// Lower-case label for the status (`"healthy"`, `"degraded"`, `"unhealthy"`, `"unknown"`).
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Healthy => "healthy",
            Self::Degraded => "degraded",
            Self::Unhealthy => "unhealthy",
            Self::Unknown => "unknown",
        }
    }

    /// HTTP status code to answer a health probe with.
    ///
    /// `Degraded` deliberately maps to 200 alongside `Healthy`; `Unhealthy` maps to 503
    /// and `Unknown` to 500.
    pub fn http_status_code(&self) -> u16 {
        match *self {
            Self::Healthy | Self::Degraded => 200,
            Self::Unhealthy => 503,
            Self::Unknown => 500,
        }
    }
}
/// Outcome of a single named health check.
#[derive(Debug, Clone)]
pub struct HealthCheckResult {
    /// Name identifying the check.
    pub name: String,
    /// Status the check reported.
    pub status: HealthStatus,
    /// Optional human-readable explanation; the `healthy` constructor leaves it `None`.
    pub message: Option<String>,
    /// Optional error code; only the `unhealthy` constructor can set it.
    pub error_code: Option<ErrorCode>,
    /// How long the check took; zero until `with_duration` is called.
    #[cfg(feature = "std")]
    pub duration: Duration,
    /// Free-form key/value details attached via `with_metadata`.
    pub metadata: HashMap<String, String>,
}

impl HealthCheckResult {
    /// Shared constructor: zero duration (std builds) and empty metadata.
    fn from_parts(
        name: String,
        status: HealthStatus,
        message: Option<String>,
        error_code: Option<ErrorCode>,
    ) -> Self {
        Self {
            name,
            status,
            message,
            error_code,
            #[cfg(feature = "std")]
            duration: Duration::ZERO,
            metadata: HashMap::new(),
        }
    }

    /// Builds a `Healthy` result with no message and no error code.
    pub fn healthy(name: String) -> Self {
        Self::from_parts(name, HealthStatus::Healthy, None, None)
    }

    /// Builds a `Degraded` result carrying `message` and no error code.
    pub fn degraded(name: String, message: String) -> Self {
        Self::from_parts(name, HealthStatus::Degraded, Some(message), None)
    }

    /// Builds an `Unhealthy` result carrying `message` and the optional `error_code`.
    pub fn unhealthy(name: String, message: String, error_code: Option<ErrorCode>) -> Self {
        Self::from_parts(name, HealthStatus::Unhealthy, Some(message), error_code)
    }

    /// Attaches one metadata entry and returns the result; an existing value for `key`
    /// is replaced.
    pub fn with_metadata(mut self, key: String, value: String) -> Self {
        let _replaced = self.metadata.insert(key, value);
        self
    }

    /// Records how long the check took and returns the result.
    #[cfg(feature = "std")]
    pub fn with_duration(self, duration: Duration) -> Self {
        Self { duration, ..self }
    }
}
/// Aggregated result of a batch of health checks.
#[derive(Debug, Clone)]
pub struct HealthReport {
    /// Overall status derived from the individual checks (see `HealthReport::new`).
    pub status: HealthStatus,
    /// The individual check results, in the order they were supplied.
    pub checks: Vec<HealthCheckResult>,
    /// Moment at which the report was built.
    #[cfg(feature = "std")]
    pub timestamp: Instant,
    /// Number of checks in `checks`.
    pub total_checks: usize,
    /// Number of checks whose status is `Healthy`.
    pub healthy_checks: usize,
    /// Number of checks whose status is `Degraded`.
    pub degraded_checks: usize,
    /// Number of checks whose status is `Unhealthy`.
    pub unhealthy_checks: usize,
}

impl HealthReport {
    /// Builds a report from `checks` and stamps it with the current time.
    ///
    /// The overall status is `Unhealthy` if any check is unhealthy, otherwise
    /// `Degraded` if any check is degraded, otherwise `Healthy` when every check is
    /// healthy (which includes an empty list), and `Unknown` in the remaining case
    /// (at least one check reported `Unknown`). `Unknown` checks are included in
    /// `total_checks` but in none of the per-status counters.
    #[cfg(feature = "std")]
    pub fn new(checks: Vec<HealthCheckResult>) -> Self {
        let count_with = |wanted: HealthStatus| {
            checks.iter().filter(|check| check.status == wanted).count()
        };
        let total_checks = checks.len();
        let healthy_checks = count_with(HealthStatus::Healthy);
        let degraded_checks = count_with(HealthStatus::Degraded);
        let unhealthy_checks = count_with(HealthStatus::Unhealthy);

        // Worst status wins.
        let status = if unhealthy_checks > 0 {
            HealthStatus::Unhealthy
        } else if degraded_checks > 0 {
            HealthStatus::Degraded
        } else if healthy_checks == total_checks {
            HealthStatus::Healthy
        } else {
            HealthStatus::Unknown
        };

        Self {
            status,
            checks,
            timestamp: Instant::now(),
            total_checks,
            healthy_checks,
            degraded_checks,
            unhealthy_checks,
        }
    }

    /// Returns every check whose status is anything other than `Healthy`
    /// (degraded, unhealthy and unknown checks).
    pub fn failing_checks(&self) -> Vec<&HealthCheckResult> {
        self.checks
            .iter()
            .filter(|check| check.status != HealthStatus::Healthy)
            .collect()
    }

    /// Returns only the checks whose status is `Unhealthy`.
    pub fn unhealthy_checks_list(&self) -> Vec<&HealthCheckResult> {
        self.checks
            .iter()
            .filter(|check| check.status == HealthStatus::Unhealthy)
            .collect()
    }

    /// One-line human-readable summary of the overall status and the counters.
    pub fn summary(&self) -> String {
        format!(
            "Health: {} ({}/{} checks healthy, {} degraded, {} unhealthy)",
            self.status.as_str(),
            self.healthy_checks,
            self.total_checks,
            self.degraded_checks,
            self.unhealthy_checks
        )
    }

    /// Escapes `raw` so it can be placed between double quotes in a JSON document.
    ///
    /// Quotes, backslashes and control characters below U+0020 are escaped; all other
    /// characters are copied unchanged.
    fn escape_json(raw: &str) -> String {
        let mut escaped = String::with_capacity(raw.len());
        for ch in raw.chars() {
            match ch {
                '"' => escaped.push_str("\\\""),
                '\\' => escaped.push_str("\\\\"),
                '\n' => escaped.push_str("\\n"),
                '\r' => escaped.push_str("\\r"),
                '\t' => escaped.push_str("\\t"),
                c if u32::from(c) < 0x20 => {
                    escaped.push_str(&format!("\\u{:04x}", u32::from(c)));
                }
                c => escaped.push(c),
            }
        }
        escaped
    }

    /// Serializes the report to a JSON object string.
    ///
    /// The object has the keys `status`, `total`, `healthy`, `degraded`, `unhealthy`
    /// and `checks`; each entry of `checks` has `name`, `status`, `message` and
    /// `error_code`. A missing message or error code is written as JSON `null`
    /// (not the string `"null"`), and check names and messages are escaped so that
    /// quotes, backslashes or control characters cannot produce invalid JSON.
    /// Check metadata and durations are not included.
    pub fn to_json(&self) -> String {
        let checks_json: Vec<String> = self
            .checks
            .iter()
            .map(|check| {
                // Render the message as a complete JSON value: quoted string or null.
                let message = match check.message.as_deref() {
                    Some(text) => format!("\"{}\"", Self::escape_json(text)),
                    None => "null".to_string(),
                };
                let error_code = check
                    .error_code
                    .map_or_else(|| "null".to_string(), |code| code.code().to_string());
                format!(
                    r#"{{"name":"{}","status":"{}","message":{},"error_code":{}}}"#,
                    Self::escape_json(&check.name),
                    check.status.as_str(),
                    message,
                    error_code
                )
            })
            .collect();
        format!(
            r#"{{"status":"{}","total":{}, "healthy":{},"degraded":{},"unhealthy":{},"checks":[{}]}}"#,
            self.status.as_str(),
            self.total_checks,
            self.healthy_checks,
            self.degraded_checks,
            self.unhealthy_checks,
            checks_json.join(",")
        )
    }
}
/// Settings that control which health checks run and the thresholds they use.
// NOTE(review): the three threshold fields are not read by any code visible in this
// file; their units look like percentages — confirm against the intended checks.
#[derive(Debug, Clone)]
pub struct HealthCheckConfig {
    /// Memory threshold for reporting a degraded state (default `80.0`).
    pub memory_degraded_threshold: f64,
    /// Memory threshold for reporting an unhealthy state (default `95.0`).
    pub memory_unhealthy_threshold: f64,
    /// Whether the device check is run (default `true`).
    pub check_devices: bool,
    /// Whether the storage check is run (default `true`).
    pub check_storage: bool,
    /// Whether the performance check is run (default `true`).
    pub check_performance: bool,
    /// Performance degradation threshold (default `50.0`).
    pub performance_degradation_threshold: f64,
}

impl Default for HealthCheckConfig {
    /// All optional checks enabled, with thresholds of 80.0 / 95.0 for memory and
    /// 50.0 for performance degradation.
    fn default() -> Self {
        let memory_degraded_threshold = 80.0;
        let memory_unhealthy_threshold = 95.0;
        let performance_degradation_threshold = 50.0;
        HealthCheckConfig {
            memory_degraded_threshold,
            memory_unhealthy_threshold,
            performance_degradation_threshold,
            check_devices: true,
            check_storage: true,
            check_performance: true,
        }
    }
}
/// Runs the built-in health checks and remembers the most recent report.
#[cfg(feature = "std")]
pub struct HealthChecker {
    /// Selects which optional checks run.
    config: HealthCheckConfig,
    /// Copy of the report produced by the latest `check_health` call, if any.
    last_check: Mutex<Option<HealthReport>>,
    /// Number of `check_health` calls completed so far.
    check_count: Mutex<u64>,
}

#[cfg(feature = "std")]
impl HealthChecker {
    /// Creates a checker using `HealthCheckConfig::default()`.
    pub fn new() -> Self {
        Self::with_config(Default::default())
    }

    /// Creates a checker with the given configuration, no cached report and a
    /// zeroed counter.
    pub fn with_config(config: HealthCheckConfig) -> Self {
        let last_check = Mutex::new(None);
        let check_count = Mutex::new(0);
        Self {
            config,
            last_check,
            check_count,
        }
    }

    /// Runs the memory check plus every check enabled in the configuration,
    /// caches a copy of the resulting report, bumps the counter and returns the report.
    ///
    /// # Panics
    ///
    /// Panics if one of the internal mutexes is poisoned.
    pub fn check_health(&self) -> HealthReport {
        let enabled = &self.config;

        // The memory check always runs; the others are opt-out.
        let mut results = vec![self.check_memory()];
        if enabled.check_devices {
            results.push(self.check_devices());
        }
        if enabled.check_storage {
            results.push(self.check_storage());
        }
        if enabled.check_performance {
            results.push(self.check_performance());
        }

        let report = HealthReport::new(results);

        let snapshot = report.clone();
        *self.last_check.lock().expect("lock should not be poisoned") = Some(snapshot);

        let mut runs = self
            .check_count
            .lock()
            .expect("lock should not be poisoned");
        *runs += 1;
        drop(runs);

        report
    }

    /// Memory check: attempts to construct a `SystemMemoryMonitor` and reports
    /// `Healthy` whether or not that succeeds.
    // NOTE(review): the configured memory thresholds are not consulted here — confirm
    // whether this is a placeholder.
    fn check_memory(&self) -> HealthCheckResult {
        use crate::memory_monitor::SystemMemoryMonitor;

        let started = Instant::now();
        let _monitor = SystemMemoryMonitor::new();
        HealthCheckResult::healthy("memory".to_string()).with_duration(started.elapsed())
    }

    /// Device check: availability is currently hard-coded to `true`, so this always
    /// reports `Healthy`; the unhealthy branch shows the intended failure shape.
    fn check_devices(&self) -> HealthCheckResult {
        let started = Instant::now();
        let devices_available = true;
        if !devices_available {
            return HealthCheckResult::unhealthy(
                "devices".to_string(),
                "No compute devices available".to_string(),
                Some(ErrorCode::DeviceUnavailable),
            )
            .with_duration(started.elapsed());
        }
        HealthCheckResult::healthy("devices".to_string()).with_duration(started.elapsed())
    }

    /// Storage check: always `Healthy`; when pooled-memory statistics exist, the
    /// summed cached-allocation count and the number of pools are attached as metadata.
    fn check_storage(&self) -> HealthCheckResult {
        use crate::storage::pooled_memory_stats;

        let started = Instant::now();
        let pools = pooled_memory_stats();
        if pools.is_empty() {
            return HealthCheckResult::healthy("storage".to_string())
                .with_duration(started.elapsed());
        }

        let cached_total = pools
            .values()
            .map(|pool| pool.total_cached_allocations as u64)
            .sum::<u64>();
        HealthCheckResult::healthy("storage".to_string())
            .with_duration(started.elapsed())
            .with_metadata("cached_allocations".to_string(), cached_total.to_string())
            .with_metadata("pool_count".to_string(), pools.len().to_string())
    }

    /// Performance check: `Degraded` when a metrics tracker exists, more than 100 SIMD
    /// operations were recorded and SIMD utilization is below 50%; `Healthy` otherwise.
    ///
    /// # Panics
    ///
    /// Panics if the metrics tracker's lock is poisoned.
    fn check_performance(&self) -> HealthCheckResult {
        use crate::perf_metrics::get_metrics_tracker;

        let started = Instant::now();
        if let Some(tracker) = get_metrics_tracker() {
            let metrics = tracker.lock().expect("lock should not be poisoned");
            let simd = metrics.simd_metrics();
            let utilization = simd.utilization_percentage();
            // Require a minimum number of operations before judging utilization.
            let has_enough_samples = simd.simd_ops > 100;
            if has_enough_samples && utilization < 50.0 {
                let message = format!("Low SIMD utilization: {:.1}%", utilization);
                return HealthCheckResult::degraded("performance".to_string(), message)
                    .with_duration(started.elapsed())
                    .with_metadata(
                        "simd_utilization".to_string(),
                        format!("{:.1}", utilization),
                    );
            }
        }
        HealthCheckResult::healthy("performance".to_string()).with_duration(started.elapsed())
    }

    /// Returns a clone of the most recently cached report, or `None` if
    /// `check_health` has not run yet.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex is poisoned.
    pub fn last_check(&self) -> Option<HealthReport> {
        let cached = self.last_check.lock().expect("lock should not be poisoned");
        (*cached).clone()
    }

    /// Returns how many times `check_health` has completed.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex is poisoned.
    pub fn check_count(&self) -> u64 {
        let runs = self
            .check_count
            .lock()
            .expect("lock should not be poisoned");
        *runs
    }

    /// Readiness probe: runs a full `check_health` (updating the cached report and
    /// the counter) and reports whether the overall status is operational.
    pub fn is_ready(&self) -> bool {
        self.check_health().status.is_operational()
    }

    /// Liveness probe: always `true`.
    pub fn is_alive(&self) -> bool {
        true
    }
}
/// The default checker is the one produced by `HealthChecker::new`.
#[cfg(feature = "std")]
impl Default for HealthChecker {
    fn default() -> Self {
        HealthChecker::new()
    }
}
/// Process-wide checker, created on first use by `init_health_checker` or
/// `health_checker`, whichever runs first.
#[cfg(feature = "std")]
static HEALTH_CHECKER: OnceLock<Arc<HealthChecker>> = OnceLock::new();

/// Creates the global checker from `config` if it does not exist yet.
///
/// If the global checker has already been created, `config` is discarded and the
/// existing checker is kept.
#[cfg(feature = "std")]
pub fn init_health_checker(config: HealthCheckConfig) {
    let _installed = HEALTH_CHECKER.get_or_init(move || {
        let checker = HealthChecker::with_config(config);
        Arc::new(checker)
    });
}

/// Returns a shared handle to the global checker, creating it with the default
/// configuration if it has not been initialized.
#[cfg(feature = "std")]
pub fn health_checker() -> Arc<HealthChecker> {
    let shared = HEALTH_CHECKER.get_or_init(|| Arc::new(HealthChecker::new()));
    Arc::clone(shared)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `is_healthy` / `is_operational` classify the statuses as expected.
    #[test]
    fn test_health_status() {
        let healthy = HealthStatus::Healthy;
        assert!(healthy.is_healthy());
        assert!(healthy.is_operational());

        let unhealthy = HealthStatus::Unhealthy;
        assert!(!unhealthy.is_healthy());
        assert!(!unhealthy.is_operational());

        assert!(HealthStatus::Degraded.is_operational());
    }

    /// Every status maps to its documented HTTP code.
    #[test]
    fn test_health_status_http_codes() {
        let expected = [
            (HealthStatus::Healthy, 200),
            (HealthStatus::Degraded, 200),
            (HealthStatus::Unhealthy, 503),
            (HealthStatus::Unknown, 500),
        ];
        for (status, code) in expected {
            assert_eq!(status.http_status_code(), code);
        }
    }

    /// The three constructors set status, message and error code.
    #[test]
    fn test_health_check_result() {
        let healthy = HealthCheckResult::healthy("test".to_string());
        assert_eq!(healthy.status, HealthStatus::Healthy);
        assert!(healthy.message.is_none());

        let degraded = HealthCheckResult::degraded("test".to_string(), "warning".to_string());
        assert_eq!(degraded.status, HealthStatus::Degraded);
        assert_eq!(degraded.message, Some("warning".to_string()));

        let unhealthy = HealthCheckResult::unhealthy(
            "test".to_string(),
            "error".to_string(),
            Some(ErrorCode::DeviceError),
        );
        assert_eq!(unhealthy.status, HealthStatus::Unhealthy);
        assert_eq!(unhealthy.error_code, Some(ErrorCode::DeviceError));
    }

    /// A mixed batch aggregates to `Unhealthy` with correct counters and filters.
    #[test]
    #[cfg(feature = "std")]
    fn test_health_report() {
        let memory = HealthCheckResult::healthy("memory".to_string());
        let cpu = HealthCheckResult::degraded("cpu".to_string(), "high load".to_string());
        let disk = HealthCheckResult::unhealthy(
            "disk".to_string(),
            "full".to_string(),
            Some(ErrorCode::OutOfMemory),
        );
        let report = HealthReport::new(vec![memory, cpu, disk]);

        assert_eq!(report.status, HealthStatus::Unhealthy);
        assert_eq!(report.total_checks, 3);
        assert_eq!(report.healthy_checks, 1);
        assert_eq!(report.degraded_checks, 1);
        assert_eq!(report.unhealthy_checks, 1);

        assert_eq!(report.failing_checks().len(), 2);
        assert_eq!(report.unhealthy_checks_list().len(), 1);
    }

    /// Each `check_health` call produces checks and increments the counter.
    #[test]
    #[cfg(feature = "std")]
    fn test_health_checker() {
        let checker = HealthChecker::new();

        let first = checker.check_health();
        assert!(first.total_checks > 0);
        assert_eq!(checker.check_count(), 1);

        let _second = checker.check_health();
        assert_eq!(checker.check_count(), 2);
    }

    /// Liveness is always true; readiness only has to run without panicking.
    #[test]
    #[cfg(feature = "std")]
    fn test_readiness_and_liveness() {
        let checker = HealthChecker::new();
        assert!(checker.is_alive());
        let _ready = checker.is_ready();
    }

    /// The JSON output carries the overall status and the counters.
    #[test]
    #[cfg(feature = "std")]
    fn test_health_report_json() {
        let report = HealthReport::new(vec![
            HealthCheckResult::healthy("memory".to_string()),
            HealthCheckResult::degraded("cpu".to_string(), "warning".to_string()),
        ]);
        let json = report.to_json();

        let expected_fragments = [
            r#""status":"degraded"#,
            r#""total":2"#,
            r#""healthy":1"#,
            r#""degraded":1"#,
        ];
        for fragment in expected_fragments {
            assert!(json.contains(fragment));
        }
    }

    /// The global checker can be initialized and used to run checks.
    #[test]
    #[cfg(feature = "std")]
    fn test_global_health_checker() {
        let config = HealthCheckConfig {
            memory_degraded_threshold: 85.0,
            ..HealthCheckConfig::default()
        };
        init_health_checker(config);

        let report = health_checker().check_health();
        assert!(report.total_checks > 0);
    }
}