use concerto_core::GpuHealth;
use crate::monitor::GpuSnapshot;
/// Limits used by [`classify_health`] to grade a GPU snapshot.
///
/// Every limit is inclusive: a reading equal to a limit is still within it,
/// because the classifier only reacts to values strictly greater than the limit.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct HealthThresholds {
    /// Highest temperature still graded as healthy; compared against the
    /// snapshot's `temperature_celsius` reading.
    pub max_healthy_temperature: u32,
    /// Highest temperature still graded as degraded; anything above it is
    /// graded as unhealthy. Compared against `temperature_celsius` as well.
    pub max_degraded_temperature: u32,
    /// Largest uncorrected ECC error count that does not, on its own, make
    /// the GPU unhealthy; compared against `ecc_errors_uncorrected`.
    pub max_tolerated_ecc: u64,
}
impl Default for HealthThresholds {
fn default() -> Self {
Self {
max_healthy_temperature: 75,
max_degraded_temperature: 85,
max_tolerated_ecc: 0,
}
}
}
/// Grades a GPU snapshot against the supplied thresholds.
///
/// The checks run from worst to best and the first match wins:
///
/// 1. [`GpuHealth::Unhealthy`] when `temperature_celsius` is strictly above
///    `max_degraded_temperature`, or `ecc_errors_uncorrected` is strictly
///    above `max_tolerated_ecc`.
/// 2. [`GpuHealth::Degraded`] when `temperature_celsius` is strictly above
///    `max_healthy_temperature`.
/// 3. [`GpuHealth::Healthy`] otherwise.
///
/// A reading exactly equal to a threshold does not trip it.
pub fn classify_health(snapshot: &GpuSnapshot, thresholds: &HealthThresholds) -> GpuHealth {
    // The ECC comparison is only evaluated when the temperature check has not
    // already condemned the GPU.
    let overheated = snapshot.temperature_celsius > thresholds.max_degraded_temperature;
    if overheated || snapshot.ecc_errors_uncorrected > thresholds.max_tolerated_ecc {
        return GpuHealth::Unhealthy;
    }

    // Past this point the temperature is at or below the degraded ceiling.
    if snapshot.temperature_celsius > thresholds.max_healthy_temperature {
        return GpuHealth::Degraded;
    }

    GpuHealth::Healthy
}