use crate::processes::ProcessSnapshot;
use crate::telemetry::Telemetry;
use serde::{Deserialize, Serialize};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, RwLock};
use std::thread;
use std::time::Duration;
/// Zombie-process count above which a `ZombieCount` alert fires.
const ZOMBIE_THRESHOLD: usize = 10;
/// Total CPU percentage considered "high" for alerting purposes.
const CPU_THRESHOLD: f32 = 90.0;
/// Consecutive high-CPU checks (one check per interval, ~minutes) before a `CpuHigh` alert fires.
const CPU_ALERT_MINUTES: usize = 5;
/// Seconds between monitoring passes (slept in 1 s slices so stop requests are honored quickly).
const CHECK_INTERVAL_SECS: u64 = 60;
/// Point-in-time snapshot of system health, plus the rolling counters the
/// monitor thread carries between samples to detect sustained conditions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthState {
    // Timestamp of the sample, copied from the telemetry capture.
    pub timestamp: u64,
    // Total CPU usage across all processes, in percent.
    pub cpu_percent: f32,
    // Used RAM as a percentage of total (derived from telemetry strings).
    pub ram_percent: f32,
    // Number of zombie processes observed in this sample.
    pub zombie_count: usize,
    // Total number of processes observed in this sample.
    pub process_count: usize,
    // Name of the process consuming the most CPU, if known.
    pub top_cpu_process: Option<String>,
    // Name of the process consuming the most memory, if known.
    pub top_mem_process: Option<String>,
    // Consecutive samples with CPU above CPU_THRESHOLD; reset on any low sample.
    pub cpu_alert_count: usize,
    // Consecutive samples with strictly rising RAM; reset when RAM stops rising.
    pub ram_alert_count: usize,
    // Whether RAM rose between the previous sample and this one.
    pub ram_increasing: bool,
    // RAM percentage from the previous sample; None until the first sample.
    pub last_ram_percent: Option<f32>,
}
impl Default for HealthState {
fn default() -> Self {
Self {
timestamp: 0,
cpu_percent: 0.0,
ram_percent: 0.0,
zombie_count: 0,
process_count: 0,
top_cpu_process: None,
top_mem_process: None,
cpu_alert_count: 0,
ram_alert_count: 0,
ram_increasing: false,
last_ram_percent: None,
}
}
}
/// Alert conditions raised by the background monitor thread.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum HealthAlert {
    /// Zombie-process count exceeded the threshold in a sample.
    ZombieCount { count: usize, threshold: usize },
    /// CPU stayed above `CPU_THRESHOLD` for `minutes` consecutive checks.
    CpuHigh { percent: f32, minutes: usize },
    /// RAM usage rose for 5 consecutive checks (simple leak heuristic).
    MemoryLeak { ram_percent: f32 },
}
impl std::fmt::Display for HealthAlert {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
HealthAlert::ZombieCount { count, threshold } => {
write!(f, "Zombie processes: {} (threshold: {})", count, threshold)
}
HealthAlert::CpuHigh { percent, minutes } => {
write!(f, "CPU usage: {:.1}% for {} minutes", percent, minutes)
}
HealthAlert::MemoryLeak { ram_percent } => {
write!(f, "Memory leak detected: {:.1}% RAM", ram_percent)
}
}
}
}
/// Handle to the background health-monitoring thread and the state it
/// shares with it.
pub struct HealthMonitor {
    // Latest sampled health state, rewritten once per check interval.
    state: Arc<RwLock<HealthState>>,
    // Cooperative shutdown flag, polled by the worker thread.
    stop_flag: Arc<AtomicBool>,
    // Worker thread handle; kept alive for ownership but never joined.
    _thread: thread::JoinHandle<()>,
    // Rolling buffer of raised alerts (capped in `add_alert`).
    alerts: Arc<RwLock<Vec<HealthAlert>>>,
}
impl Drop for HealthMonitor {
    /// Signal the worker thread to stop. The thread is NOT joined here, so
    /// it may keep running briefly after the monitor is dropped — the sleep
    /// loop polls the flag once per second, and an in-progress capture has
    /// to finish first.
    fn drop(&mut self) {
        self.stop_flag.store(true, Ordering::Relaxed);
    }
}
impl HealthMonitor {
    /// Start the monitor: spawn a worker thread that captures telemetry and
    /// process snapshots every `CHECK_INTERVAL_SECS` seconds, updates the
    /// shared `HealthState`, and records threshold alerts.
    ///
    /// Currently always returns `Ok`; the `Result` reserves room for
    /// fallible startup (e.g. permission checks) later.
    pub fn start() -> Result<Self, String> {
        let state = Arc::new(RwLock::new(HealthState::default()));
        let alerts = Arc::new(RwLock::new(Vec::new()));
        let stop_flag = Arc::new(AtomicBool::new(false));
        // Clones moved into the worker thread; the originals stay in `Self`.
        let state_clone = Arc::clone(&state);
        let alerts_clone = Arc::clone(&alerts);
        let stop_flag_clone = Arc::clone(&stop_flag);
        let handle = thread::spawn(move || {
            while !stop_flag_clone.load(Ordering::Relaxed) {
                let telemetry = Telemetry::capture();
                let processes = ProcessSnapshot::capture();
                // Recover from a poisoned lock instead of panicking so a
                // crashed lock holder cannot kill the monitor thread.
                let mut current_state = state_clone.write().unwrap_or_else(|e| {
                    eprintln!("[HealthMonitor] State lock poisoned: {}", e);
                    e.into_inner()
                });
                current_state.timestamp = telemetry.timestamp;
                current_state.cpu_percent = processes.summary.total_cpu_percent;
                current_state.ram_percent =
                    parse_ram_percent(&telemetry.system.ram_total, &telemetry.system.ram_free);
                current_state.zombie_count = processes.summary.zombie_count;
                current_state.process_count = processes.summary.total_processes;
                current_state.top_cpu_process = processes.summary.top_cpu_consumer.clone();
                current_state.top_mem_process = processes.summary.top_mem_consumer.clone();
                // CPU alert: count consecutive over-threshold samples; any
                // sample at or below the threshold resets the streak.
                // NOTE(review): once the streak reaches CPU_ALERT_MINUTES an
                // alert is pushed on every subsequent high-CPU iteration —
                // confirm this repetition is intended (alert flood risk).
                if current_state.cpu_percent > CPU_THRESHOLD {
                    current_state.cpu_alert_count += 1;
                    if current_state.cpu_alert_count >= CPU_ALERT_MINUTES {
                        let alert = HealthAlert::CpuHigh {
                            percent: current_state.cpu_percent,
                            minutes: current_state.cpu_alert_count,
                        };
                        add_alert(&alerts_clone, alert);
                    }
                } else {
                    current_state.cpu_alert_count = 0;
                }
                // Memory-leak heuristic: RAM strictly rising on consecutive
                // samples. Skipped on the very first pass (no previous value).
                if let Some(last_ram) = current_state.last_ram_percent {
                    if current_state.ram_percent > last_ram {
                        current_state.ram_increasing = true;
                        current_state.ram_alert_count += 1;
                        if current_state.ram_alert_count >= 5 {
                            let alert = HealthAlert::MemoryLeak {
                                ram_percent: current_state.ram_percent,
                            };
                            add_alert(&alerts_clone, alert);
                        }
                    } else {
                        current_state.ram_increasing = false;
                        current_state.ram_alert_count = 0;
                    }
                }
                current_state.last_ram_percent = Some(current_state.ram_percent);
                // Zombie alert fires on every sample where the count exceeds
                // the threshold (no streak required).
                if current_state.zombie_count > ZOMBIE_THRESHOLD {
                    let alert = HealthAlert::ZombieCount {
                        count: current_state.zombie_count,
                        threshold: ZOMBIE_THRESHOLD,
                    };
                    add_alert(&alerts_clone, alert);
                }
                // BUGFIX: release the write lock before sleeping. Previously
                // the guard lived until the end of this loop body, so readers
                // (`health()`) blocked for up to CHECK_INTERVAL_SECS seconds.
                drop(current_state);
                // Sleep in 1 s slices so a stop request is honored within
                // about a second instead of a full check interval.
                for _ in 0..CHECK_INTERVAL_SECS {
                    if stop_flag_clone.load(Ordering::Relaxed) {
                        break;
                    }
                    thread::sleep(Duration::from_secs(1));
                }
            }
        });
        Ok(Self {
            state,
            stop_flag,
            _thread: handle,
            alerts,
        })
    }

    /// Clone of the most recent health state.
    pub fn health(&self) -> HealthState {
        self.state.read().unwrap_or_else(|e| e.into_inner()).clone()
    }

    /// Copy of the currently retained alerts (oldest first).
    pub fn alerts(&self) -> Vec<HealthAlert> {
        self.alerts
            .read()
            .unwrap_or_else(|e| e.into_inner())
            .clone()
    }

    /// Request the worker thread to stop; it exits within about a second.
    pub fn stop(&self) {
        self.stop_flag.store(true, Ordering::Relaxed);
    }

    /// True until `stop()` (or drop) has been called. Reflects the stop
    /// flag only, not whether the worker thread has actually exited yet.
    pub fn is_running(&self) -> bool {
        !self.stop_flag.load(Ordering::Relaxed)
    }
}
/// Derive a used-RAM percentage from the human-readable total/free size
/// strings produced by the telemetry layer.
///
/// Returns 0.0 when the total cannot be parsed to a positive value.
fn parse_ram_percent(ram_total: &str, ram_free: &str) -> f32 {
    let total = parse_size_value(ram_total.trim());
    let free = parse_size_value(ram_free.trim());
    // Guard clause: a non-positive (or NaN) total yields 0.0, exactly as the
    // positive-check form did.
    if !(total > 0.0) {
        return 0.0;
    }
    ((total - free) / total) * 100.0
}
/// Parse a human-readable size string ("13Gi", "512MiB", "2GB", "1500MB",
/// "1048576Ki", ...) into the canonical unit the original arms used:
/// binary suffixes normalize to GiB, decimal MB normalizes to GB.
///
/// Returns 0.0 for unrecognized suffixes or unparsable numbers, matching the
/// original fallback. Compared to the original this also accepts the common
/// three-letter spellings ("GiB"/"MiB"/"KiB") — tools disagree on which form
/// they emit, and an unparsed value silently skewed the RAM percentage —
/// and tolerates whitespace between the number and the unit.
fn parse_size_value(size_str: &str) -> f32 {
    let s = size_str.trim();
    // Suffix table, longest spellings first so "512MiB" is split on "MiB"
    // and never mis-handled. Each entry is (suffix, multiplier).
    let units: [(&str, f32); 8] = [
        ("GiB", 1.0),
        ("MiB", 1.0 / 1024.0),
        ("KiB", 1.0 / (1024.0 * 1024.0)),
        ("GB", 1.0),
        ("MB", 1.0 / 1000.0),
        ("Gi", 1.0),
        ("Mi", 1.0 / 1024.0),
        ("Ki", 1.0 / (1024.0 * 1024.0)),
    ];
    for &(suffix, factor) in units.iter() {
        // strip_suffix removes exactly one occurrence (trim_end_matches
        // would strip repeated suffixes and accept garbage like "13GiGi").
        if let Some(num) = s.strip_suffix(suffix) {
            return num.trim().parse::<f32>().map(|v| v * factor).unwrap_or(0.0);
        }
    }
    // Unknown or missing unit: keep the original silent-zero fallback.
    0.0
}
/// Append `alert` to the shared buffer of recent alerts, keeping at most
/// `MAX_ALERTS` entries (oldest dropped first).
///
/// Recovers from a poisoned lock via `into_inner()` — consistent with the
/// monitor loop and the read accessors — instead of the previous
/// `expect(...)`, which would have panicked and killed the worker thread.
fn add_alert(alerts: &Arc<RwLock<Vec<HealthAlert>>>, alert: HealthAlert) {
    // Cap on retained alerts so the buffer cannot grow without bound.
    const MAX_ALERTS: usize = 100;
    let mut alerts_vec = alerts.write().unwrap_or_else(|e| {
        eprintln!("[HealthMonitor] Alerts lock poisoned: {}", e);
        e.into_inner()
    });
    alerts_vec.push(alert);
    if alerts_vec.len() > MAX_ALERTS {
        // O(n) shift is fine at this small fixed capacity.
        alerts_vec.remove(0);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `stop()` stores the flag synchronously and `is_running()` reads the
    /// same flag, so the state flips immediately — the previous 1.1 s sleep
    /// only slowed the suite down and is removed.
    #[test]
    fn test_health_monitor_lifecycle() {
        let monitor = HealthMonitor::start().expect("Failed to start monitor");
        assert!(monitor.is_running());
        monitor.stop();
        assert!(!monitor.is_running());
    }

    /// A fresh state carries no samples and no active alert streaks.
    #[test]
    fn test_health_state_defaults() {
        let state = HealthState::default();
        assert_eq!(state.cpu_alert_count, 0);
        assert_eq!(state.ram_alert_count, 0);
        assert!(!state.ram_increasing);
        assert!(state.last_ram_percent.is_none());
    }

    /// Mirrors the monitor loop's CPU streak logic: five consecutive
    /// over-threshold samples accumulate a count of five.
    #[test]
    fn test_cpu_alert_after_consecutive_checks() {
        let mut state = HealthState::default();
        for _ in 0..5 {
            state.cpu_percent = 95.0;
            if state.cpu_percent > CPU_THRESHOLD {
                state.cpu_alert_count += 1;
            }
        }
        assert_eq!(state.cpu_alert_count, 5);
    }

    /// The RAM streak counter must track RAM growth independently of CPU load.
    #[test]
    fn test_ram_alert_uses_ram_counter_not_cpu() {
        let mut state = HealthState {
            last_ram_percent: Some(50.0),
            ..Default::default()
        };
        for i in 0..5 {
            state.ram_percent = 50.0 + (i as f32 + 1.0);
            state.cpu_percent = 10.0; // low CPU must not affect the RAM counter
            if state.ram_percent > state.last_ram_percent.unwrap() {
                state.ram_increasing = true;
                state.ram_alert_count += 1;
            } else {
                state.ram_increasing = false;
                state.ram_alert_count = 0;
            }
            state.last_ram_percent = Some(state.ram_percent);
        }
        assert_eq!(state.ram_alert_count, 5);
        assert!(state.ram_increasing);
    }

    /// A drop in RAM usage resets the leak counter and the increasing flag.
    #[test]
    fn test_ram_alert_resets_when_ram_decreases() {
        let mut state = HealthState {
            last_ram_percent: Some(50.0),
            ..Default::default()
        };
        state.ram_percent = 55.0;
        state.ram_alert_count = 2;
        state.last_ram_percent = Some(55.0);
        state.ram_percent = 40.0;
        if state.ram_percent > state.last_ram_percent.unwrap() {
            state.ram_increasing = true;
            state.ram_alert_count += 1;
        } else {
            state.ram_increasing = false;
            state.ram_alert_count = 0;
        }
        state.last_ram_percent = Some(state.ram_percent);
        assert_eq!(state.ram_alert_count, 0);
        assert!(!state.ram_increasing);
    }

    /// Spot-checks for the size-string parser, including the unknown-unit
    /// zero fallback.
    #[test]
    fn test_parse_size_value() {
        assert!((parse_size_value("13Gi") - 13.0).abs() < 0.01);
        assert!((parse_size_value("512Mi") - 0.5).abs() < 0.01);
        assert_eq!(parse_size_value("invalid"), 0.0);
    }
}