1use crate::processes::ProcessSnapshot;
22use crate::telemetry::Telemetry;
23use serde::{Deserialize, Serialize};
24use std::sync::atomic::{AtomicBool, Ordering};
25use std::sync::{Arc, RwLock};
26use std::thread;
27use std::time::Duration;
28
/// Zombie-process count that must be exceeded (strictly) before a
/// `ZombieCount` alert is raised.
const ZOMBIE_THRESHOLD: usize = 10;

/// Total CPU percentage above which a check counts as "high CPU".
const CPU_THRESHOLD: f32 = 90.0;

/// Number of consecutive high-CPU checks required before a `CpuHigh` alert
/// is raised. Checks run once per `CHECK_INTERVAL_SECS`, so with a 60 second
/// interval this is roughly a number of minutes.
const CPU_ALERT_MINUTES: usize = 5;

/// Pause between two health checks, in seconds.
const CHECK_INTERVAL_SECS: u64 = 60;
34
35#[derive(Debug, Clone, Serialize, Deserialize)]
37#[allow(clippy::exhaustive_structs)]
38pub struct HealthState {
39 pub timestamp: u64,
41 pub cpu_percent: f32,
43 pub ram_percent: f32,
45 pub zombie_count: usize,
47 pub process_count: usize,
49 pub top_cpu_process: Option<String>,
51 pub top_mem_process: Option<String>,
53 pub cpu_alert_count: usize,
55 pub ram_alert_count: usize,
57 pub ram_increasing: bool,
59 pub last_ram_percent: Option<f32>,
61}
62
63impl Default for HealthState {
64 fn default() -> Self {
65 Self {
66 timestamp: 0,
67 cpu_percent: 0.0,
68 ram_percent: 0.0,
69 zombie_count: 0,
70 process_count: 0,
71 top_cpu_process: None,
72 top_mem_process: None,
73 cpu_alert_count: 0,
74 ram_alert_count: 0,
75 ram_increasing: false,
76 last_ram_percent: None,
77 }
78 }
79}
80
81#[derive(Debug, Clone, Serialize, Deserialize)]
83#[allow(clippy::exhaustive_enums)]
84pub enum HealthAlert {
85 ZombieCount { count: usize, threshold: usize },
87 CpuHigh { percent: f32, minutes: usize },
89 MemoryLeak { ram_percent: f32 },
91}
92
93impl std::fmt::Display for HealthAlert {
94 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
95 match self {
96 Self::ZombieCount { count, threshold } => {
97 write!(f, "Zombie processes: {} (threshold: {})", count, threshold)
98 }
99 Self::CpuHigh { percent, minutes } => {
100 write!(f, "CPU usage: {:.1}% for {} minutes", percent, minutes)
101 }
102 Self::MemoryLeak { ram_percent } => {
103 write!(f, "Memory leak detected: {:.1}% RAM", ram_percent)
104 }
105 }
106 }
107}
108
109#[allow(clippy::exhaustive_structs)]
114pub struct HealthMonitor {
115 state: Arc<RwLock<HealthState>>,
117 stop_flag: Arc<AtomicBool>,
119 _thread: thread::JoinHandle<()>,
121 alerts: Arc<RwLock<Vec<HealthAlert>>>,
123}
124
125impl Drop for HealthMonitor {
126 fn drop(&mut self) {
127 self.stop_flag.store(true, Ordering::Relaxed);
128 }
129}
130
131impl HealthMonitor {
132 #[allow(clippy::arithmetic_side_effects)] pub fn start() -> Result<Self, String> {
149 let state = Arc::new(RwLock::new(HealthState::default()));
150 let alerts = Arc::new(RwLock::new(Vec::new()));
151 let stop_flag = Arc::new(AtomicBool::new(false));
152
153 let state_clone = Arc::clone(&state);
154 let alerts_clone = Arc::clone(&alerts);
155 let stop_flag_clone = Arc::clone(&stop_flag);
156
157 let handle = thread::spawn(move || {
158 while !stop_flag_clone.load(Ordering::Relaxed) {
159 let telemetry = Telemetry::capture();
161 let processes = ProcessSnapshot::capture();
162
163 let mut current_state = state_clone.write().unwrap_or_else(|e| {
164 eprintln!("[HealthMonitor] State lock poisoned: {}", e);
165 e.into_inner()
167 });
168
169 current_state.timestamp = telemetry.timestamp;
171 current_state.cpu_percent = processes.summary.total_cpu_percent;
172 current_state.ram_percent =
173 parse_ram_percent(&telemetry.system.ram_total, &telemetry.system.ram_free);
174 current_state.zombie_count = processes.summary.zombie_count;
175 current_state.process_count = processes.summary.total_processes;
176 current_state
177 .top_cpu_process
178 .clone_from(&processes.summary.top_cpu_consumer);
179 current_state
180 .top_mem_process
181 .clone_from(&processes.summary.top_mem_consumer);
182
183 if current_state.cpu_percent > CPU_THRESHOLD {
185 current_state.cpu_alert_count += 1;
186 if current_state.cpu_alert_count >= CPU_ALERT_MINUTES {
187 let alert = HealthAlert::CpuHigh {
188 percent: current_state.cpu_percent,
189 minutes: current_state.cpu_alert_count,
190 };
191 add_alert(&alerts_clone, alert);
192 }
193 } else {
194 current_state.cpu_alert_count = 0;
195 }
196
197 if let Some(last_ram) = current_state.last_ram_percent {
199 if current_state.ram_percent > last_ram {
200 current_state.ram_increasing = true;
201 current_state.ram_alert_count += 1;
202 if current_state.ram_alert_count >= 5 {
204 let alert = HealthAlert::MemoryLeak {
205 ram_percent: current_state.ram_percent,
206 };
207 add_alert(&alerts_clone, alert);
208 }
209 } else {
210 current_state.ram_increasing = false;
211 current_state.ram_alert_count = 0;
212 }
213 }
214 current_state.last_ram_percent = Some(current_state.ram_percent);
215
216 if current_state.zombie_count > ZOMBIE_THRESHOLD {
218 let alert = HealthAlert::ZombieCount {
219 count: current_state.zombie_count,
220 threshold: ZOMBIE_THRESHOLD,
221 };
222 add_alert(&alerts_clone, alert);
223 }
224
225 for _ in 0..CHECK_INTERVAL_SECS {
227 if stop_flag_clone.load(Ordering::Relaxed) {
228 break;
229 }
230 thread::sleep(Duration::from_secs(1));
231 }
232 }
233 });
234
235 Ok(Self {
236 state,
237 stop_flag,
238 _thread: handle,
239 alerts,
240 })
241 }
242
243 #[must_use]
245 pub fn health(&self) -> HealthState {
246 self.state.read().unwrap_or_else(|e| e.into_inner()).clone()
247 }
248
249 #[must_use]
251 pub fn alerts(&self) -> Vec<HealthAlert> {
252 self.alerts
253 .read()
254 .unwrap_or_else(|e| e.into_inner())
255 .clone()
256 }
257
258 pub fn stop(&self) {
260 self.stop_flag.store(true, Ordering::Relaxed);
261 }
262
263 #[must_use]
265 pub fn is_running(&self) -> bool {
266 !self.stop_flag.load(Ordering::Relaxed)
267 }
268}
269
270fn parse_ram_percent(ram_total: &str, ram_free: &str) -> f32 {
275 let total_val = parse_size_value(ram_total.trim());
276 let free_val = parse_size_value(ram_free.trim());
277
278 if total_val > 0.0 {
279 ((total_val - free_val) / total_val) * 100.0
280 } else {
281 0.0
282 }
283}
284
/// Parses a human-readable size such as `"13Gi"` or `"512Mi"` into a value
/// expressed in gigabyte-scale units.
///
/// Recognised suffixes are `Gi`, `Mi`, `Ki`, `MB` and `GB`; `Gi` and `GB`
/// values are returned as written and the smaller units are scaled down to
/// them. Whitespace around the string and between number and unit is
/// ignored.
///
/// Returns `0.0` when the suffix is unknown, the number does not parse, or
/// the number is negative, infinite or NaN.
fn parse_size_value(size_str: &str) -> f32 {
    // Unit suffix paired with the divisor that scales it to the Gi/GB scale.
    const UNITS: [(&str, f32); 5] = [
        ("Gi", 1.0),
        ("Mi", 1024.0),
        ("Ki", 1024.0 * 1024.0),
        ("MB", 1000.0),
        ("GB", 1.0),
    ];

    let size_str = size_str.trim();
    UNITS
        .iter()
        .find_map(|&(suffix, divisor)| {
            // `strip_suffix` removes the unit exactly once, so malformed
            // input like "1GiGi" is rejected instead of read as "1".
            size_str
                .strip_suffix(suffix)
                .map(|number| (number.trim_end(), divisor))
        })
        .and_then(|(number, divisor)| number.parse::<f32>().ok().map(|value| value / divisor))
        // `f32::from_str` accepts "NaN" and "inf"; neither is a usable size.
        .filter(|value| value.is_finite() && *value >= 0.0)
        .unwrap_or(0.0)
}
316
317fn add_alert(alerts: &Arc<RwLock<Vec<HealthAlert>>>, alert: HealthAlert) {
319 #[allow(clippy::expect_used)] let mut alerts_vec = alerts.write().expect("Alerts lock poisoned");
321 alerts_vec.push(alert);
322 if alerts_vec.len() > 100 {
323 alerts_vec.remove(0);
324 }
325}
326
#[cfg(test)]
#[allow(clippy::float_cmp, clippy::use_self)]
mod tests {
    use super::*;

    /// Feeds one RAM sample into `state` using the same trend rules the
    /// monitor applies: a rise over the previous sample bumps the counter,
    /// anything else resets it, and the sample becomes the new baseline.
    fn apply_ram_sample(state: &mut HealthState, ram_percent: f32) {
        state.ram_percent = ram_percent;
        if let Some(last_ram) = state.last_ram_percent {
            if ram_percent > last_ram {
                state.ram_increasing = true;
                state.ram_alert_count += 1;
            } else {
                state.ram_increasing = false;
                state.ram_alert_count = 0;
            }
        }
        state.last_ram_percent = Some(ram_percent);
    }

    #[test]
    fn test_health_monitor_lifecycle() {
        let monitor = HealthMonitor::start().expect("Failed to start monitor");
        assert!(monitor.is_running());
        monitor.stop();
        // `is_running` only reflects the stop flag, so the change is visible
        // immediately; there is no need to wait for the thread to wind down.
        assert!(!monitor.is_running());
    }

    #[test]
    fn test_health_state_defaults() {
        let state = HealthState::default();
        assert_eq!(state.cpu_alert_count, 0);
        assert_eq!(state.ram_alert_count, 0);
        assert!(!state.ram_increasing);
        assert!(state.last_ram_percent.is_none());
    }

    #[test]
    fn test_cpu_alert_after_consecutive_checks() {
        let mut state = HealthState::default();
        for _ in 0..5 {
            state.cpu_percent = 95.0;
            if state.cpu_percent > CPU_THRESHOLD {
                state.cpu_alert_count += 1;
            }
        }
        assert_eq!(state.cpu_alert_count, 5);
    }

    #[test]
    fn test_ram_alert_uses_ram_counter_not_cpu() {
        let mut state = HealthState {
            last_ram_percent: Some(50.0),
            ..Default::default()
        };
        // CPU stays low throughout; only the RAM trend may move the counter.
        state.cpu_percent = 10.0;
        for &sample in &[51.0_f32, 52.0, 53.0, 54.0, 55.0] {
            apply_ram_sample(&mut state, sample);
        }
        assert_eq!(state.ram_alert_count, 5);
        assert_eq!(state.cpu_alert_count, 0);
        assert!(state.ram_increasing);
    }

    #[test]
    fn test_ram_alert_resets_when_ram_decreases() {
        // Start from a state that has already seen two consecutive rises.
        let mut state = HealthState {
            ram_percent: 55.0,
            ram_alert_count: 2,
            ram_increasing: true,
            last_ram_percent: Some(55.0),
            ..Default::default()
        };

        apply_ram_sample(&mut state, 40.0);

        assert_eq!(state.ram_alert_count, 0);
        assert!(!state.ram_increasing);
        assert_eq!(state.last_ram_percent, Some(40.0));
    }

    #[test]
    fn test_parse_size_value() {
        assert!((parse_size_value("13Gi") - 13.0).abs() < 0.01);
        assert!((parse_size_value("512Mi") - 0.5).abs() < 0.01);
        assert!((parse_size_value("1048576Ki") - 1.0).abs() < 0.01);
        assert!((parse_size_value("500MB") - 0.5).abs() < 0.01);
        assert!((parse_size_value("2GB") - 2.0).abs() < 0.01);
        assert_eq!(parse_size_value("invalid"), 0.0);
    }
}