1use crate::processes::ProcessSnapshot;
22use crate::telemetry::Telemetry;
23use serde::{Deserialize, Serialize};
24use std::sync::atomic::{AtomicBool, Ordering};
25use std::sync::{Arc, RwLock};
26use std::thread;
27use std::time::Duration;
28
/// Zombie-process count that must be strictly exceeded before a
/// `HealthAlert::ZombieCount` is recorded.
const ZOMBIE_THRESHOLD: usize = 10;
/// CPU usage (percent) that must be strictly exceeded for a check to count
/// towards a high-CPU streak.
const CPU_THRESHOLD: f32 = 90.0;
/// Number of consecutive over-threshold checks after which a
/// `HealthAlert::CpuHigh` is recorded. Checks are `CHECK_INTERVAL_SECS` apart,
/// so with the 60-second interval this corresponds to minutes.
const CPU_ALERT_MINUTES: usize = 5;
/// Pause between two health checks, in seconds. The monitor sleeps in
/// one-second slices so a stop request is noticed quickly.
const CHECK_INTERVAL_SECS: u64 = 60;
34
35#[derive(Debug, Clone, Serialize, Deserialize)]
37pub struct HealthState {
38 pub timestamp: u64,
40 pub cpu_percent: f32,
42 pub ram_percent: f32,
44 pub zombie_count: usize,
46 pub process_count: usize,
48 pub top_cpu_process: Option<String>,
50 pub top_mem_process: Option<String>,
52 pub cpu_alert_count: usize,
54 pub ram_alert_count: usize,
56 pub ram_increasing: bool,
58 pub last_ram_percent: Option<f32>,
60}
61
62impl Default for HealthState {
63 fn default() -> Self {
64 Self {
65 timestamp: 0,
66 cpu_percent: 0.0,
67 ram_percent: 0.0,
68 zombie_count: 0,
69 process_count: 0,
70 top_cpu_process: None,
71 top_mem_process: None,
72 cpu_alert_count: 0,
73 ram_alert_count: 0,
74 ram_increasing: false,
75 last_ram_percent: None,
76 }
77 }
78}
79
/// An alert recorded by the health monitor when a watched metric crosses its
/// limit.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum HealthAlert {
    /// The zombie-process `count` was strictly greater than `threshold`.
    ZombieCount { count: usize, threshold: usize },
    /// CPU usage was above the CPU threshold for several consecutive checks.
    /// `percent` is the latest reading; `minutes` carries the length of the
    /// streak in checks (one check per minute with the default interval).
    CpuHigh { percent: f32, minutes: usize },
    /// RAM usage rose on several consecutive checks; `ram_percent` is the
    /// latest reading.
    MemoryLeak { ram_percent: f32 },
}
90
91impl std::fmt::Display for HealthAlert {
92 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93 match self {
94 HealthAlert::ZombieCount { count, threshold } => {
95 write!(f, "Zombie processes: {} (threshold: {})", count, threshold)
96 }
97 HealthAlert::CpuHigh { percent, minutes } => {
98 write!(f, "CPU usage: {:.1}% for {} minutes", percent, minutes)
99 }
100 HealthAlert::MemoryLeak { ram_percent } => {
101 write!(f, "Memory leak detected: {:.1}% RAM", ram_percent)
102 }
103 }
104 }
105}
106
/// Handle to a background thread that periodically samples system health.
///
/// The state and alert list are shared with the worker thread through
/// `Arc<RwLock<_>>`; the stop flag asks the worker to exit its loop.
pub struct HealthMonitor {
    /// Latest health sample, written by the worker and cloned out by readers.
    state: Arc<RwLock<HealthState>>,
    /// Set to `true` to request that the worker thread stop.
    stop_flag: Arc<AtomicBool>,
    /// Join handle of the worker thread; it is stored but never joined in the
    /// code shown here, so the thread is detached when the monitor is dropped.
    _thread: thread::JoinHandle<()>,
    /// Alerts recorded by the worker thread, oldest first.
    alerts: Arc<RwLock<Vec<HealthAlert>>>,
}
121
122impl Drop for HealthMonitor {
123 fn drop(&mut self) {
124 self.stop_flag.store(true, Ordering::Relaxed);
125 }
126}
127
128impl HealthMonitor {
129 pub fn start() -> Result<Self, String> {
141 let state = Arc::new(RwLock::new(HealthState::default()));
142 let alerts = Arc::new(RwLock::new(Vec::new()));
143 let stop_flag = Arc::new(AtomicBool::new(false));
144
145 let state_clone = Arc::clone(&state);
146 let alerts_clone = Arc::clone(&alerts);
147 let stop_flag_clone = Arc::clone(&stop_flag);
148
149 let handle = thread::spawn(move || {
150 while !stop_flag_clone.load(Ordering::Relaxed) {
151 let telemetry = Telemetry::capture();
153 let processes = ProcessSnapshot::capture();
154
155 let mut current_state = state_clone.write().unwrap_or_else(|e| {
156 eprintln!("[HealthMonitor] State lock poisoned: {}", e);
157 e.into_inner()
159 });
160
161 current_state.timestamp = telemetry.timestamp;
163 current_state.cpu_percent = processes.summary.total_cpu_percent;
164 current_state.ram_percent = parse_ram_percent(&telemetry.system.ram_total, &telemetry.system.ram_free);
165 current_state.zombie_count = processes.summary.zombie_count;
166 current_state.process_count = processes.summary.total_processes;
167 current_state.top_cpu_process = processes.summary.top_cpu_consumer.clone();
168 current_state.top_mem_process = processes.summary.top_mem_consumer.clone();
169
170 if current_state.cpu_percent > CPU_THRESHOLD {
172 current_state.cpu_alert_count += 1;
173 if current_state.cpu_alert_count >= CPU_ALERT_MINUTES {
174 let alert = HealthAlert::CpuHigh {
175 percent: current_state.cpu_percent,
176 minutes: current_state.cpu_alert_count,
177 };
178 add_alert(&alerts_clone, alert);
179 }
180 } else {
181 current_state.cpu_alert_count = 0;
182 }
183
184 if let Some(last_ram) = current_state.last_ram_percent {
186 if current_state.ram_percent > last_ram {
187 current_state.ram_increasing = true;
188 current_state.ram_alert_count += 1;
189 if current_state.ram_alert_count >= 5 {
191 let alert = HealthAlert::MemoryLeak {
192 ram_percent: current_state.ram_percent,
193 };
194 add_alert(&alerts_clone, alert);
195 }
196 } else {
197 current_state.ram_increasing = false;
198 current_state.ram_alert_count = 0;
199 }
200 }
201 current_state.last_ram_percent = Some(current_state.ram_percent);
202
203 if current_state.zombie_count > ZOMBIE_THRESHOLD {
205 let alert = HealthAlert::ZombieCount {
206 count: current_state.zombie_count,
207 threshold: ZOMBIE_THRESHOLD,
208 };
209 add_alert(&alerts_clone, alert);
210 }
211
212 for _ in 0..CHECK_INTERVAL_SECS {
214 if stop_flag_clone.load(Ordering::Relaxed) {
215 break;
216 }
217 thread::sleep(Duration::from_secs(1));
218 }
219 }
220 });
221
222 Ok(Self {
223 state,
224 stop_flag,
225 _thread: handle,
226 alerts,
227 })
228 }
229
230 pub fn health(&self) -> HealthState {
232 self.state.read().unwrap_or_else(|e| e.into_inner()).clone()
233 }
234
235 pub fn alerts(&self) -> Vec<HealthAlert> {
237 self.alerts
238 .read()
239 .unwrap_or_else(|e| e.into_inner())
240 .clone()
241 }
242
243 pub fn stop(&self) {
245 self.stop_flag.store(true, Ordering::Relaxed);
246 }
247
248 pub fn is_running(&self) -> bool {
250 !self.stop_flag.load(Ordering::Relaxed)
251 }
252}
253
254fn parse_ram_percent(ram_total: &str, ram_free: &str) -> f32 {
259 let total_val = parse_size_value(ram_total.trim());
260 let free_val = parse_size_value(ram_free.trim());
261
262 if total_val > 0.0 {
263 ((total_val - free_val) / total_val) * 100.0
264 } else {
265 0.0
266 }
267}
268
/// Parses a human-readable size such as `"13Gi"` or `"512 MB"` into a value
/// on the gigabyte scale.
///
/// Binary suffixes (`Ki`, `Mi`, `Gi`, `Ti`) are converted to GiB and decimal
/// suffixes (`KB`, `MB`, `GB`, `TB`) to GB. The two families are not converted
/// into each other, so ratios are only exact when both operands use the same
/// family. Whitespace around the string and between number and suffix is
/// ignored.
///
/// Returns `0.0` for an unknown suffix, an unparseable number, or a negative
/// or non-finite value.
fn parse_size_value(size_str: &str) -> f32 {
    // (suffix, multiplier, divisor): value = number * multiplier / divisor.
    // Keeping a separate divisor reproduces plain division for the sub-giga
    // units instead of multiplying by an inexact reciprocal.
    const UNITS: [(&str, f32, f32); 8] = [
        ("Ti", 1024.0, 1.0),
        ("Gi", 1.0, 1.0),
        ("Mi", 1.0, 1024.0),
        ("Ki", 1.0, 1024.0 * 1024.0),
        ("TB", 1000.0, 1.0),
        ("GB", 1.0, 1.0),
        ("MB", 1.0, 1000.0),
        ("KB", 1.0, 1000.0 * 1000.0),
    ];

    let size_str = size_str.trim();
    UNITS
        .iter()
        .find_map(|&(suffix, multiplier, divisor)| {
            // `strip_suffix` removes exactly one occurrence, so malformed
            // input such as "1GiGi" is rejected rather than read as 1.
            let number = size_str.strip_suffix(suffix)?.trim();
            let value = number.parse::<f32>().ok()?;
            Some(value * multiplier / divisor)
        })
        // `f32::from_str` accepts "inf", "nan" and negative numbers, none of
        // which is a meaningful memory size.
        .filter(|value| value.is_finite() && *value >= 0.0)
        .unwrap_or(0.0)
}
301
302fn add_alert(alerts: &Arc<RwLock<Vec<HealthAlert>>>, alert: HealthAlert) {
304 let mut alerts_vec = alerts.write().expect("Alerts lock poisoned");
305 alerts_vec.push(alert);
306 if alerts_vec.len() > 100 {
307 alerts_vec.remove(0);
308 }
309}
310
#[cfg(test)]
mod tests {
    use super::*;

    /// Replays the monitor loop's RAM-trend bookkeeping for one sample.
    /// Expects `last_ram_percent` to be set already.
    fn feed_ram_sample(state: &mut HealthState, sample: f32) {
        state.ram_percent = sample;
        let rising = state.ram_percent > state.last_ram_percent.unwrap();
        state.ram_increasing = rising;
        state.ram_alert_count = if rising {
            state.ram_alert_count + 1
        } else {
            0
        };
        state.last_ram_percent = Some(state.ram_percent);
    }

    #[test]
    fn test_health_monitor_lifecycle() {
        let monitor = HealthMonitor::start().expect("Failed to start monitor");
        assert!(monitor.is_running());

        monitor.stop();
        // Leave the worker a little more than one sleep slice to wind down.
        thread::sleep(Duration::from_millis(1100));

        assert!(!monitor.is_running());
    }

    #[test]
    fn test_health_state_defaults() {
        let HealthState {
            cpu_alert_count,
            ram_alert_count,
            ram_increasing,
            last_ram_percent,
            ..
        } = HealthState::default();

        assert_eq!(cpu_alert_count, 0);
        assert_eq!(ram_alert_count, 0);
        assert!(!ram_increasing);
        assert!(last_ram_percent.is_none());
    }

    #[test]
    fn test_cpu_alert_after_consecutive_checks() {
        let mut state = HealthState::default();
        let samples = [95.0_f32; 5];

        for &sample in samples.iter() {
            state.cpu_percent = sample;
            if state.cpu_percent > CPU_THRESHOLD {
                state.cpu_alert_count += 1;
            }
        }

        assert_eq!(state.cpu_alert_count, 5);
    }

    #[test]
    fn test_ram_alert_uses_ram_counter_not_cpu() {
        let mut state = HealthState {
            last_ram_percent: Some(50.0),
            ..Default::default()
        };

        // RAM climbs by one point per check while the CPU stays low.
        for step in 1..=5 {
            state.cpu_percent = 10.0;
            feed_ram_sample(&mut state, 50.0 + step as f32);
        }

        assert_eq!(state.ram_alert_count, 5);
        assert!(state.ram_increasing);
    }

    #[test]
    fn test_ram_alert_resets_when_ram_decreases() {
        // Start in the middle of a rising streak at 55%.
        let mut state = HealthState {
            ram_percent: 55.0,
            ram_alert_count: 2,
            last_ram_percent: Some(55.0),
            ..Default::default()
        };

        feed_ram_sample(&mut state, 40.0);

        assert_eq!(state.ram_alert_count, 0);
        assert!(!state.ram_increasing);
    }

    #[test]
    fn test_parse_size_value() {
        let close = |actual: f32, expected: f32| (actual - expected).abs() < 0.01;

        assert!(close(parse_size_value("13Gi"), 13.0));
        assert!(close(parse_size_value("512Mi"), 0.5));
        assert_eq!(parse_size_value("invalid"), 0.0);
    }
}