Skip to main content

metrics_lib/
system_health.rs

1//! # System Health Monitoring
2//!
3//! Ultra-fast system resource monitoring with process introspection.
4//!
5//! ## Features
6//!
7//! - **Process CPU/Memory tracking** - Automatic detection of current app usage
8//! - **System-wide monitoring** - CPU, memory, load average
9//! - **Sub-millisecond updates** - Fast health checks
10//! - **Cross-platform** - Works on Linux, macOS, Windows
//! - **Atomic cached reads** - Getters return values cached in atomics between refreshes
12//! - **Health scoring** - Intelligent system health assessment
13
14use std::io;
15use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
16use std::time::{Duration, Instant};
17
18#[cfg(not(target_os = "linux"))]
19use sysinfo::{get_current_pid, CpuExt, ProcessExt, System, SystemExt};
20
/// Resource monitor covering both the host and the current process.
///
/// Every reading is cached in an atomic field, so all accessors work through
/// `&self`. The struct is aligned to 64 bytes via `#[repr(align(64))]`.
#[repr(align(64))]
pub struct SystemHealth {
    /// Latest system CPU usage, stored as percent × 100.
    system_cpu: AtomicU32,
    /// Latest CPU usage of this process, stored as percent × 100.
    process_cpu: AtomicU32,
    /// Latest system memory in use, in MB.
    system_memory_mb: AtomicU64,
    /// Latest memory used by this process, in MB.
    process_memory_mb: AtomicU64,
    /// Latest 1-minute load average, stored as value × 100.
    load_average: AtomicU32,
    /// Latest thread count of this process.
    thread_count: AtomicU32,
    /// Latest open file descriptor count of this process.
    fd_count: AtomicU32,
    /// Composite health score in `0..=10000` (10000 = 100%).
    health_score: AtomicU32,
    /// Milliseconds elapsed since `created_at` when metrics were last refreshed.
    ///
    /// Kept in the same unit as `update_interval_ms` so the throttle in
    /// [`Self::maybe_update`] compares like with like. Earlier revisions
    /// stored nanoseconds here while the throttle compared against
    /// milliseconds, which froze refreshes indefinitely.
    last_update_ms: AtomicU64,
    /// Minimum age of the cached data, in milliseconds, before a getter
    /// triggers a refresh.
    update_interval_ms: u64,
    /// Monotonic instant at which this monitor was constructed.
    created_at: Instant,
    /// Linux only: cumulative process CPU clock ticks at the previous sample.
    #[cfg(target_os = "linux")]
    proc_cpu_prev: AtomicU64,
    /// Linux only: `created_at`-relative milliseconds of the previous sample.
    /// The value `u64::MAX` is the sentinel for "no prior sample yet".
    #[cfg(target_os = "linux")]
    proc_cpu_prev_ms: AtomicU64,
    /// Non-Linux only: `sysinfo` handle used for all platform queries.
    #[cfg(not(target_os = "linux"))]
    sys: parking_lot::Mutex<System>,
    /// Non-Linux only: PID of the current process, when it could be resolved.
    #[cfg(not(target_os = "linux"))]
    pid: Option<sysinfo::Pid>,
}
65
/// Point-in-time copy of every metric tracked by [`SystemHealth`].
///
/// Percentages and the load average are already converted back from their
/// scaled integer storage to plain floating-point values.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct SystemSnapshot {
    /// System-wide CPU usage as a percentage (0.0-100.0).
    pub system_cpu_percent: f64,
    /// CPU usage of this process as a percentage (0.0-100.0).
    pub process_cpu_percent: f64,
    /// System memory in use, in MB.
    pub system_memory_mb: u64,
    /// Memory used by this process, in MB.
    pub process_memory_mb: u64,
    /// 1-minute system load average.
    pub load_average: f64,
    /// Thread count of this process.
    pub thread_count: u32,
    /// Open file descriptor count of this process.
    pub fd_count: u32,
    /// Composite health score (0.0-100.0).
    pub health_score: f64,
    /// Age of the cached data: time elapsed since the last metrics refresh.
    pub last_update: Duration,
}
89
/// Metrics that describe only the current process.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct ProcessStats {
    /// CPU usage of the process as a percentage.
    pub cpu_percent: f64,
    /// Memory used by the process, in megabytes.
    pub memory_mb: f64,
    /// Thread count of the process.
    pub threads: u32,
    /// Open file handle count of the process.
    pub file_handles: u32,
    /// Time elapsed since the monitor producing these stats was created.
    pub uptime: Duration,
}
105
106impl SystemHealth {
107    /// Create new system health monitor
108    #[inline]
109    pub fn new() -> Self {
110        let instance = Self {
111            system_cpu: AtomicU32::new(0),
112            process_cpu: AtomicU32::new(0),
113            system_memory_mb: AtomicU64::new(0),
114            process_memory_mb: AtomicU64::new(0),
115            load_average: AtomicU32::new(0),
116            thread_count: AtomicU32::new(0),
117            fd_count: AtomicU32::new(0),
118            health_score: AtomicU32::new(10000), // Start with perfect health
119            last_update_ms: AtomicU64::new(0),
120            update_interval_ms: 1000, // 1 second default
121            created_at: Instant::now(),
122            #[cfg(target_os = "linux")]
123            proc_cpu_prev: std::sync::atomic::AtomicU64::new(0),
124            #[cfg(target_os = "linux")]
125            proc_cpu_prev_ms: std::sync::atomic::AtomicU64::new(u64::MAX),
126            #[cfg(not(target_os = "linux"))]
127            sys: parking_lot::Mutex::new(System::new()),
128            #[cfg(not(target_os = "linux"))]
129            pid: get_current_pid().ok(),
130        };
131
132        // Do initial update
133        instance.update_metrics();
134        instance
135    }
136
137    /// Create with custom update interval
138    #[inline]
139    pub fn with_interval(interval: Duration) -> Self {
140        let mut instance = Self::new();
141        instance.update_interval_ms = interval.as_millis() as u64;
142        instance
143    }
144
145    /// Get system CPU usage percentage - SIMPLE AF API
146    #[inline]
147    pub fn cpu_used(&self) -> f64 {
148        self.maybe_update();
149        self.system_cpu.load(Ordering::Relaxed) as f64 / 100.0
150    }
151
152    /// Get system CPU free percentage
153    #[inline]
154    pub fn cpu_free(&self) -> f64 {
155        100.0 - self.cpu_used()
156    }
157
158    /// Get system memory usage in MB
159    #[inline]
160    pub fn mem_used_mb(&self) -> f64 {
161        self.maybe_update();
162        self.system_memory_mb.load(Ordering::Relaxed) as f64
163    }
164
165    /// Get system memory usage in GB
166    #[inline]
167    pub fn mem_used_gb(&self) -> f64 {
168        self.mem_used_mb() / 1024.0
169    }
170
171    /// Get process CPU usage percentage
172    #[inline]
173    pub fn process_cpu_used(&self) -> f64 {
174        self.maybe_update();
175        self.process_cpu.load(Ordering::Relaxed) as f64 / 100.0
176    }
177
178    /// Get process memory usage in MB
179    #[inline]
180    pub fn process_mem_used_mb(&self) -> f64 {
181        self.maybe_update();
182        self.process_memory_mb.load(Ordering::Relaxed) as f64
183    }
184
185    /// Get system load average
186    #[inline]
187    pub fn load_avg(&self) -> f64 {
188        self.maybe_update();
189        self.load_average.load(Ordering::Relaxed) as f64 / 100.0
190    }
191
192    /// Get process thread count
193    #[inline]
194    pub fn thread_count(&self) -> u32 {
195        self.maybe_update();
196        self.thread_count.load(Ordering::Relaxed)
197    }
198
199    /// Get process file descriptor count
200    #[inline]
201    pub fn fd_count(&self) -> u32 {
202        self.maybe_update();
203        self.fd_count.load(Ordering::Relaxed)
204    }
205
206    /// Get overall system health score (0.0-100.0)
207    #[inline]
208    pub fn health_score(&self) -> f64 {
209        self.maybe_update();
210        self.health_score.load(Ordering::Relaxed) as f64 / 100.0
211    }
212
213    /// Quick health check - sub-microsecond if cached
214    #[inline(always)]
215    pub fn quick_check(&self) -> HealthStatus {
216        let score = self.health_score();
217
218        if score >= 80.0 {
219            HealthStatus::Healthy
220        } else if score >= 60.0 {
221            HealthStatus::Warning
222        } else if score >= 40.0 {
223            HealthStatus::Degraded
224        } else {
225            HealthStatus::Critical
226        }
227    }
228
229    /// Force immediate update of all metrics
230    #[inline]
231    pub fn update(&self) {
232        self.update_metrics();
233    }
234
235    /// Get detailed system snapshot
236    pub fn snapshot(&self) -> SystemSnapshot {
237        self.maybe_update();
238
239        // Report **time since last update** (not "monotonic ms at last update").
240        let now_ms = self.created_at.elapsed().as_millis() as u64;
241        let last_ms = self.last_update_ms.load(Ordering::Relaxed);
242        let last_update = Duration::from_millis(now_ms.saturating_sub(last_ms));
243
244        SystemSnapshot {
245            system_cpu_percent: self.system_cpu.load(Ordering::Relaxed) as f64 / 100.0,
246            process_cpu_percent: self.process_cpu.load(Ordering::Relaxed) as f64 / 100.0,
247            system_memory_mb: self.system_memory_mb.load(Ordering::Relaxed),
248            process_memory_mb: self.process_memory_mb.load(Ordering::Relaxed),
249            load_average: self.load_average.load(Ordering::Relaxed) as f64 / 100.0,
250            thread_count: self.thread_count.load(Ordering::Relaxed),
251            fd_count: self.fd_count.load(Ordering::Relaxed),
252            health_score: self.health_score.load(Ordering::Relaxed) as f64 / 100.0,
253            last_update,
254        }
255    }
256
257    /// Get process-specific statistics
258    pub fn process(&self) -> ProcessStats {
259        self.maybe_update();
260
261        ProcessStats {
262            cpu_percent: self.process_cpu.load(Ordering::Relaxed) as f64 / 100.0,
263            memory_mb: self.process_memory_mb.load(Ordering::Relaxed) as f64,
264            threads: self.thread_count.load(Ordering::Relaxed),
265            file_handles: self.fd_count.load(Ordering::Relaxed),
266            uptime: self.created_at.elapsed(),
267        }
268    }
269
270    // Internal implementation
271
272    #[inline]
273    fn maybe_update(&self) {
274        let now_ms = self.created_at.elapsed().as_millis() as u64;
275        let last_ms = self.last_update_ms.load(Ordering::Relaxed);
276
277        if now_ms.saturating_sub(last_ms) > self.update_interval_ms {
278            self.update_metrics();
279        }
280    }
281
282    fn update_metrics(&self) {
283        let now_ms = self.created_at.elapsed().as_millis() as u64;
284
285        // Update system metrics
286        if let Ok(cpu) = self.get_system_cpu() {
287            self.system_cpu
288                .store((cpu * 100.0) as u32, Ordering::Relaxed);
289        }
290
291        if let Ok(memory_mb) = self.get_system_memory_mb() {
292            self.system_memory_mb.store(memory_mb, Ordering::Relaxed);
293        }
294
295        if let Ok(load) = self.get_load_average() {
296            self.load_average
297                .store((load * 100.0) as u32, Ordering::Relaxed);
298        }
299
300        // Update process metrics
301        if let Ok(cpu) = self.get_process_cpu() {
302            self.process_cpu
303                .store((cpu * 100.0) as u32, Ordering::Relaxed);
304        }
305
306        if let Ok(memory_mb) = self.get_process_memory_mb() {
307            self.process_memory_mb.store(memory_mb, Ordering::Relaxed);
308        }
309
310        if let Ok(threads) = self.get_thread_count() {
311            self.thread_count.store(threads, Ordering::Relaxed);
312        }
313
314        if let Ok(fds) = self.get_fd_count() {
315            self.fd_count.store(fds, Ordering::Relaxed);
316        }
317
318        // Calculate health score
319        let health = self.calculate_health_score();
320        self.health_score
321            .store((health * 100.0) as u32, Ordering::Relaxed);
322
323        self.last_update_ms.store(now_ms, Ordering::Relaxed);
324    }
325
326    fn calculate_health_score(&self) -> f64 {
327        let mut score: f64 = 100.0;
328
329        // CPU penalty (system)
330        let system_cpu = self.system_cpu.load(Ordering::Relaxed) as f64 / 100.0;
331        if system_cpu > 80.0 {
332            score -= 30.0; // Heavy penalty for high CPU
333        } else if system_cpu > 60.0 {
334            score -= 15.0;
335        } else if system_cpu > 40.0 {
336            score -= 5.0;
337        }
338
339        // Load average penalty
340        let load = self.load_average.load(Ordering::Relaxed) as f64 / 100.0;
341        let cpu_count = num_cpus::get() as f64;
342        if load > cpu_count * 2.0 {
343            score -= 25.0;
344        } else if load > cpu_count * 1.5 {
345            score -= 10.0;
346        } else if load > cpu_count {
347            score -= 5.0;
348        }
349
350        // Process CPU penalty
351        let process_cpu = self.process_cpu.load(Ordering::Relaxed) as f64 / 100.0;
352        if process_cpu > 50.0 {
353            score -= 15.0;
354        } else if process_cpu > 25.0 {
355            score -= 8.0;
356        }
357
358        // Memory pressure (simplified - would need actual available memory)
359        let memory_gb = self.system_memory_mb.load(Ordering::Relaxed) as f64 / 1024.0;
360        if memory_gb > 16.0 {
361            // Assuming this is high usage
362            score -= 10.0;
363        } else if memory_gb > 8.0 {
364            score -= 5.0;
365        }
366
367        // Thread count penalty (too many threads can indicate issues)
368        let threads = self.thread_count.load(Ordering::Relaxed);
369        if threads > 1000 {
370            score -= 20.0;
371        } else if threads > 500 {
372            score -= 10.0;
373        } else if threads > 200 {
374            score -= 5.0;
375        }
376
377        // File descriptor penalty
378        let fds = self.fd_count.load(Ordering::Relaxed);
379        if fds > 10000 {
380            score -= 15.0;
381        } else if fds > 5000 {
382            score -= 8.0;
383        } else if fds > 1000 {
384            score -= 3.0;
385        }
386
387        score.max(0.0)
388    }
389
390    // Platform-specific implementations
391
392    #[cfg(target_os = "linux")]
393    fn get_system_cpu(&self) -> io::Result<f64> {
394        let contents = std::fs::read_to_string("/proc/stat")?;
395        if let Some(line) = contents.lines().next() {
396            let parts: Vec<&str> = line.split_whitespace().collect();
397            if parts.len() >= 5 && parts[0] == "cpu" {
398                let user: u64 = parts[1].parse().unwrap_or(0);
399                let nice: u64 = parts[2].parse().unwrap_or(0);
400                let system: u64 = parts[3].parse().unwrap_or(0);
401                let idle: u64 = parts[4].parse().unwrap_or(0);
402
403                let total = user + nice + system + idle;
404                let used = user + nice + system;
405
406                if total > 0 {
407                    return Ok(used as f64 / total as f64 * 100.0);
408                }
409            }
410        }
411        Ok(0.0)
412    }
413
414    #[cfg(not(target_os = "linux"))]
415    fn get_system_cpu(&self) -> io::Result<f64> {
416        // Cross-platform via sysinfo
417        let mut guard = self.sys.lock();
418        guard.refresh_cpu();
419        Ok(guard.global_cpu_info().cpu_usage() as f64)
420    }
421
422    #[cfg(target_os = "linux")]
423    fn get_system_memory_mb(&self) -> io::Result<u64> {
424        let contents = std::fs::read_to_string("/proc/meminfo")?;
425        let mut total_kb = 0u64;
426        let mut free_kb = 0u64;
427        let mut available_kb = 0u64;
428
429        for line in contents.lines() {
430            if line.starts_with("MemTotal:") {
431                total_kb = line
432                    .split_whitespace()
433                    .nth(1)
434                    .and_then(|s| s.parse().ok())
435                    .unwrap_or(0);
436            } else if line.starts_with("MemFree:") {
437                free_kb = line
438                    .split_whitespace()
439                    .nth(1)
440                    .and_then(|s| s.parse().ok())
441                    .unwrap_or(0);
442            } else if line.starts_with("MemAvailable:") {
443                available_kb = line
444                    .split_whitespace()
445                    .nth(1)
446                    .and_then(|s| s.parse().ok())
447                    .unwrap_or(0);
448            }
449        }
450
451        // Use available if present, otherwise fall back to free
452        let used_kb = if available_kb > 0 {
453            total_kb - available_kb
454        } else {
455            total_kb - free_kb
456        };
457
458        Ok(used_kb / 1024) // Convert to MB
459    }
460
461    #[cfg(not(target_os = "linux"))]
462    fn get_system_memory_mb(&self) -> io::Result<u64> {
463        let mut guard = self.sys.lock();
464        guard.refresh_memory();
465        // sysinfo reports memory in KiB
466        let used_kib = guard.used_memory();
467        Ok(used_kib / 1024)
468    }
469
470    #[cfg(target_os = "linux")]
471    fn get_load_average(&self) -> io::Result<f64> {
472        let contents = std::fs::read_to_string("/proc/loadavg")?;
473        if let Some(first) = contents.split_whitespace().next() {
474            return first
475                .parse()
476                .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Invalid load average"));
477        }
478        Ok(0.0)
479    }
480
481    #[cfg(not(target_os = "linux"))]
482    fn get_load_average(&self) -> io::Result<f64> {
483        let guard = self.sys.lock();
484        let la = guard.load_average();
485        Ok(la.one)
486    }
487
488    #[cfg(target_os = "linux")]
489    fn get_process_cpu(&self) -> io::Result<f64> {
490        // Delta sample: ((utime + stime) - prev) / (clock_ticks_per_sec * elapsed_s * cores) * 100
491        // First sample returns 0 (no prior baseline) and seeds the state.
492        let contents = std::fs::read_to_string("/proc/self/stat")?;
493        let parts: Vec<&str> = contents.split_whitespace().collect();
494        if parts.len() < 15 {
495            return Ok(0.0);
496        }
497        let utime: u64 = parts[13].parse().unwrap_or(0);
498        let stime: u64 = parts[14].parse().unwrap_or(0);
499        let total_ticks = utime.saturating_add(stime);
500        let now_ms = self.created_at.elapsed().as_millis() as u64;
501
502        let prev_ticks = self.proc_cpu_prev.load(Ordering::Relaxed);
503        let prev_ms = self.proc_cpu_prev_ms.load(Ordering::Relaxed);
504
505        // Store current sample for the next call.
506        self.proc_cpu_prev.store(total_ticks, Ordering::Relaxed);
507        self.proc_cpu_prev_ms.store(now_ms, Ordering::Relaxed);
508
509        if prev_ms == u64::MAX {
510            // First sample — no delta yet.
511            return Ok(0.0);
512        }
513        let elapsed_ms = now_ms.saturating_sub(prev_ms);
514        if elapsed_ms == 0 {
515            return Ok(0.0);
516        }
517        let delta_ticks = total_ticks.saturating_sub(prev_ticks) as f64;
518        // Linux clock ticks per second is conventionally 100; the precise value
519        // would come from `sysconf(_SC_CLK_TCK)`, but the standard kernel build
520        // uses USER_HZ=100 and this matches `/proc/stat` semantics.
521        let clk_tck: f64 = 100.0;
522        let elapsed_s = elapsed_ms as f64 / 1000.0;
523        let cores = num_cpus::get().max(1) as f64;
524        // Per-core percentage: 100% means one whole core saturated.
525        let pct = (delta_ticks / (clk_tck * elapsed_s * cores)) * 100.0;
526        Ok(pct.clamp(0.0, 100.0))
527    }
528
529    #[cfg(not(target_os = "linux"))]
530    fn get_process_cpu(&self) -> io::Result<f64> {
531        let mut guard = self.sys.lock();
532        if let Some(pid) = self.pid {
533            guard.refresh_process(pid);
534            if let Some(proc_) = guard.process(pid) {
535                // sysinfo's cpu_usage can exceed 100 on multi-core hosts.
536                // Normalize to per-core percentage (0..100).
537                let raw = proc_.cpu_usage() as f64;
538                let cores = num_cpus::get() as f64;
539                let norm = if cores > 0.0 { raw / cores } else { raw };
540                return Ok(norm.clamp(0.0, 100.0));
541            }
542        }
543        Ok(0.0)
544    }
545
546    #[cfg(target_os = "linux")]
547    fn get_process_memory_mb(&self) -> io::Result<u64> {
548        let contents = std::fs::read_to_string("/proc/self/status")?;
549        for line in contents.lines() {
550            if line.starts_with("VmRSS:") {
551                if let Some(kb_str) = line.split_whitespace().nth(1) {
552                    if let Ok(kb) = kb_str.parse::<u64>() {
553                        return Ok(kb / 1024); // Convert to MB
554                    }
555                }
556            }
557        }
558        Ok(0)
559    }
560
561    #[cfg(not(target_os = "linux"))]
562    fn get_process_memory_mb(&self) -> io::Result<u64> {
563        let mut guard = self.sys.lock();
564        if let Some(pid) = self.pid {
565            guard.refresh_process(pid);
566            if let Some(proc_) = guard.process(pid) {
567                // memory() in KiB
568                return Ok(proc_.memory() / 1024);
569            }
570        }
571        Ok(0)
572    }
573
574    #[cfg(target_os = "linux")]
575    fn get_thread_count(&self) -> io::Result<u32> {
576        let contents = std::fs::read_to_string("/proc/self/status")?;
577        for line in contents.lines() {
578            if line.starts_with("Threads:") {
579                if let Some(count_str) = line.split_whitespace().nth(1) {
580                    if let Ok(count) = count_str.parse() {
581                        return Ok(count);
582                    }
583                }
584            }
585        }
586        Ok(1) // At least 1 thread (current)
587    }
588
589    #[cfg(not(target_os = "linux"))]
590    fn get_thread_count(&self) -> io::Result<u32> {
591        // sysinfo doesn't expose per-process thread count uniformly; approximate with 1
592        // until a portable method is added.
593        Ok(1)
594    }
595
596    #[cfg(target_os = "linux")]
597    fn get_fd_count(&self) -> io::Result<u32> {
598        match std::fs::read_dir("/proc/self/fd") {
599            Ok(entries) => Ok(entries.count() as u32),
600            Err(_) => Ok(0),
601        }
602    }
603
604    #[cfg(not(target_os = "linux"))]
605    fn get_fd_count(&self) -> io::Result<u32> {
606        // Not portable via sysinfo; return 0 on non-Linux.
607        Ok(0)
608    }
609}
610
/// Coarse classification of the composite health score.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub enum HealthStatus {
    /// Score of 80% or higher.
    Healthy,
    /// Score from 60% up to (but not including) 80%.
    Warning,
    /// Score from 40% up to (but not including) 60%.
    Degraded,
    /// Score below 40%.
    Critical,
}
624
625impl HealthStatus {
626    /// Check if status indicates system is degraded or worse
627    #[inline]
628    pub fn is_degraded(&self) -> bool {
629        matches!(self, Self::Degraded | Self::Critical)
630    }
631
632    /// Check if status indicates system is healthy
633    #[inline]
634    pub fn is_healthy(&self) -> bool {
635        matches!(self, Self::Healthy)
636    }
637
638    /// Check if status has warnings or worse
639    #[inline]
640    pub fn has_issues(&self) -> bool {
641        !matches!(self, Self::Healthy)
642    }
643}
644
645impl Default for SystemHealth {
646    fn default() -> Self {
647        Self::new()
648    }
649}
650
651impl std::fmt::Display for SystemHealth {
652    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
653        let snapshot = self.snapshot();
654        write!(
655            f,
656            "SystemHealth(CPU: {:.1}%, Mem: {} MB, Health: {:.1}%)",
657            snapshot.system_cpu_percent, snapshot.system_memory_mb, snapshot.health_score
658        )
659    }
660}
661
662impl std::fmt::Debug for SystemHealth {
663    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
664        let snapshot = self.snapshot();
665        f.debug_struct("SystemHealth")
666            .field("system_cpu", &snapshot.system_cpu_percent)
667            .field("process_cpu", &snapshot.process_cpu_percent)
668            .field("system_memory_mb", &snapshot.system_memory_mb)
669            .field("process_memory_mb", &snapshot.process_memory_mb)
670            .field("load_average", &snapshot.load_average)
671            .field("threads", &snapshot.thread_count)
672            .field("fds", &snapshot.fd_count)
673            .field("health_score", &snapshot.health_score)
674            .finish()
675    }
676}
677
678impl std::fmt::Display for HealthStatus {
679    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
680        match self {
681            Self::Healthy => write!(f, "Healthy"),
682            Self::Warning => write!(f, "Warning"),
683            Self::Degraded => write!(f, "Degraded"),
684            Self::Critical => write!(f, "Critical"),
685        }
686    }
687}
688
// Thread safety
// SystemHealth is composed of atomic types (Send + Sync) and `Instant` (Send +
// Sync). Non-Linux builds additionally hold a `parking_lot::Mutex<sysinfo::System>`
// (Send + Sync via the contained `System: Send`) and an `Option<sysinfo::Pid>`
// (Send + Sync). The compiler derives Send + Sync automatically; no explicit
// `unsafe impl` is required.
694
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;

    /// Every getter is callable and `quick_check` yields a known variant.
    #[test]
    fn test_basic_functionality() {
        let monitor = SystemHealth::new();

        let _ = monitor.cpu_used();
        let _ = monitor.mem_used_mb();
        let _ = monitor.process_cpu_used();
        let _ = monitor.process_mem_used_mb();
        let _ = monitor.load_avg();
        let _ = monitor.thread_count();
        let _ = monitor.fd_count();
        let _ = monitor.health_score();

        let is_known_variant = match monitor.quick_check() {
            HealthStatus::Healthy
            | HealthStatus::Warning
            | HealthStatus::Degraded
            | HealthStatus::Critical => true,
        };
        assert!(is_known_variant);
    }

    /// Used and free CPU percentages add up to roughly 100.
    #[test]
    fn test_cpu_free() {
        let monitor = SystemHealth::new();

        let used = monitor.cpu_used();
        let free = monitor.cpu_free();

        let deviation = (used + free - 100.0).abs();
        assert!(deviation < 0.1);
    }

    /// The GB getter is the MB getter divided by 1024.
    #[test]
    fn test_memory_units() {
        let monitor = SystemHealth::new();

        let mb = monitor.mem_used_mb();
        let gb = monitor.mem_used_gb();

        if mb > 0.0 {
            let deviation = (gb * 1024.0 - mb).abs();
            assert!(deviation < 1.0);
        }
    }

    /// Snapshot values fall inside their documented ranges.
    #[test]
    fn test_snapshot() {
        let snap = SystemHealth::new().snapshot();

        assert!(snap.system_cpu_percent >= 0.0);
        assert!(snap.system_cpu_percent <= 100.0);
        assert!(snap.health_score >= 0.0);
        assert!(snap.health_score <= 100.0);
        // At least the current thread must be counted.
        assert!(snap.thread_count > 0);
    }

    /// Process statistics are populated with plausible values.
    #[test]
    fn test_process_stats() {
        let stats = SystemHealth::new().process();

        // At least the current thread must be counted.
        assert!(stats.threads > 0);
        assert!(stats.uptime > Duration::ZERO);
        assert!(stats.cpu_percent >= 0.0);
        assert!(stats.memory_mb >= 0.0);
    }

    /// The three predicates agree with the variant they are called on.
    #[test]
    fn test_health_status() {
        // (status, is_healthy, is_degraded, has_issues)
        let expectations = [
            (HealthStatus::Healthy, true, false, false),
            (HealthStatus::Warning, false, false, true),
            (HealthStatus::Degraded, false, true, true),
            (HealthStatus::Critical, false, true, true),
        ];

        for &(status, healthy, degraded, issues) in expectations.iter() {
            assert_eq!(status.is_healthy(), healthy);
            assert_eq!(status.is_degraded(), degraded);
            assert_eq!(status.has_issues(), issues);
        }
    }

    /// A monitor built with a custom interval still serves metrics.
    #[test]
    fn test_custom_interval() {
        let monitor = SystemHealth::with_interval(Duration::from_millis(500));

        let _ = monitor.cpu_used();
        let _ = monitor.health_score();
    }

    /// 0.9.2 regression: `maybe_update` previously compared milliseconds
    /// against a nanosecond-typed `last_update`, which froze the throttle and
    /// pinned all values to their initial reads. After the fix,
    /// `last_update_ms` advances once the interval elapses.
    #[test]
    fn test_maybe_update_actually_refreshes_after_interval() {
        let monitor = SystemHealth::with_interval(Duration::from_millis(50));
        let snap_before = monitor.snapshot();
        let last_ms_before = monitor.last_update_ms.load(Ordering::Relaxed);

        // Outlast the interval, then touch a getter to hit the throttle path.
        thread::sleep(Duration::from_millis(120));
        let _ = monitor.cpu_used();
        let snap_after = monitor.snapshot();
        let last_ms_after = monitor.last_update_ms.load(Ordering::Relaxed);

        assert!(
            last_ms_after > last_ms_before,
            "last_update_ms should advance after sleeping past the throttle interval \
             (before={last_ms_before}, after={last_ms_after})",
        );
        // Right after a refresh the reported age must be small; the prior bug
        // reported the monotonic time since creation instead.
        assert!(
            snap_after.last_update <= Duration::from_secs(1),
            "snapshot.last_update should be 'time since last refresh', \
             got {:?}",
            snap_after.last_update,
        );
        // Sanity: both snapshots still carry finite values.
        assert!(snap_before.system_cpu_percent.is_finite());
        assert!(snap_after.system_cpu_percent.is_finite());
    }

    /// A forced refresh leaves the score valid.
    #[test]
    fn test_force_update() {
        let monitor = SystemHealth::new();

        let score_before = monitor.health_score();
        monitor.update();
        let score_after = monitor.health_score();

        // The scores may or may not differ; both must be valid.
        assert!(score_before >= 0.0);
        assert!(score_after >= 0.0);
    }

    /// Ten threads hammering the getters leave the monitor functional.
    #[test]
    fn test_concurrent_access() {
        let shared = std::sync::Arc::new(SystemHealth::new());

        let workers: Vec<_> = (0..10)
            .map(|_| {
                let monitor = shared.clone();
                thread::spawn(move || {
                    for _ in 0..100 {
                        let _ = monitor.cpu_used();
                        let _ = monitor.mem_used_mb();
                        let _ = monitor.quick_check();
                    }
                })
            })
            .collect();

        for worker in workers {
            worker.join().unwrap();
        }

        let final_score = shared.health_score();
        assert!((0.0..=100.0).contains(&final_score));
    }

    /// `Display` and `Debug` output contain the expected labels.
    #[test]
    fn test_display_formatting() {
        let monitor = SystemHealth::new();

        let shown = format!("{monitor}");
        for label in ["SystemHealth", "CPU", "Mem"].iter() {
            assert!(shown.contains(label));
        }

        let debugged = format!("{monitor:?}");
        assert!(debugged.contains("SystemHealth"));

        let status_text = format!("{}", monitor.quick_check());
        assert!(!status_text.is_empty());
    }
}
904
#[cfg(all(test, feature = "bench-tests", not(tarpaulin), not(coverage)))]
#[allow(unused_imports)]
mod benchmarks {
    use super::*;
    use std::time::Instant;

    /// Times `quick_check` on a warm cache; budget is 200 ns per call.
    #[cfg_attr(not(feature = "bench-tests"), ignore)]
    #[test]
    fn bench_quick_check() {
        let monitor = SystemHealth::new();
        let iterations = 1_000_000;

        let timer = Instant::now();
        for _ in 0..iterations {
            let _ = monitor.quick_check();
        }
        let total_ns = timer.elapsed().as_nanos();

        println!(
            "SystemHealth quick_check: {:.2} ns/op",
            total_ns as f64 / iterations as f64
        );

        // Relaxed from 100ns to 200ns.
        assert!(total_ns / iterations < 200);
    }

    /// Times three cached getters per iteration; budget is 1000 ns per
    /// iteration (all three calls together).
    #[cfg_attr(not(feature = "bench-tests"), ignore)]
    #[test]
    fn bench_cached_metrics() {
        let monitor = SystemHealth::new();
        let iterations = 1_000_000;

        let timer = Instant::now();
        for _ in 0..iterations {
            let _ = monitor.cpu_used();
            let _ = monitor.mem_used_mb();
            let _ = monitor.health_score();
        }
        let total_ns = timer.elapsed().as_nanos();

        // The printed figure is per getter call, hence the division by 3.
        println!(
            "SystemHealth cached metrics: {:.2} ns/op",
            total_ns as f64 / iterations as f64 / 3.0
        );

        // Relaxed from 500ns to 1000ns.
        assert!(total_ns / iterations < 1000);
    }

    /// Times 1000 forced refreshes; budget is 2000 ms for the whole run.
    #[cfg_attr(not(feature = "bench-tests"), ignore)]
    #[test]
    fn bench_force_update() {
        let monitor = SystemHealth::new();
        // Fewer iterations: each one performs a real refresh.
        let iterations = 1000;

        let timer = Instant::now();
        for _ in 0..iterations {
            monitor.update();
        }
        let elapsed = timer.elapsed();

        println!(
            "SystemHealth force update: {:.2} μs/op",
            elapsed.as_micros() as f64 / iterations as f64
        );

        // Relaxed from 1000ms to 2000ms.
        assert!(elapsed.as_millis() < 2000);
    }
}