Skip to main content

memscope_rs/capture/
system_monitor.rs

1//! Global System Monitor - Background Thread Collection
2//!
3//! Architecture:
4//! - Background thread collects system metrics every 100ms
5//! - Atomic variables store current values (lock-free reads)
6//! - `track!` only reads atomic values (nanosecond overhead)
7//! - No blocking on data collection
8//!
9//! Features:
10//! - CPU monitoring
11//! - Memory monitoring
12//! - I/O monitoring (basic)
13//! - GPU monitoring (platform-specific)
14//! - Correlation analysis
15//! - Performance scoring
16
// NOTE(review): this silences every compiler warning (and the `unused` lint
// group) for the whole module, which can hide real problems such as unused
// results or dead code. Consider narrowing it to the specific lints needed.
#![allow(warnings, unused)]
18
19use serde::{Deserialize, Serialize};
20use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
21use std::sync::{Arc, Mutex};
22use std::thread::{self, JoinHandle};
23use std::time::{Duration, Instant};
24use sysinfo::System;
25
/// Lazily initialised, process-wide [`SystemMonitor`] instance.
///
/// Populated on the first call to [`SystemMonitor::global`]; because it lives
/// in a `static`, it is never dropped.
static SYSTEM_MONITOR: std::sync::OnceLock<SystemMonitor> = std::sync::OnceLock::new();
27
/// Holder of the most recently sampled system metrics.
///
/// Every metric is an `AtomicU64` shared through an `Arc` with the background
/// sampler thread spawned in `SystemMonitor::new`. The sampler stores, the
/// accessor methods only load, so reads never block.
pub struct SystemMonitor {
    /// Average CPU usage in percent, stored as the bit pattern of an `f64`.
    cpu_usage: Arc<AtomicU64>,
    /// Available memory as reported by `sysinfo`'s `available_memory()`.
    memory_available: Arc<AtomicU64>,
    /// Total memory as reported by `sysinfo`'s `total_memory()`.
    memory_total: Arc<AtomicU64>,
    /// Disk read throughput in bytes per second (only written on Linux).
    disk_read_bps: Arc<AtomicU64>,
    /// Disk write throughput in bytes per second (only written on Linux).
    disk_write_bps: Arc<AtomicU64>,
    /// Network receive throughput in bytes per second (only written on Linux).
    network_rx_bps: Arc<AtomicU64>,
    /// Network transmit throughput in bytes per second (only written on Linux).
    network_tx_bps: Arc<AtomicU64>,
    /// GPU utilisation reported by `nvidia-smi`, stored as the bit pattern of an `f64`.
    gpu_usage: Arc<AtomicU64>,
    /// GPU memory in use, in bytes (the `nvidia-smi` value multiplied by 1024 * 1024).
    gpu_memory_used: Arc<AtomicU64>,
    /// Total GPU memory, in bytes (the `nvidia-smi` value multiplied by 1024 * 1024).
    gpu_memory_total: Arc<AtomicU64>,
    /// Wall-clock time of the last refresh, in milliseconds since the Unix epoch.
    last_update: Arc<AtomicU64>,
    /// Cleared to ask the sampler thread to leave its loop.
    running: Arc<AtomicBool>,
    /// Join handle of the sampler thread; `None` once it has been taken for joining.
    handle: Mutex<Option<JoinHandle<()>>>,
}
43
44impl SystemMonitor {
45    fn new() -> Self {
46        let cpu_usage = Arc::new(AtomicU64::new(0));
47        let memory_available = Arc::new(AtomicU64::new(0));
48        let memory_total = Arc::new(AtomicU64::new(0));
49        let disk_read_bps = Arc::new(AtomicU64::new(0));
50        let disk_write_bps = Arc::new(AtomicU64::new(0));
51        let network_rx_bps = Arc::new(AtomicU64::new(0));
52        let network_tx_bps = Arc::new(AtomicU64::new(0));
53        let gpu_usage = Arc::new(AtomicU64::new(0));
54        let gpu_memory_used = Arc::new(AtomicU64::new(0));
55        let gpu_memory_total = Arc::new(AtomicU64::new(0));
56        let last_update = Arc::new(AtomicU64::new(0));
57        let running = Arc::new(AtomicBool::new(true));
58
59        let cpu_usage_clone = cpu_usage.clone();
60        let memory_available_clone = memory_available.clone();
61        let memory_total_clone = memory_total.clone();
62        let disk_read_bps_clone = disk_read_bps.clone();
63        let disk_write_bps_clone = disk_write_bps.clone();
64        let network_rx_bps_clone = network_rx_bps.clone();
65        let network_tx_bps_clone = network_tx_bps.clone();
66        let gpu_usage_clone = gpu_usage.clone();
67        let gpu_memory_used_clone = gpu_memory_used.clone();
68        let gpu_memory_total_clone = gpu_memory_total.clone();
69        let last_update_clone = last_update.clone();
70        let running_clone = running.clone();
71
72        let handle = thread::spawn(move || {
73            let mut sys = System::new_all();
74            sys.refresh_all();
75
76            let mut last_refresh = Instant::now();
77            let mut last_disk_read = 0u64;
78            let mut last_disk_write = 0u64;
79            let mut last_network_rx = 0u64;
80            let mut last_network_tx = 0u64;
81
82            while running_clone.load(Ordering::Relaxed) {
83                let now = Instant::now();
84
85                if now.duration_since(last_refresh).as_millis() >= 100 {
86                    sys.refresh_cpu_all();
87                    sys.refresh_memory();
88
89                    let cpus = sys.cpus();
90                    if !cpus.is_empty() {
91                        let total: f64 = cpus.iter().map(|c| c.cpu_usage() as f64).sum();
92                        let avg = (total / cpus.len() as f64).min(100.0);
93                        cpu_usage_clone.store(avg.to_bits(), Ordering::Release);
94                    }
95
96                    memory_available_clone.store(sys.available_memory(), Ordering::Release);
97                    memory_total_clone.store(sys.total_memory(), Ordering::Release);
98
99                    #[cfg(target_os = "linux")]
100                    {
101                        if let Ok(io_stats) = collect_io_stats() {
102                            let elapsed_sec = now.duration_since(last_refresh).as_secs_f64();
103                            if elapsed_sec > 0.0 {
104                                let read_bps = ((io_stats.read_bytes - last_disk_read) as f64
105                                    / elapsed_sec)
106                                    as u64;
107                                let write_bps = ((io_stats.write_bytes - last_disk_write) as f64
108                                    / elapsed_sec)
109                                    as u64;
110                                disk_read_bps_clone.store(read_bps, Ordering::Release);
111                                disk_write_bps_clone.store(write_bps, Ordering::Release);
112                            }
113                            last_disk_read = io_stats.read_bytes;
114                            last_disk_write = io_stats.write_bytes;
115
116                            if let Ok(net_stats) = collect_network_stats() {
117                                let rx_bps = ((net_stats.rx_bytes - last_network_rx) as f64
118                                    / elapsed_sec)
119                                    as u64;
120                                let tx_bps = ((net_stats.tx_bytes - last_network_tx) as f64
121                                    / elapsed_sec)
122                                    as u64;
123                                network_rx_bps_clone.store(rx_bps, Ordering::Release);
124                                network_tx_bps_clone.store(tx_bps, Ordering::Release);
125                                last_network_rx = net_stats.rx_bytes;
126                                last_network_tx = net_stats.tx_bytes;
127                            }
128                        }
129                    }
130
131                    #[cfg(target_os = "linux")]
132                    {
133                        if let Ok(gpu_info) = collect_nvidia_gpu() {
134                            gpu_usage_clone.store(gpu_info.usage.to_bits(), Ordering::Release);
135                            gpu_memory_used_clone.store(gpu_info.memory_used, Ordering::Release);
136                            gpu_memory_total_clone.store(gpu_info.memory_total, Ordering::Release);
137                        }
138                    }
139
140                    last_update_clone.store(
141                        std::time::SystemTime::now()
142                            .duration_since(std::time::UNIX_EPOCH)
143                            .map(|d| d.as_millis() as u64)
144                            .unwrap_or(0),
145                        Ordering::Release,
146                    );
147
148                    last_refresh = now;
149                }
150
151                thread::sleep(Duration::from_millis(50));
152            }
153        });
154
155        Self {
156            cpu_usage,
157            memory_available,
158            memory_total,
159            disk_read_bps,
160            disk_write_bps,
161            network_rx_bps,
162            network_tx_bps,
163            gpu_usage,
164            gpu_memory_used,
165            gpu_memory_total,
166            last_update,
167            running,
168            handle: Mutex::new(Some(handle)),
169        }
170    }
171
    /// Returns the process-wide monitor.
    ///
    /// The first call creates it through `Self::new`, which also starts the
    /// background sampler thread; later calls return the same instance.
    pub fn global() -> &'static Self {
        SYSTEM_MONITOR.get_or_init(Self::new)
    }
175
176    pub fn shutdown() {
177        if let Some(monitor) = SYSTEM_MONITOR.get() {
178            monitor.running.store(false, Ordering::Release);
179            if let Ok(mut handle_guard) = monitor.handle.lock() {
180                if let Some(handle) = handle_guard.take() {
181                    let _ = handle.join();
182                }
183            }
184        }
185    }
186
    /// Returns the state of the `running` flag.
    ///
    /// The flag starts as `true` and is cleared by `shutdown` and by `Drop`.
    /// It records that a stop was requested, not that the thread has exited.
    #[inline]
    pub fn is_running(&self) -> bool {
        self.running.load(Ordering::Acquire)
    }
191
192    #[inline]
193    pub fn cpu_usage(&self) -> f64 {
194        let bits = self.cpu_usage.load(Ordering::Acquire);
195        let value = f64::from_bits(bits);
196        if value.is_nan() || value < 0.0 {
197            0.0
198        } else {
199            value.min(100.0)
200        }
201    }
202
    /// Returns the last sampled available memory, as reported by `sysinfo`.
    ///
    /// Reads 0 until the sampler has completed its first refresh.
    #[inline]
    pub fn memory_available(&self) -> u64 {
        self.memory_available.load(Ordering::Acquire)
    }
207
    /// Returns the last sampled total memory, as reported by `sysinfo`.
    ///
    /// Reads 0 until the sampler has completed its first refresh.
    #[inline]
    pub fn memory_total(&self) -> u64 {
        self.memory_total.load(Ordering::Acquire)
    }
212
213    #[inline]
214    pub fn memory_used(&self) -> u64 {
215        let total = self.memory_total.load(Ordering::Acquire);
216        let available = self.memory_available.load(Ordering::Acquire);
217        total.saturating_sub(available)
218    }
219
220    #[inline]
221    pub fn memory_usage_percent(&self) -> f64 {
222        let total = self.memory_total.load(Ordering::Acquire);
223        let available = self.memory_available.load(Ordering::Acquire);
224        if total > 0 {
225            ((total - available) as f64 / total as f64) * 100.0
226        } else {
227            0.0
228        }
229    }
230
    /// Returns the last computed disk read throughput in bytes per second.
    ///
    /// Only written by the sampler on Linux; reads 0 on other targets.
    #[inline]
    pub fn disk_read_bps(&self) -> u64 {
        self.disk_read_bps.load(Ordering::Acquire)
    }
235
    /// Returns the last computed disk write throughput in bytes per second.
    ///
    /// Only written by the sampler on Linux; reads 0 on other targets.
    #[inline]
    pub fn disk_write_bps(&self) -> u64 {
        self.disk_write_bps.load(Ordering::Acquire)
    }
240
    /// Returns the last computed network receive throughput in bytes per second.
    ///
    /// Only written by the sampler on Linux; reads 0 on other targets.
    #[inline]
    pub fn network_rx_bps(&self) -> u64 {
        self.network_rx_bps.load(Ordering::Acquire)
    }
245
    /// Returns the last computed network transmit throughput in bytes per second.
    ///
    /// Only written by the sampler on Linux; reads 0 on other targets.
    #[inline]
    pub fn network_tx_bps(&self) -> u64 {
        self.network_tx_bps.load(Ordering::Acquire)
    }
250
251    #[inline]
252    pub fn gpu_usage(&self) -> f64 {
253        let bits = self.gpu_usage.load(Ordering::Acquire);
254        f64::from_bits(bits)
255    }
256
    /// Returns the last sampled GPU memory in use, in bytes.
    ///
    /// Reads 0 when no GPU sample has been stored.
    #[inline]
    pub fn gpu_memory_used(&self) -> u64 {
        self.gpu_memory_used.load(Ordering::Acquire)
    }
261
    /// Returns the last sampled total GPU memory, in bytes.
    ///
    /// Reads 0 when no GPU sample has been stored.
    #[inline]
    pub fn gpu_memory_total(&self) -> u64 {
        self.gpu_memory_total.load(Ordering::Acquire)
    }
266
267    #[inline]
268    pub fn gpu_memory_usage_percent(&self) -> f64 {
269        let total = self.gpu_memory_total.load(Ordering::Acquire);
270        let used = self.gpu_memory_used.load(Ordering::Acquire);
271        if total > 0 {
272            (used as f64 / total as f64) * 100.0
273        } else {
274            0.0
275        }
276    }
277
    /// Returns the wall-clock time of the last refresh, in milliseconds since
    /// the Unix epoch.
    ///
    /// Reads 0 before the first refresh, or if the system clock reported a time
    /// before the epoch when the refresh was stamped.
    #[inline]
    pub fn last_update(&self) -> u64 {
        self.last_update.load(Ordering::Acquire)
    }
282
283    #[inline]
284    pub fn thread_count(&self) -> usize {
285        std::thread::available_parallelism()
286            .map(|p| p.get())
287            .unwrap_or(1)
288    }
289
290    pub fn correlation_analysis(&self) -> CorrelationAnalysis {
291        let cpu = self.cpu_usage();
292        let mem_percent = self.memory_usage_percent();
293        let disk_read = self.disk_read_bps();
294        let disk_write = self.disk_write_bps();
295        let network_rx = self.network_rx_bps();
296        let network_tx = self.network_tx_bps();
297        let gpu = self.gpu_usage();
298
299        let cpu_mem_correlation = if cpu > 0.0 && mem_percent > 0.0 {
300            (cpu / mem_percent).min(2.0)
301        } else {
302            0.0
303        };
304
305        let io_intensity = (disk_read + disk_write) as f64 / 1024.0 / 1024.0;
306        let network_intensity = (network_rx + network_tx) as f64 / 1024.0 / 1024.0;
307
308        CorrelationAnalysis {
309            cpu_memory_correlation: cpu_mem_correlation,
310            io_intensity_mb_per_sec: io_intensity,
311            network_intensity_mb_per_sec: network_intensity,
312            gpu_cpu_ratio: if cpu > 0.0 { gpu / cpu } else { 0.0 },
313            system_load_score: (cpu + mem_percent) / 2.0,
314        }
315    }
316
317    pub fn performance_score(&self) -> PerformanceScore {
318        let cpu = self.cpu_usage();
319        let mem_percent = self.memory_usage_percent();
320        let disk_read = self.disk_read_bps();
321        let disk_write = self.disk_write_bps();
322        let network_rx = self.network_rx_bps();
323        let network_tx = self.network_tx_bps();
324
325        let cpu_efficiency = if cpu < 80.0 {
326            (100.0 - cpu) / 100.0
327        } else {
328            (100.0 - cpu) / 100.0 * 0.5
329        };
330
331        let memory_efficiency = if mem_percent < 80.0 {
332            (100.0 - mem_percent) / 100.0
333        } else {
334            (100.0 - mem_percent) / 100.0 * 0.5
335        };
336
337        let io_throughput = (disk_read + disk_write) as f64 / 1024.0 / 1024.0 / 1024.0;
338        let io_efficiency = (io_throughput / 100.0).min(1.0);
339
340        let network_throughput = (network_rx + network_tx) as f64 / 1024.0 / 1024.0 / 1024.0;
341        let network_efficiency = (network_throughput / 10.0).min(1.0);
342
343        let overall_score = (cpu_efficiency * 0.4
344            + memory_efficiency * 0.3
345            + io_efficiency * 0.2
346            + network_efficiency * 0.1)
347            * 100.0;
348
349        PerformanceScore {
350            cpu_efficiency: cpu_efficiency * 100.0,
351            memory_efficiency: memory_efficiency * 100.0,
352            io_efficiency: io_efficiency * 100.0,
353            network_efficiency: network_efficiency * 100.0,
354            overall_score,
355        }
356    }
357}
358
359impl Drop for SystemMonitor {
360    fn drop(&mut self) {
361        self.running.store(false, Ordering::Release);
362
363        if let Ok(mut handle_guard) = self.handle.lock() {
364            if let Some(handle) = handle_guard.take() {
365                std::thread::spawn(move || {
366                    let timeout = std::time::Duration::from_secs(2);
367                    let start = std::time::Instant::now();
368
369                    while start.elapsed() < timeout {
370                        if handle.is_finished() {
371                            let _ = handle.join();
372                            return;
373                        }
374                        std::thread::sleep(std::time::Duration::from_millis(50));
375                    }
376                });
377            }
378        }
379    }
380}
381
/// Ratio-style indicators produced by `SystemMonitor::correlation_analysis`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorrelationAnalysis {
    /// CPU percent divided by memory percent, capped at 2.0; 0.0 if either is zero.
    pub cpu_memory_correlation: f64,
    /// Combined disk read and write throughput, in bytes per second divided by 1024 twice.
    pub io_intensity_mb_per_sec: f64,
    /// Combined network rx and tx throughput, in bytes per second divided by 1024 twice.
    pub network_intensity_mb_per_sec: f64,
    /// GPU usage divided by CPU usage; 0.0 when CPU usage is zero.
    pub gpu_cpu_ratio: f64,
    /// Mean of CPU percent and memory percent.
    pub system_load_score: f64,
}
390
/// Heuristic scores produced by `SystemMonitor::performance_score`.
///
/// All fields are expressed on a 0-100 scale.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceScore {
    /// Unused CPU share, halved once usage reaches 80 %.
    pub cpu_efficiency: f64,
    /// Unused memory share, halved once usage reaches 80 %.
    pub memory_efficiency: f64,
    /// Combined disk throughput relative to 100 GiB/s, capped at 100.
    pub io_efficiency: f64,
    /// Combined network throughput relative to 10 GiB/s, capped at 100.
    pub network_efficiency: f64,
    /// Weighted sum of the four scores (weights 0.4, 0.3, 0.2, 0.1).
    pub overall_score: f64,
}
399
/// Cumulative disk counters summed over every line of `/proc/diskstats`.
#[cfg(target_os = "linux")]
struct IoStats {
    /// Total bytes read (sectors read multiplied by 512).
    read_bytes: u64,
    /// Total bytes written (sectors written multiplied by 512).
    write_bytes: u64,
}
405
/// Cumulative network counters summed over every interface in `/proc/net/dev`.
#[cfg(target_os = "linux")]
struct NetworkStats {
    /// Total bytes received.
    rx_bytes: u64,
    /// Total bytes transmitted.
    tx_bytes: u64,
}
411
412#[cfg(target_os = "linux")]
413fn collect_io_stats() -> Result<IoStats, Box<dyn std::error::Error>> {
414    use std::fs;
415
416    let content = fs::read_to_string("/proc/diskstats")?;
417    let mut total_read = 0u64;
418    let mut total_write = 0u64;
419
420    for line in content.lines() {
421        let parts: Vec<&str> = line.split_whitespace().collect();
422        if parts.len() >= 6 {
423            if let Ok(read) = parts[5].parse::<u64>() {
424                total_read += read * 512;
425            }
426            if let Ok(write) = parts[9].parse::<u64>() {
427                total_write += write * 512;
428            }
429        }
430    }
431
432    Ok(IoStats {
433        read_bytes: total_read,
434        write_bytes: total_write,
435    })
436}
437
438#[cfg(target_os = "linux")]
439fn collect_network_stats() -> Result<NetworkStats, Box<dyn std::error::Error>> {
440    use std::fs;
441
442    let content = fs::read_to_string("/proc/net/dev")?;
443    let mut total_rx = 0u64;
444    let mut total_tx = 0u64;
445
446    for line in content.lines().skip(2) {
447        let parts: Vec<&str> = line.split_whitespace().collect();
448        if parts.len() >= 10 {
449            if let Ok(rx) = parts[1].parse::<u64>() {
450                total_rx += rx;
451            }
452            if let Ok(tx) = parts[9].parse::<u64>() {
453                total_tx += tx;
454            }
455        }
456    }
457
458    Ok(NetworkStats {
459        rx_bytes: total_rx,
460        tx_bytes: total_tx,
461    })
462}
463
464#[cfg(target_os = "linux")]
465fn collect_nvidia_gpu() -> Result<GpuInfo, Box<dyn std::error::Error>> {
466    use std::process::Command;
467
468    let output = Command::new("nvidia-smi")
469        .args(&[
470            "--query-gpu=utilization.gpu,memory.used,memory.total",
471            "--format=csv,noheader,nounits",
472        ])
473        .output();
474
475    if let Ok(output) = output {
476        if output.status.success() {
477            let stdout = String::from_utf8_lossy(&output.stdout);
478            let parts: Vec<&str> = stdout.trim().split(',').collect();
479            if parts.len() >= 3 {
480                let usage = parts[0].trim().parse::<f64>()?;
481                // Use saturating_mul to prevent overflow for large memory values
482                let memory_used = parts[1].trim().parse::<u64>()?.saturating_mul(1024 * 1024);
483                let memory_total = parts[2].trim().parse::<u64>()?.saturating_mul(1024 * 1024);
484                return Ok(GpuInfo {
485                    usage,
486                    memory_used,
487                    memory_total,
488                });
489            }
490        }
491    }
492
493    Err("Failed to collect GPU info".into())
494}
495
/// One GPU reading parsed from `nvidia-smi` output.
#[cfg(target_os = "linux")]
struct GpuInfo {
    /// Value of the `utilization.gpu` query field.
    usage: f64,
    /// Value of `memory.used`, multiplied by 1024 * 1024 to give bytes.
    memory_used: u64,
    /// Value of `memory.total`, multiplied by 1024 * 1024 to give bytes.
    memory_total: u64,
}
502
/// Returns the global monitor's CPU usage percentage, starting the monitor on first use.
pub fn cpu_usage() -> f64 {
    SystemMonitor::global().cpu_usage()
}
506
/// Returns the global monitor's available-memory reading, starting the monitor on first use.
pub fn memory_available() -> u64 {
    SystemMonitor::global().memory_available()
}
510
/// Returns the global monitor's total-memory reading, starting the monitor on first use.
pub fn memory_total() -> u64 {
    SystemMonitor::global().memory_total()
}
514
/// Returns the global monitor's used-memory figure (total minus available), starting the monitor on first use.
pub fn memory_used() -> u64 {
    SystemMonitor::global().memory_used()
}
518
/// Returns the global monitor's memory usage percentage, starting the monitor on first use.
pub fn memory_usage_percent() -> f64 {
    SystemMonitor::global().memory_usage_percent()
}
522
/// Returns the global monitor's `thread_count()` value, starting the monitor on first use.
pub fn thread_count() -> usize {
    SystemMonitor::global().thread_count()
}
526
/// Returns the global monitor's disk read throughput in bytes per second, starting the monitor on first use.
pub fn disk_read_bps() -> u64 {
    SystemMonitor::global().disk_read_bps()
}
530
/// Returns the global monitor's disk write throughput in bytes per second, starting the monitor on first use.
pub fn disk_write_bps() -> u64 {
    SystemMonitor::global().disk_write_bps()
}
534
/// Returns the global monitor's network receive throughput in bytes per second, starting the monitor on first use.
pub fn network_rx_bps() -> u64 {
    SystemMonitor::global().network_rx_bps()
}
538
/// Returns the global monitor's network transmit throughput in bytes per second, starting the monitor on first use.
pub fn network_tx_bps() -> u64 {
    SystemMonitor::global().network_tx_bps()
}
542
/// Returns the global monitor's GPU utilisation reading, starting the monitor on first use.
pub fn gpu_usage() -> f64 {
    SystemMonitor::global().gpu_usage()
}
546
/// Returns the global monitor's GPU memory-in-use reading in bytes, starting the monitor on first use.
pub fn gpu_memory_used() -> u64 {
    SystemMonitor::global().gpu_memory_used()
}
550
/// Returns the global monitor's total GPU memory reading in bytes, starting the monitor on first use.
pub fn gpu_memory_total() -> u64 {
    SystemMonitor::global().gpu_memory_total()
}
554
/// Returns the global monitor's GPU memory usage percentage, starting the monitor on first use.
pub fn gpu_memory_usage_percent() -> f64 {
    SystemMonitor::global().gpu_memory_usage_percent()
}
558
/// Returns the global monitor's [`CorrelationAnalysis`], starting the monitor on first use.
pub fn correlation_analysis() -> CorrelationAnalysis {
    SystemMonitor::global().correlation_analysis()
}
562
/// Returns the global monitor's [`PerformanceScore`], starting the monitor on first use.
pub fn performance_score() -> PerformanceScore {
    SystemMonitor::global().performance_score()
}
566
#[cfg(test)]
mod tests {
    use super::*;

    /// Starts the global monitor if necessary, then waits so the sampler has a
    /// chance to publish a refresh before the test reads any metric.
    ///
    /// The monitor is created lazily, so sleeping *before* the first access
    /// would only delay the test without giving the sampler time to run.
    /// The assertions below hold even if no refresh has happened yet.
    fn warmed_up_monitor() -> &'static SystemMonitor {
        let monitor = SystemMonitor::global();
        thread::sleep(Duration::from_millis(200));
        monitor
    }

    #[test]
    fn test_system_monitor() {
        let monitor = warmed_up_monitor();

        let cpu = monitor.cpu_usage();
        println!("CPU usage: {:.2}%", cpu);
        assert!((0.0..=100.0).contains(&cpu));

        let mem = monitor.memory_used();
        println!("Memory used: {} bytes", mem);

        let total = monitor.memory_total();
        println!("Memory total: {} bytes", total);
    }

    #[test]
    fn test_io_monitoring() {
        warmed_up_monitor();

        let disk_read = disk_read_bps();
        let disk_write = disk_write_bps();
        let network_rx = network_rx_bps();
        let network_tx = network_tx_bps();

        println!("Disk I/O: {} read/s, {} write/s", disk_read, disk_write);
        println!("Network: {} rx/s, {} tx/s", network_rx, network_tx);
    }

    #[test]
    fn test_correlation_analysis() {
        warmed_up_monitor();

        let analysis = correlation_analysis();
        println!(
            "CPU-Memory correlation: {:.2}",
            analysis.cpu_memory_correlation
        );
        println!(
            "I/O intensity: {:.2} MB/s",
            analysis.io_intensity_mb_per_sec
        );
        println!(
            "Network intensity: {:.2} MB/s",
            analysis.network_intensity_mb_per_sec
        );
        println!("System load score: {:.2}", analysis.system_load_score);

        // The ratio is capped at 2.0 and defaults to 0.0.
        assert!((0.0..=2.0).contains(&analysis.cpu_memory_correlation));
    }

    #[test]
    fn test_performance_score() {
        warmed_up_monitor();

        let score = performance_score();
        println!("CPU efficiency: {:.2}%", score.cpu_efficiency);
        println!("Memory efficiency: {:.2}%", score.memory_efficiency);
        println!("I/O efficiency: {:.2}%", score.io_efficiency);
        println!("Network efficiency: {:.2}%", score.network_efficiency);
        println!("Overall score: {:.2}%", score.overall_score);

        // Throughput-based scores are capped at 100 by construction.
        assert!((0.0..=100.0).contains(&score.io_efficiency));
        assert!((0.0..=100.0).contains(&score.network_efficiency));
    }

    #[test]
    fn test_convenience_functions() {
        warmed_up_monitor();

        let cpu = cpu_usage();
        let mem = memory_used();
        let total = memory_total();
        let percent = memory_usage_percent();
        let threads = thread_count();

        println!("CPU: {:.2}%", cpu);
        println!("Memory: {} / {} ({:.2}%)", mem, total, percent);
        println!("Threads: {}", threads);

        assert!((0.0..=100.0).contains(&cpu));
        // `thread_count` falls back to 1, so it can never report zero.
        assert!(threads >= 1);
    }
}
647}