// memscope_rs/capture/system_monitor.rs

//! Global System Monitor - Background Thread Collection
//!
//! Architecture:
//! - A background thread collects system metrics roughly every 100 ms
//! - Atomic variables store the current values (lock-free reads)
//! - `track!` only reads the atomic values (nanosecond overhead)
//! - Readers never block on data collection
//!
//! Features:
//! - CPU monitoring
//! - Memory monitoring
//! - Disk and network I/O monitoring (basic, Linux only)
//! - GPU monitoring (Linux only, via `nvidia-smi`)
//! - Correlation analysis
//! - Performance scoring
16
17#![allow(warnings, unused)]
18
19use serde::{Deserialize, Serialize};
20use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
21use std::sync::{Arc, Mutex};
22use std::thread::{self, JoinHandle};
23use std::time::{Duration, Instant};
24use sysinfo::System;
25
26static SYSTEM_MONITOR: std::sync::OnceLock<SystemMonitor> = std::sync::OnceLock::new();
27
/// Holds the most recently sampled system metrics together with the background
/// thread that refreshes them.
///
/// Every metric lives in its own atomic so readers never take a lock. Floating-point
/// metrics (`cpu_usage`, `gpu_usage`) are stored as their `f64` bit pattern.
#[derive(Debug)]
pub struct SystemMonitor {
    /// Average CPU usage in percent, stored as `f64::to_bits`.
    cpu_usage: Arc<AtomicU64>,
    /// Available memory as reported by `sysinfo`.
    memory_available: Arc<AtomicU64>,
    /// Total memory as reported by `sysinfo`.
    memory_total: Arc<AtomicU64>,
    /// Disk read rate in bytes per second.
    disk_read_bps: Arc<AtomicU64>,
    /// Disk write rate in bytes per second.
    disk_write_bps: Arc<AtomicU64>,
    /// Network receive rate in bytes per second.
    network_rx_bps: Arc<AtomicU64>,
    /// Network transmit rate in bytes per second.
    network_tx_bps: Arc<AtomicU64>,
    /// GPU utilisation in percent, stored as `f64::to_bits`.
    gpu_usage: Arc<AtomicU64>,
    /// GPU memory in use, in bytes.
    gpu_memory_used: Arc<AtomicU64>,
    /// Total GPU memory, in bytes.
    gpu_memory_total: Arc<AtomicU64>,
    /// Wall-clock time of the last refresh, in milliseconds since the Unix epoch.
    last_update: Arc<AtomicU64>,
    /// Cleared to ask the background thread to stop.
    running: Arc<AtomicBool>,
    /// Join handle of the background thread; `None` once it has been taken for joining.
    handle: Mutex<Option<JoinHandle<()>>>,
}
43
44impl SystemMonitor {
45    fn new() -> Self {
46        let cpu_usage = Arc::new(AtomicU64::new(0));
47        let memory_available = Arc::new(AtomicU64::new(0));
48        let memory_total = Arc::new(AtomicU64::new(0));
49        let disk_read_bps = Arc::new(AtomicU64::new(0));
50        let disk_write_bps = Arc::new(AtomicU64::new(0));
51        let network_rx_bps = Arc::new(AtomicU64::new(0));
52        let network_tx_bps = Arc::new(AtomicU64::new(0));
53        let gpu_usage = Arc::new(AtomicU64::new(0));
54        let gpu_memory_used = Arc::new(AtomicU64::new(0));
55        let gpu_memory_total = Arc::new(AtomicU64::new(0));
56        let last_update = Arc::new(AtomicU64::new(0));
57        let running = Arc::new(AtomicBool::new(true));
58
59        let cpu_usage_clone = cpu_usage.clone();
60        let memory_available_clone = memory_available.clone();
61        let memory_total_clone = memory_total.clone();
62        let disk_read_bps_clone = disk_read_bps.clone();
63        let disk_write_bps_clone = disk_write_bps.clone();
64        let network_rx_bps_clone = network_rx_bps.clone();
65        let network_tx_bps_clone = network_tx_bps.clone();
66        let gpu_usage_clone = gpu_usage.clone();
67        let gpu_memory_used_clone = gpu_memory_used.clone();
68        let gpu_memory_total_clone = gpu_memory_total.clone();
69        let last_update_clone = last_update.clone();
70        let running_clone = running.clone();
71
72        let handle = thread::spawn(move || {
73            let mut sys = System::new_all();
74            sys.refresh_all();
75
76            let mut last_refresh = Instant::now();
77            let mut last_disk_read = 0u64;
78            let mut last_disk_write = 0u64;
79            let mut last_network_rx = 0u64;
80            let mut last_network_tx = 0u64;
81
82            while running_clone.load(Ordering::Relaxed) {
83                let now = Instant::now();
84
85                if now.duration_since(last_refresh).as_millis() >= 100 {
86                    sys.refresh_cpu_all();
87                    sys.refresh_memory();
88
89                    let cpus = sys.cpus();
90                    if !cpus.is_empty() {
91                        let total: f64 = cpus.iter().map(|c| c.cpu_usage() as f64).sum();
92                        let avg = (total / cpus.len() as f64).min(100.0);
93                        cpu_usage_clone.store(avg.to_bits(), Ordering::Release);
94                    }
95
96                    memory_available_clone.store(sys.available_memory(), Ordering::Release);
97                    memory_total_clone.store(sys.total_memory(), Ordering::Release);
98
99                    #[cfg(target_os = "linux")]
100                    {
101                        if let Ok(io_stats) = collect_io_stats() {
102                            let elapsed_sec = now.duration_since(last_refresh).as_secs_f64();
103                            if elapsed_sec > 0.0 {
104                                let read_bps = ((io_stats.read_bytes - last_disk_read) as f64
105                                    / elapsed_sec)
106                                    as u64;
107                                let write_bps = ((io_stats.write_bytes - last_disk_write) as f64
108                                    / elapsed_sec)
109                                    as u64;
110                                disk_read_bps_clone.store(read_bps, Ordering::Release);
111                                disk_write_bps_clone.store(write_bps, Ordering::Release);
112                            }
113                            last_disk_read = io_stats.read_bytes;
114                            last_disk_write = io_stats.write_bytes;
115
116                            if let Ok(net_stats) = collect_network_stats() {
117                                let rx_bps = ((net_stats.rx_bytes - last_network_rx) as f64
118                                    / elapsed_sec)
119                                    as u64;
120                                let tx_bps = ((net_stats.tx_bytes - last_network_tx) as f64
121                                    / elapsed_sec)
122                                    as u64;
123                                network_rx_bps_clone.store(rx_bps, Ordering::Release);
124                                network_tx_bps_clone.store(tx_bps, Ordering::Release);
125                                last_network_rx = net_stats.rx_bytes;
126                                last_network_tx = net_stats.tx_bytes;
127                            }
128                        }
129                    }
130
131                    #[cfg(target_os = "linux")]
132                    {
133                        if let Ok(gpu_info) = collect_nvidia_gpu() {
134                            gpu_usage_clone.store(gpu_info.usage.to_bits(), Ordering::Release);
135                            gpu_memory_used_clone.store(gpu_info.memory_used, Ordering::Release);
136                            gpu_memory_total_clone.store(gpu_info.memory_total, Ordering::Release);
137                        }
138                    }
139
140                    last_update_clone.store(
141                        std::time::SystemTime::now()
142                            .duration_since(std::time::UNIX_EPOCH)
143                            .map(|d| d.as_millis() as u64)
144                            .unwrap_or(0),
145                        Ordering::Release,
146                    );
147
148                    last_refresh = now;
149                }
150
151                thread::sleep(Duration::from_millis(50));
152            }
153        });
154
155        Self {
156            cpu_usage,
157            memory_available,
158            memory_total,
159            disk_read_bps,
160            disk_write_bps,
161            network_rx_bps,
162            network_tx_bps,
163            gpu_usage,
164            gpu_memory_used,
165            gpu_memory_total,
166            last_update,
167            running,
168            handle: Mutex::new(Some(handle)),
169        }
170    }
171
172    pub fn global() -> &'static Self {
173        SYSTEM_MONITOR.get_or_init(Self::new)
174    }
175
176    pub fn shutdown() {
177        if let Some(monitor) = SYSTEM_MONITOR.get() {
178            monitor.running.store(false, Ordering::Release);
179            if let Ok(mut handle_guard) = monitor.handle.lock() {
180                if let Some(handle) = handle_guard.take() {
181                    let _ = handle.join();
182                }
183            }
184        }
185    }
186
187    #[inline]
188    pub fn is_running(&self) -> bool {
189        self.running.load(Ordering::Acquire)
190    }
191
192    #[inline]
193    pub fn cpu_usage(&self) -> f64 {
194        let bits = self.cpu_usage.load(Ordering::Acquire);
195        let value = f64::from_bits(bits);
196        if value.is_nan() || value < 0.0 {
197            0.0
198        } else {
199            value.min(100.0)
200        }
201    }
202
203    #[inline]
204    pub fn memory_available(&self) -> u64 {
205        self.memory_available.load(Ordering::Acquire)
206    }
207
208    #[inline]
209    pub fn memory_total(&self) -> u64 {
210        self.memory_total.load(Ordering::Acquire)
211    }
212
213    #[inline]
214    pub fn memory_used(&self) -> u64 {
215        let total = self.memory_total.load(Ordering::Acquire);
216        let available = self.memory_available.load(Ordering::Acquire);
217        total.saturating_sub(available)
218    }
219
220    #[inline]
221    pub fn memory_usage_percent(&self) -> f64 {
222        let total = self.memory_total.load(Ordering::Acquire);
223        let available = self.memory_available.load(Ordering::Acquire);
224        if total > 0 {
225            ((total - available) as f64 / total as f64) * 100.0
226        } else {
227            0.0
228        }
229    }
230
231    #[inline]
232    pub fn disk_read_bps(&self) -> u64 {
233        self.disk_read_bps.load(Ordering::Acquire)
234    }
235
236    #[inline]
237    pub fn disk_write_bps(&self) -> u64 {
238        self.disk_write_bps.load(Ordering::Acquire)
239    }
240
241    #[inline]
242    pub fn network_rx_bps(&self) -> u64 {
243        self.network_rx_bps.load(Ordering::Acquire)
244    }
245
246    #[inline]
247    pub fn network_tx_bps(&self) -> u64 {
248        self.network_tx_bps.load(Ordering::Acquire)
249    }
250
251    #[inline]
252    pub fn gpu_usage(&self) -> f64 {
253        let bits = self.gpu_usage.load(Ordering::Acquire);
254        f64::from_bits(bits)
255    }
256
257    #[inline]
258    pub fn gpu_memory_used(&self) -> u64 {
259        self.gpu_memory_used.load(Ordering::Acquire)
260    }
261
262    #[inline]
263    pub fn gpu_memory_total(&self) -> u64 {
264        self.gpu_memory_total.load(Ordering::Acquire)
265    }
266
267    #[inline]
268    pub fn gpu_memory_usage_percent(&self) -> f64 {
269        let total = self.gpu_memory_total.load(Ordering::Acquire);
270        let used = self.gpu_memory_used.load(Ordering::Acquire);
271        if total > 0 {
272            (used as f64 / total as f64) * 100.0
273        } else {
274            0.0
275        }
276    }
277
278    #[inline]
279    pub fn last_update(&self) -> u64 {
280        self.last_update.load(Ordering::Acquire)
281    }
282
283    #[inline]
284    pub fn thread_count(&self) -> usize {
285        std::thread::available_parallelism()
286            .map(|p| p.get())
287            .unwrap_or(1)
288    }
289
290    pub fn correlation_analysis(&self) -> CorrelationAnalysis {
291        let cpu = self.cpu_usage();
292        let mem_percent = self.memory_usage_percent();
293        let disk_read = self.disk_read_bps();
294        let disk_write = self.disk_write_bps();
295        let network_rx = self.network_rx_bps();
296        let network_tx = self.network_tx_bps();
297        let gpu = self.gpu_usage();
298
299        let cpu_mem_correlation = if cpu > 0.0 && mem_percent > 0.0 {
300            (cpu / mem_percent).min(2.0)
301        } else {
302            0.0
303        };
304
305        let io_intensity = (disk_read + disk_write) as f64 / 1024.0 / 1024.0;
306        let network_intensity = (network_rx + network_tx) as f64 / 1024.0 / 1024.0;
307
308        CorrelationAnalysis {
309            cpu_memory_correlation: cpu_mem_correlation,
310            io_intensity_mb_per_sec: io_intensity,
311            network_intensity_mb_per_sec: network_intensity,
312            gpu_cpu_ratio: if cpu > 0.0 { gpu / cpu } else { 0.0 },
313            system_load_score: (cpu + mem_percent) / 2.0,
314        }
315    }
316
317    pub fn performance_score(&self) -> PerformanceScore {
318        let cpu = self.cpu_usage();
319        let mem_percent = self.memory_usage_percent();
320        let disk_read = self.disk_read_bps();
321        let disk_write = self.disk_write_bps();
322        let network_rx = self.network_rx_bps();
323        let network_tx = self.network_tx_bps();
324
325        let cpu_efficiency = if cpu < 80.0 {
326            (100.0 - cpu) / 100.0
327        } else {
328            (100.0 - cpu) / 100.0 * 0.5
329        };
330
331        let memory_efficiency = if mem_percent < 80.0 {
332            (100.0 - mem_percent) / 100.0
333        } else {
334            (100.0 - mem_percent) / 100.0 * 0.5
335        };
336
337        let io_throughput = (disk_read + disk_write) as f64 / 1024.0 / 1024.0 / 1024.0;
338        let io_efficiency = (io_throughput / 100.0).min(1.0);
339
340        let network_throughput = (network_rx + network_tx) as f64 / 1024.0 / 1024.0 / 1024.0;
341        let network_efficiency = (network_throughput / 10.0).min(1.0);
342
343        let overall_score = (cpu_efficiency * 0.4
344            + memory_efficiency * 0.3
345            + io_efficiency * 0.2
346            + network_efficiency * 0.1)
347            * 100.0;
348
349        PerformanceScore {
350            cpu_efficiency: cpu_efficiency * 100.0,
351            memory_efficiency: memory_efficiency * 100.0,
352            io_efficiency: io_efficiency * 100.0,
353            network_efficiency: network_efficiency * 100.0,
354            overall_score,
355        }
356    }
357}
358
359impl Drop for SystemMonitor {
360    fn drop(&mut self) {
361        self.running.store(false, Ordering::Release);
362
363        if let Ok(mut handle_guard) = self.handle.lock() {
364            if let Some(handle) = handle_guard.take() {
365                std::thread::spawn(move || {
366                    let timeout = std::time::Duration::from_secs(2);
367                    let start = std::time::Instant::now();
368
369                    while start.elapsed() < timeout {
370                        if handle.is_finished() {
371                            let _ = handle.join();
372                            return;
373                        }
374                        std::thread::sleep(std::time::Duration::from_millis(50));
375                    }
376                });
377            }
378        }
379    }
380}
381
382#[derive(Debug, Clone, Serialize, Deserialize)]
383pub struct CorrelationAnalysis {
384    pub cpu_memory_correlation: f64,
385    pub io_intensity_mb_per_sec: f64,
386    pub network_intensity_mb_per_sec: f64,
387    pub gpu_cpu_ratio: f64,
388    pub system_load_score: f64,
389}
390
391#[derive(Debug, Clone, Serialize, Deserialize)]
392pub struct PerformanceScore {
393    pub cpu_efficiency: f64,
394    pub memory_efficiency: f64,
395    pub io_efficiency: f64,
396    pub network_efficiency: f64,
397    pub overall_score: f64,
398}
399
/// Cumulative disk counters, in bytes, as returned by `collect_io_stats`.
#[cfg(target_os = "linux")]
struct IoStats {
    /// Total bytes read.
    read_bytes: u64,
    /// Total bytes written.
    write_bytes: u64,
}
405
/// Cumulative network counters, in bytes, as returned by `collect_network_stats`.
#[cfg(target_os = "linux")]
struct NetworkStats {
    /// Total bytes received.
    rx_bytes: u64,
    /// Total bytes transmitted.
    tx_bytes: u64,
}
411
412#[cfg(target_os = "linux")]
413fn collect_io_stats() -> Result<IoStats, Box<dyn std::error::Error>> {
414    use std::fs;
415
416    let content = fs::read_to_string("/proc/diskstats")?;
417    let mut total_read = 0u64;
418    let mut total_write = 0u64;
419
420    for line in content.lines() {
421        let parts: Vec<&str> = line.split_whitespace().collect();
422        if parts.len() >= 6 {
423            if let Ok(read) = parts[5].parse::<u64>() {
424                total_read += read * 512;
425            }
426            if let Ok(write) = parts[9].parse::<u64>() {
427                total_write += write * 512;
428            }
429        }
430    }
431
432    Ok(IoStats {
433        read_bytes: total_read,
434        write_bytes: total_write,
435    })
436}
437
438#[cfg(target_os = "linux")]
439fn collect_network_stats() -> Result<NetworkStats, Box<dyn std::error::Error>> {
440    use std::fs;
441
442    let content = fs::read_to_string("/proc/net/dev")?;
443    let mut total_rx = 0u64;
444    let mut total_tx = 0u64;
445
446    for line in content.lines().skip(2) {
447        let parts: Vec<&str> = line.split_whitespace().collect();
448        if parts.len() >= 10 {
449            if let Ok(rx) = parts[1].parse::<u64>() {
450                total_rx += rx;
451            }
452            if let Ok(tx) = parts[9].parse::<u64>() {
453                total_tx += tx;
454            }
455        }
456    }
457
458    Ok(NetworkStats {
459        rx_bytes: total_rx,
460        tx_bytes: total_tx,
461    })
462}
463
464#[cfg(target_os = "linux")]
465fn collect_nvidia_gpu() -> Result<GpuInfo, Box<dyn std::error::Error>> {
466    use std::process::Command;
467
468    let output = Command::new("nvidia-smi")
469        .args(&[
470            "--query-gpu=utilization.gpu,memory.used,memory.total",
471            "--format=csv,noheader,nounits",
472        ])
473        .output();
474
475    if let Ok(output) = output {
476        if output.status.success() {
477            let stdout = String::from_utf8_lossy(&output.stdout);
478            let parts: Vec<&str> = stdout.trim().split(',').collect();
479            if parts.len() >= 3 {
480                let usage = parts[0].trim().parse::<f64>()?;
481                let memory_used = parts[1].trim().parse::<u64>()? * 1024 * 1024;
482                let memory_total = parts[2].trim().parse::<u64>()? * 1024 * 1024;
483                return Ok(GpuInfo {
484                    usage,
485                    memory_used,
486                    memory_total,
487                });
488            }
489        }
490    }
491
492    Err("Failed to collect GPU info".into())
493}
494
/// GPU figures as returned by `collect_nvidia_gpu`.
#[cfg(target_os = "linux")]
struct GpuInfo {
    /// GPU utilisation as parsed from the `nvidia-smi` output.
    usage: f64,
    /// GPU memory in use, in bytes.
    memory_used: u64,
    /// Total GPU memory, in bytes.
    memory_total: u64,
}
501
502pub fn cpu_usage() -> f64 {
503    SystemMonitor::global().cpu_usage()
504}
505
506pub fn memory_available() -> u64 {
507    SystemMonitor::global().memory_available()
508}
509
510pub fn memory_total() -> u64 {
511    SystemMonitor::global().memory_total()
512}
513
514pub fn memory_used() -> u64 {
515    SystemMonitor::global().memory_used()
516}
517
518pub fn memory_usage_percent() -> f64 {
519    SystemMonitor::global().memory_usage_percent()
520}
521
522pub fn thread_count() -> usize {
523    SystemMonitor::global().thread_count()
524}
525
526pub fn disk_read_bps() -> u64 {
527    SystemMonitor::global().disk_read_bps()
528}
529
530pub fn disk_write_bps() -> u64 {
531    SystemMonitor::global().disk_write_bps()
532}
533
534pub fn network_rx_bps() -> u64 {
535    SystemMonitor::global().network_rx_bps()
536}
537
538pub fn network_tx_bps() -> u64 {
539    SystemMonitor::global().network_tx_bps()
540}
541
542pub fn gpu_usage() -> f64 {
543    SystemMonitor::global().gpu_usage()
544}
545
546pub fn gpu_memory_used() -> u64 {
547    SystemMonitor::global().gpu_memory_used()
548}
549
550pub fn gpu_memory_total() -> u64 {
551    SystemMonitor::global().gpu_memory_total()
552}
553
554pub fn gpu_memory_usage_percent() -> f64 {
555    SystemMonitor::global().gpu_memory_usage_percent()
556}
557
558pub fn correlation_analysis() -> CorrelationAnalysis {
559    SystemMonitor::global().correlation_analysis()
560}
561
562pub fn performance_score() -> PerformanceScore {
563    SystemMonitor::global().performance_score()
564}
565
#[cfg(test)]
mod tests {
    use super::*;

    /// Pauses the test thread so the background sampler gets a chance to run.
    fn let_sampler_run(millis: u64) {
        thread::sleep(Duration::from_millis(millis));
    }

    /// Reads CPU and memory through the global monitor; only the CPU range is asserted.
    #[test]
    fn test_system_monitor() {
        let monitor = SystemMonitor::global();

        let_sampler_run(200);

        let cpu = monitor.cpu_usage();
        println!("CPU usage: {:.2}%", cpu);
        assert!(cpu >= 0.0 && cpu <= 100.0);

        let mem = monitor.memory_used();
        println!("Memory used: {} bytes", mem);

        let total = monitor.memory_total();
        println!("Memory total: {} bytes", total);
    }

    /// Smoke test: the disk and network accessors can be called and printed.
    #[test]
    fn test_io_monitoring() {
        let_sampler_run(200);

        let (disk_read, disk_write) = (disk_read_bps(), disk_write_bps());
        let (network_rx, network_tx) = (network_rx_bps(), network_tx_bps());

        println!("Disk I/O: {} read/s, {} write/s", disk_read, disk_write);
        println!("Network: {} rx/s, {} tx/s", network_rx, network_tx);
    }

    /// Smoke test: the correlation analysis can be computed and printed.
    #[test]
    fn test_correlation_analysis() {
        let_sampler_run(200);

        let CorrelationAnalysis {
            cpu_memory_correlation,
            io_intensity_mb_per_sec,
            network_intensity_mb_per_sec,
            system_load_score,
            ..
        } = correlation_analysis();

        println!("CPU-Memory correlation: {:.2}", cpu_memory_correlation);
        println!("I/O intensity: {:.2} MB/s", io_intensity_mb_per_sec);
        println!("Network intensity: {:.2} MB/s", network_intensity_mb_per_sec);
        println!("System load score: {:.2}", system_load_score);
    }

    /// Smoke test: the performance score can be computed and printed.
    #[test]
    fn test_performance_score() {
        let_sampler_run(200);

        let PerformanceScore {
            cpu_efficiency,
            memory_efficiency,
            io_efficiency,
            network_efficiency,
            overall_score,
        } = performance_score();

        println!("CPU efficiency: {:.2}%", cpu_efficiency);
        println!("Memory efficiency: {:.2}%", memory_efficiency);
        println!("I/O efficiency: {:.2}%", io_efficiency);
        println!("Network efficiency: {:.2}%", network_efficiency);
        println!("Overall score: {:.2}%", overall_score);
    }

    /// Smoke test: the module-level convenience functions can be called and printed.
    #[test]
    fn test_convenience_functions() {
        let_sampler_run(150);

        let cpu = cpu_usage();
        let mem = memory_used();
        let total = memory_total();
        let percent = memory_usage_percent();
        let threads = thread_count();

        println!("CPU: {:.2}%", cpu);
        println!("Memory: {} / {} ({:.2}%)", mem, total, percent);
        println!("Threads: {}", threads);
    }
}