sandbox_rs/monitoring/
monitor.rs

1//! Process monitoring via /proc
2//!
3//! Provides real-time monitoring of process resources using /proc filesystem.
4//! Tracks memory usage, CPU time, thread count, and process state.
5
6use std::fs;
7use std::path::Path;
8use std::time::{Duration, Instant};
9
10use nix::sys::signal::{Signal, kill};
11use nix::unistd::Pid;
12
13use crate::errors::{Result, SandboxError};
14
/// Coarse process state, decoded from the state character of `/proc/<pid>/stat`.
///
/// Only the states the monitor distinguishes are modelled; every other state
/// character is folded into [`ProcessState::Unknown`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ProcessState {
    /// The process is running (state character `R`).
    Running,
    /// The process is sleeping (state character `S`).
    Sleeping,
    /// The process is a zombie (state character `Z`).
    Zombie,
    /// The state character is not one of the above.
    Unknown,
}
27
28impl ProcessState {
29    /// Parse state from /proc stat first character
30    pub fn from_char(c: char) -> Self {
31        match c {
32            'R' => ProcessState::Running,
33            'S' => ProcessState::Sleeping,
34            'Z' => ProcessState::Zombie,
35            _ => ProcessState::Unknown,
36        }
37    }
38}
39
40/// Process statistics snapshot
41#[derive(Debug, Clone)]
42pub struct ProcessStats {
43    /// Process ID
44    pub pid: i32,
45    /// Virtual memory size in bytes
46    pub vsize: u64,
47    /// Resident set size in bytes (physical memory)
48    pub rss: u64,
49    /// RSS in MB (for convenience)
50    pub memory_usage_mb: u64,
51    /// CPU time in milliseconds
52    pub cpu_time_ms: u64,
53    /// Number of threads
54    pub num_threads: u32,
55    /// Current process state
56    pub state: ProcessState,
57    /// Timestamp of this snapshot
58    pub timestamp: Instant,
59}
60
61impl ProcessStats {
62    /// Create stats from /proc data
63    fn from_proc(pid: i32, timestamp: Instant) -> Result<Self> {
64        let stat_path = format!("/proc/{}/stat", pid);
65        let status_path = format!("/proc/{}/status", pid);
66
67        // Read /proc/{pid}/stat
68        let stat_content = fs::read_to_string(&stat_path).map_err(|e| {
69            SandboxError::ProcessMonitoring(format!("Failed to read {}: {}", stat_path, e))
70        })?;
71
72        // Parse stat: pid (comm) state ppid pgrp session tty_nr tpgid flags minflt cminflt majflt cmajflt utime stime cutime cstime priority nice num_threads ...
73        let parts: Vec<&str> = stat_content.split_whitespace().collect();
74        if parts.len() < 20 {
75            return Err(SandboxError::ProcessMonitoring(
76                "Invalid /proc/stat format".to_string(),
77            ));
78        }
79
80        let state = ProcessState::from_char(parts[2].chars().next().unwrap_or('?'));
81        let utime: u64 = parts[13]
82            .parse()
83            .map_err(|_| SandboxError::ProcessMonitoring("Invalid utime".to_string()))?;
84        let stime: u64 = parts[14]
85            .parse()
86            .map_err(|_| SandboxError::ProcessMonitoring("Invalid stime".to_string()))?;
87        let num_threads: u32 = parts[19]
88            .parse()
89            .map_err(|_| SandboxError::ProcessMonitoring("Invalid num_threads".to_string()))?;
90        let vsize: u64 = parts[22]
91            .parse()
92            .map_err(|_| SandboxError::ProcessMonitoring("Invalid vsize".to_string()))?;
93        let rss: u64 = parts[23]
94            .parse()
95            .map_err(|_| SandboxError::ProcessMonitoring("Invalid rss".to_string()))?;
96
97        // Read /proc/{pid}/status for additional info (placeholder for future enhancements)
98        let _status_content = fs::read_to_string(&status_path).unwrap_or_default();
99
100        // Calculate CPU time in milliseconds (utime + stime, assuming 100Hz)
101        // Kernel reports in clock ticks, typically 100Hz on Linux
102        let cpu_time_ms = ((utime + stime) * 10) as u64; // 10ms per tick at 100Hz
103
104        // RSS is in pages, convert to bytes (typically 4KB pages)
105        let rss_bytes = rss * 4096;
106        let memory_usage_mb = rss_bytes / (1024 * 1024);
107
108        Ok(ProcessStats {
109            pid,
110            vsize,
111            rss: rss_bytes,
112            memory_usage_mb,
113            cpu_time_ms,
114            num_threads,
115            state,
116            timestamp,
117        })
118    }
119}
120
121/// Process monitor for tracking sandbox resource usage
122pub struct ProcessMonitor {
123    pid: Pid,
124    creation_time: Instant,
125    peak_memory_mb: u64,
126    last_stats: Option<ProcessStats>,
127}
128
129impl ProcessMonitor {
130    /// Create new monitor for process
131    pub fn new(pid: Pid) -> Result<Self> {
132        // Verify process exists
133        let stat_path = format!("/proc/{}/stat", pid.as_raw());
134        if !Path::new(&stat_path).exists() {
135            return Err(SandboxError::ProcessMonitoring(format!(
136                "Process {} not found",
137                pid
138            )));
139        }
140
141        Ok(ProcessMonitor {
142            pid,
143            creation_time: Instant::now(),
144            peak_memory_mb: 0,
145            last_stats: None,
146        })
147    }
148
149    /// Collect current statistics
150    pub fn collect_stats(&mut self) -> Result<ProcessStats> {
151        let now = Instant::now();
152        let stats = ProcessStats::from_proc(self.pid.as_raw(), now)?;
153
154        // Track peak memory
155        if stats.memory_usage_mb > self.peak_memory_mb {
156            self.peak_memory_mb = stats.memory_usage_mb;
157        }
158
159        self.last_stats = Some(stats.clone());
160        Ok(stats)
161    }
162
163    /// Get peak memory usage since monitor creation (in MB)
164    pub fn peak_memory_mb(&self) -> u64 {
165        self.peak_memory_mb
166    }
167
168    /// Get elapsed time since monitor creation
169    pub fn elapsed(&self) -> Duration {
170        self.creation_time.elapsed()
171    }
172
173    /// Check if process is still alive
174    pub fn is_alive(&self) -> Result<bool> {
175        let stat_path = format!("/proc/{}/stat", self.pid.as_raw());
176        Ok(Path::new(&stat_path).exists())
177    }
178
179    /// Send SIGTERM (graceful shutdown)
180    pub fn send_sigterm(&self) -> Result<()> {
181        kill(self.pid, Signal::SIGTERM)
182            .map_err(|e| SandboxError::Syscall(format!("Failed to send SIGTERM: {}", e)))
183    }
184
185    /// Send SIGKILL (force termination)
186    pub fn send_sigkill(&self) -> Result<()> {
187        kill(self.pid, Signal::SIGKILL)
188            .map_err(|e| SandboxError::Syscall(format!("Failed to send SIGKILL: {}", e)))
189    }
190
191    /// Graceful shutdown: SIGTERM → wait → SIGKILL
192    pub fn graceful_shutdown(&self, wait_duration: Duration) -> Result<()> {
193        // First try SIGTERM
194        self.send_sigterm()?;
195
196        // Wait for process to exit
197        let start = Instant::now();
198        while start.elapsed() < wait_duration && self.is_alive()? {
199            std::thread::sleep(Duration::from_millis(10));
200        }
201
202        // If still alive, SIGKILL
203        if self.is_alive()? {
204            self.send_sigkill()?;
205        }
206
207        Ok(())
208    }
209
210    /// Get last collected stats
211    pub fn last_stats(&self) -> Option<&ProcessStats> {
212        self.last_stats.as_ref()
213    }
214}
215
#[cfg(test)]
mod tests {
    //! Unit tests. Apart from the pure state-mapping test, these read the
    //! test process's own `/proc` entry.

    use super::*;

    /// A PID assumed never to be in use, so its `/proc` entry is missing.
    const MISSING_PID: i32 = 9_999_999;

    /// PID of the test runner itself, in the raw form the helpers expect.
    fn own_pid() -> i32 {
        std::process::id() as i32
    }

    #[test]
    fn test_process_state_from_char() {
        assert_eq!(ProcessState::from_char('R'), ProcessState::Running);
        assert_eq!(ProcessState::from_char('S'), ProcessState::Sleeping);
        assert_eq!(ProcessState::from_char('Z'), ProcessState::Zombie);
        assert_eq!(ProcessState::from_char('X'), ProcessState::Unknown);
        // The mapping is case-sensitive.
        assert_eq!(ProcessState::from_char('r'), ProcessState::Unknown);
    }

    #[test]
    fn test_process_stats_creation() {
        let pid = own_pid();
        let stats = ProcessStats::from_proc(pid, Instant::now())
            .expect("the test process must be readable through /proc");

        assert_eq!(stats.pid, pid);
        // Assert on bytes rather than whole megabytes so the test does not
        // depend on the test binary being larger than 1 MiB in memory.
        assert!(stats.rss > 0);
        assert_eq!(stats.memory_usage_mb, stats.rss / (1024 * 1024));
        assert!(stats.num_threads >= 1);
    }

    #[test]
    fn test_process_stats_preserves_timestamp() {
        let timestamp = Instant::now();
        let stats = ProcessStats::from_proc(own_pid(), timestamp).unwrap();
        assert_eq!(stats.timestamp, timestamp);
    }

    #[test]
    fn test_process_stats_from_proc_missing_file() {
        let result = ProcessStats::from_proc(MISSING_PID, Instant::now());
        assert!(matches!(result, Err(SandboxError::ProcessMonitoring(_))));
    }

    #[test]
    fn test_process_monitor_new() {
        let pid = Pid::from_raw(own_pid());
        assert!(ProcessMonitor::new(pid).is_ok());
    }

    #[test]
    fn test_process_monitor_new_missing_process() {
        let result = ProcessMonitor::new(Pid::from_raw(MISSING_PID));
        assert!(matches!(result, Err(SandboxError::ProcessMonitoring(_))));
    }

    #[test]
    fn test_process_monitor_is_alive() {
        let monitor = ProcessMonitor::new(Pid::from_raw(own_pid())).unwrap();
        assert!(monitor.is_alive().unwrap());
    }

    #[test]
    fn test_process_monitor_collect_stats() {
        let pid = Pid::from_raw(own_pid());
        let mut monitor = ProcessMonitor::new(pid).unwrap();
        assert!(monitor.last_stats().is_none());

        let stats = monitor.collect_stats().unwrap();

        assert_eq!(stats.pid, pid.as_raw());
        assert!(stats.rss > 0);
        assert_eq!(monitor.peak_memory_mb(), stats.memory_usage_mb);
        assert_eq!(monitor.last_stats().map(|s| s.pid), Some(pid.as_raw()));
    }

    #[test]
    fn test_process_monitor_peak_memory() {
        let mut monitor = ProcessMonitor::new(Pid::from_raw(own_pid())).unwrap();

        let first = monitor.collect_stats().unwrap();
        let peak1 = monitor.peak_memory_mb();

        let second = monitor.collect_stats().unwrap();
        let peak2 = monitor.peak_memory_mb();

        // The watermark never decreases and always covers every snapshot.
        assert!(peak1 >= first.memory_usage_mb);
        assert!(peak2 >= peak1);
        assert!(peak2 >= second.memory_usage_mb);
    }
}