Skip to main content

cellos_telemetry/probes/
process.rs

1//! `/proc` delta walker for `process.spawned` / `process.exited`.
2//!
3//! Linux-only. We do NOT use `inotify` on `/proc` (it doesn't fire there);
4//! we walk `/proc/[pid]` snapshots and diff PID sets. This is a cheap,
5//! robust, no-CAP_SYS_ADMIN approach that fits the channel-trust model:
6//! every observed PID is one we *saw*, not one a kernel feature *attested*.
7//! That is exactly the `DECLARED` epistemic status the agent emits
8//! (ADR-0006 §2).
9//!
10//! The walker is read-only and never blocks the workload.
11
12#![cfg(target_os = "linux")]
13#![allow(unsafe_code)] // /proc reads are pure-safe; this is for future syscall hooks.
14
15use std::collections::{BTreeSet, HashMap};
16use std::fs;
17
18use crate::probes::now_monotonic_ns;
19use crate::{probe_source, ProbeEvent};
20
/// Snapshot-diffing state that persists across [`ProcWalker::poll`] calls.
#[derive(Default, Debug)]
pub struct ProcWalker {
    /// Tracked PIDs, each mapped to the `comm` string captured the first
    /// time that PID was sighted.
    seen: HashMap<u32, String>,
}
26
27impl ProcWalker {
28    /// Construct an empty walker. The first [`poll`](Self::poll) will treat
29    /// every existing PID as `process.spawned` — callers typically discard
30    /// the first batch (init-time discovery) and only forward subsequent
31    /// deltas to the host.
32    pub fn new() -> Self {
33        Self::default()
34    }
35
36    /// Re-scan `/proc` and produce one [`ProbeEvent`] for each PID that
37    /// appeared or disappeared since the last call.
38    ///
39    /// Returns events tagged with [`probe_source::PROCESS_SPAWNED`] for new
40    /// PIDs and [`probe_source::PROCESS_EXITED`] for PIDs that were present
41    /// last time and aren't now.
42    pub fn poll(&mut self) -> Vec<ProbeEvent> {
43        let now = now_monotonic_ns();
44        let mut out = Vec::new();
45
46        let mut current: BTreeSet<u32> = BTreeSet::new();
47        if let Ok(entries) = fs::read_dir("/proc") {
48            for ent in entries.flatten() {
49                let name = ent.file_name();
50                let s = match name.to_str() {
51                    Some(s) => s,
52                    None => continue,
53                };
54                if let Ok(pid) = s.parse::<u32>() {
55                    current.insert(pid);
56                    if !self.seen.contains_key(&pid) {
57                        let comm = read_comm(pid).unwrap_or_default();
58                        self.seen.insert(pid, comm.clone());
59                        out.push(ProbeEvent {
60                            probe_source: probe_source::PROCESS_SPAWNED,
61                            guest_pid: pid,
62                            guest_comm: comm,
63                            guest_monotonic_ns: now,
64                        });
65                    }
66                }
67            }
68        }
69
70        // Detect exits: anything in `seen` not in `current`.
71        let exited: Vec<u32> = self
72            .seen
73            .keys()
74            .copied()
75            .filter(|pid| !current.contains(pid))
76            .collect();
77        for pid in exited {
78            // SAFETY (logical, not unsafe-Rust): `seen.remove` cannot fail
79            // because we just observed the key.
80            let comm = self.seen.remove(&pid).unwrap_or_default();
81            out.push(ProbeEvent {
82                probe_source: probe_source::PROCESS_EXITED,
83                guest_pid: pid,
84                guest_comm: comm,
85                guest_monotonic_ns: now,
86            });
87        }
88
89        out
90    }
91}
92
/// Read `/proc/[pid]/comm`, with trailing newlines removed and the result
/// capped at 16 bytes.
///
/// Returns `None` when the file cannot be read (for example because the
/// process has already exited). Bytes that are not valid UTF-8 are replaced
/// with U+FFFD instead of discarding the whole name, so a process with an
/// unusual name is still reported with a recognisable `comm`.
fn read_comm(pid: u32) -> Option<String> {
    // Upper bound, in bytes, on the name handed back to callers.
    const MAX_COMM_BYTES: usize = 16;

    let raw = fs::read(format!("/proc/{pid}/comm")).ok()?;
    let text = String::from_utf8_lossy(&raw);
    let mut comm = text.trim_end_matches('\n').to_owned();

    if comm.len() > MAX_COMM_BYTES {
        // `String::truncate` panics when the cut is not on a char boundary,
        // so back up to the nearest boundary at or below the cap. Index 0 is
        // always a boundary, which bounds the loop.
        let mut end = MAX_COMM_BYTES;
        while !comm.is_char_boundary(end) {
            end -= 1;
        }
        comm.truncate(end);
    }

    Some(comm)
}
104
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh walker has recorded nothing, so its first poll must report
    /// at least one PID (this test process lives in /proc) and only spawn
    /// events. The exact count is not checked because /proc churns.
    #[test]
    fn first_poll_discovers_self() {
        let mut walker = ProcWalker::new();
        let initial = walker.poll();

        assert!(
            !initial.is_empty(),
            "first poll should yield at least one PROCESS_SPAWNED"
        );
        for ev in &initial {
            assert!(ev.probe_source == probe_source::PROCESS_SPAWNED);
        }
    }

    /// Two polls in a row with no deliberate spawns in between: the second
    /// batch must not be larger than the first. It is not required to be
    /// empty, since /proc has natural churn (timers, kernel threads).
    #[test]
    fn second_poll_is_quiet_modulo_churn() {
        let mut walker = ProcWalker::new();
        let baseline_len = walker.poll().len();
        let followup_len = walker.poll().len();

        assert!(
            followup_len <= baseline_len,
            "second poll should not exceed first (got {} vs {})",
            followup_len,
            baseline_len
        );
    }
}