cellos_telemetry/probes/process.rs
1//! `/proc` delta walker for `process.spawned` / `process.exited`.
2//!
3//! Linux-only. We do NOT use `inotify` on `/proc` (it doesn't fire there);
4//! we walk `/proc/[pid]` snapshots and diff PID sets. This is a cheap,
5//! robust, no-CAP_SYS_ADMIN approach that fits the channel-trust model:
6//! every observed PID is one we *saw*, not one a kernel feature *attested*.
7//! That is exactly the `DECLARED` epistemic status the agent emits
8//! (ADR-0006 §2).
9//!
10//! The walker is read-only and never blocks the workload.
11
12#![cfg(target_os = "linux")]
13#![allow(unsafe_code)] // /proc reads are pure-safe; this is for future syscall hooks.
14
15use std::collections::{BTreeSet, HashMap};
16use std::fs;
17
18use crate::probes::now_monotonic_ns;
19use crate::{probe_source, ProbeEvent};
20
/// Polling state that persists across consecutive [`ProcWalker::poll`] calls.
///
/// The walker remembers every PID it has already reported as spawned so the
/// next scan of `/proc` can be diffed against it.
#[derive(Default, Debug)]
pub struct ProcWalker {
    /// PIDs observed by earlier polls, mapped to the `comm` captured when the
    /// PID was first sighted.
    seen: HashMap<u32, String>,
}
26
27impl ProcWalker {
28 /// Construct an empty walker. The first [`poll`](Self::poll) will treat
29 /// every existing PID as `process.spawned` — callers typically discard
30 /// the first batch (init-time discovery) and only forward subsequent
31 /// deltas to the host.
32 pub fn new() -> Self {
33 Self::default()
34 }
35
36 /// Re-scan `/proc` and produce one [`ProbeEvent`] for each PID that
37 /// appeared or disappeared since the last call.
38 ///
39 /// Returns events tagged with [`probe_source::PROCESS_SPAWNED`] for new
40 /// PIDs and [`probe_source::PROCESS_EXITED`] for PIDs that were present
41 /// last time and aren't now.
42 pub fn poll(&mut self) -> Vec<ProbeEvent> {
43 let now = now_monotonic_ns();
44 let mut out = Vec::new();
45
46 let mut current: BTreeSet<u32> = BTreeSet::new();
47 if let Ok(entries) = fs::read_dir("/proc") {
48 for ent in entries.flatten() {
49 let name = ent.file_name();
50 let s = match name.to_str() {
51 Some(s) => s,
52 None => continue,
53 };
54 if let Ok(pid) = s.parse::<u32>() {
55 current.insert(pid);
56 if !self.seen.contains_key(&pid) {
57 let comm = read_comm(pid).unwrap_or_default();
58 self.seen.insert(pid, comm.clone());
59 out.push(ProbeEvent {
60 probe_source: probe_source::PROCESS_SPAWNED,
61 guest_pid: pid,
62 guest_comm: comm,
63 guest_monotonic_ns: now,
64 });
65 }
66 }
67 }
68 }
69
70 // Detect exits: anything in `seen` not in `current`.
71 let exited: Vec<u32> = self
72 .seen
73 .keys()
74 .copied()
75 .filter(|pid| !current.contains(pid))
76 .collect();
77 for pid in exited {
78 // SAFETY (logical, not unsafe-Rust): `seen.remove` cannot fail
79 // because we just observed the key.
80 let comm = self.seen.remove(&pid).unwrap_or_default();
81 out.push(ProbeEvent {
82 probe_source: probe_source::PROCESS_EXITED,
83 guest_pid: pid,
84 guest_comm: comm,
85 guest_monotonic_ns: now,
86 });
87 }
88
89 out
90 }
91}
92
/// Read `/proc/[pid]/comm` and return it without trailing newlines, capped at
/// 16 bytes.
///
/// Returns `None` when the file cannot be read, for example because the
/// process has already exited.
///
/// The name is decoded lossily: bytes that are not valid UTF-8 become
/// U+FFFD instead of discarding the whole name. This matters because the
/// kernel limits `comm` by bytes, so a multi-byte character at the end of a
/// long name can be cut in half.
///
/// The 16-byte cap is applied on a character boundary. If byte 16 falls
/// inside a multi-byte character, that character is dropped entirely rather
/// than panicking the way [`String::truncate`] would.
fn read_comm(pid: u32) -> Option<String> {
    // Upper bound, in bytes, on the name kept per process.
    const COMM_MAX_BYTES: usize = 16;

    let bytes = fs::read(format!("/proc/{pid}/comm")).ok()?;
    let text = String::from_utf8_lossy(&bytes);
    let name = text.trim_end_matches('\n');

    // Walk back to the nearest char boundary. Index 0 is always a boundary,
    // so the loop terminates.
    let mut end = name.len().min(COMM_MAX_BYTES);
    while !name.is_char_boundary(end) {
        end -= 1;
    }
    Some(name[..end].to_owned())
}
104
#[cfg(test)]
mod tests {
    use super::*;

    /// The test process itself is listed in `/proc`, so the first poll must
    /// report it, and everything in that first batch must be a spawn.
    #[test]
    fn first_poll_discovers_self() {
        let mut walker = ProcWalker::new();
        let events = walker.poll();

        // No exact count is asserted because /proc churns.
        assert!(
            !events.is_empty(),
            "first poll should yield at least one PROCESS_SPAWNED"
        );
        assert!(events
            .iter()
            .all(|e| e.probe_source == probe_source::PROCESS_SPAWNED));

        // Check what the test name promises: our own PID is in the batch.
        let me = std::process::id();
        assert!(
            events.iter().any(|e| e.guest_pid == me),
            "first poll should report the test process itself (pid {me})"
        );
    }

    /// Two back-to-back polls with no intentional spawns: the second batch
    /// must not be larger than the first. It is not required to be empty
    /// because /proc has natural churn (timers, kernel threads).
    #[test]
    fn second_poll_is_quiet_modulo_churn() {
        let mut walker = ProcWalker::new();
        let first = walker.poll();
        let second = walker.poll();
        assert!(
            second.len() <= first.len(),
            "second poll should not exceed first (got {} vs {})",
            second.len(),
            first.len()
        );
    }
}
139}