cellos-telemetry 0.5.0

In-guest telemetry agent for CellOS — runs as PID 2 inside Firecracker microVMs, emits CBOR-over-vsock observations. No signing key by design (ADR-0006).
Documentation
//! `/proc` delta walker for `process.spawned` / `process.exited`.
//!
//! Linux-only. We do NOT use `inotify` on `/proc` (it doesn't fire there);
//! we walk `/proc/[pid]` snapshots and diff PID sets. This is a cheap,
//! robust, no-CAP_SYS_ADMIN approach that fits the channel-trust model:
//! every observed PID is one we *saw*, not one a kernel feature *attested*.
//! That is exactly the `DECLARED` epistemic status the agent emits
//! (ADR-0006 §2).
//!
//! The walker is read-only and never blocks the workload.

#![cfg(target_os = "linux")]
#![allow(unsafe_code)] // /proc reads are pure-safe; this is for future syscall hooks.

use std::collections::{BTreeSet, HashMap};
use std::fs;

use crate::probes::now_monotonic_ns;
use crate::{probe_source, ProbeEvent};

/// State carried between successive [`ProcWalker::poll`] calls.
///
/// The walker remembers every PID it has reported as spawned and not yet
/// reported as exited, so that the next poll can diff the live `/proc`
/// listing against it.
#[derive(Debug, Default)]
pub struct ProcWalker {
    /// PID -> `comm` captured when the PID was first sighted. The stored name
    /// is reused for the exit event, since `/proc/[pid]/comm` can no longer
    /// be read once the process is gone.
    seen: HashMap<u32, String>,
}

impl ProcWalker {
    /// Construct an empty walker. The first [`poll`](Self::poll) will treat
    /// every existing PID as `process.spawned` — callers typically discard
    /// the first batch (init-time discovery) and only forward subsequent
    /// deltas to the host.
    pub fn new() -> Self {
        Self::default()
    }

    /// Re-scan `/proc` and produce one [`ProbeEvent`] for each PID that
    /// appeared or disappeared since the last call.
    ///
    /// Returns events tagged with [`probe_source::PROCESS_SPAWNED`] for new
    /// PIDs (ascending PID order) followed by
    /// [`probe_source::PROCESS_EXITED`] for PIDs that were present last time
    /// and aren't now (also ascending). All events of one poll share the same
    /// monotonic timestamp, taken at the start of the call.
    ///
    /// If `/proc` cannot be listed completely, the poll yields no events and
    /// leaves the walker's state untouched: an unreadable or truncated
    /// listing says nothing about which processes exited, so reporting a
    /// delta from it would fabricate `process.exited` events.
    pub fn poll(&mut self) -> Vec<ProbeEvent> {
        use std::collections::hash_map::Entry;

        let now = now_monotonic_ns();

        // Take the whole snapshot before mutating `self.seen`, so a failed
        // scan cannot leave the walker half-updated.
        let current = match Self::snapshot_pids() {
            Ok(pids) => pids,
            Err(_) => return Vec::new(),
        };

        let mut out = Vec::new();

        // Spawns: PIDs in the snapshot that we are not tracking yet.
        for &pid in &current {
            if let Entry::Vacant(slot) = self.seen.entry(pid) {
                // The process may already be gone by the time we read its
                // name; an empty comm is the best-effort fallback.
                let comm = read_comm(pid).unwrap_or_default();
                slot.insert(comm.clone());
                out.push(ProbeEvent {
                    probe_source: probe_source::PROCESS_SPAWNED,
                    guest_pid: pid,
                    guest_comm: comm,
                    guest_monotonic_ns: now,
                });
            }
        }

        // Exits: tracked PIDs missing from the snapshot. Sorted so the event
        // order does not depend on `HashMap` iteration order.
        let mut exited: Vec<u32> = self
            .seen
            .keys()
            .copied()
            .filter(|pid| !current.contains(pid))
            .collect();
        exited.sort_unstable();
        for pid in exited {
            // The key was just read out of `seen`, so `remove` yields the
            // comm recorded at first sighting.
            let comm = self.seen.remove(&pid).unwrap_or_default();
            out.push(ProbeEvent {
                probe_source: probe_source::PROCESS_EXITED,
                guest_pid: pid,
                guest_comm: comm,
                guest_monotonic_ns: now,
            });
        }

        out
    }

    /// List the numeric entries of `/proc` as a PID set.
    ///
    /// Any I/O error — opening the directory or reading an entry — is
    /// propagated, because the resulting set could then be incomplete.
    /// Non-numeric and non-UTF-8 names are skipped.
    fn snapshot_pids() -> std::io::Result<BTreeSet<u32>> {
        let mut pids = BTreeSet::new();
        for entry in fs::read_dir("/proc")? {
            let name = entry?.file_name();
            if let Some(pid) = name.to_str().and_then(|s| s.parse::<u32>().ok()) {
                pids.insert(pid);
            }
        }
        Ok(pids)
    }
}

/// Read `/proc/[pid]/comm`, with trailing newlines stripped and the result
/// capped at 16 bytes (Linux `comm` cap).
///
/// Returns `None` when the file cannot be read as UTF-8 text, e.g. because
/// the process has already exited.
///
/// The cap never splits a multi-byte character: if byte 16 falls inside one,
/// the cut backs off to the previous character boundary, so the result may be
/// slightly shorter than 16 bytes. (`String::truncate` panics on a
/// non-boundary index, which must not happen inside the telemetry agent.)
fn read_comm(pid: u32) -> Option<String> {
    const COMM_MAX_BYTES: usize = 16;

    let mut comm = fs::read_to_string(format!("/proc/{pid}/comm")).ok()?;

    // Strip the trailing newline(s) in place instead of copying the string.
    let trimmed_len = comm.trim_end_matches('\n').len();
    comm.truncate(trimmed_len);

    if comm.len() > COMM_MAX_BYTES {
        let mut cut = COMM_MAX_BYTES;
        // Index 0 is always a boundary, so this loop terminates.
        while !comm.is_char_boundary(cut) {
            cut -= 1;
        }
        comm.truncate(cut);
    }
    Some(comm)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn first_poll_discovers_self() {
        // The current process is in /proc, so the first poll must report it.
        // We don't assert an exact count because /proc churns.
        let mut w = ProcWalker::new();
        let events = w.poll();
        assert!(
            !events.is_empty(),
            "first poll should yield at least one PROCESS_SPAWNED"
        );
        // Starting from an empty walker there is nothing that could have
        // exited, so every event must be a spawn.
        assert!(events
            .iter()
            .all(|e| e.probe_source == probe_source::PROCESS_SPAWNED));
        // The test's own PID must be among the discovered processes.
        let me = std::process::id();
        assert!(
            events.iter().any(|e| e.guest_pid == me),
            "first poll should report the test process itself (pid {me})"
        );
    }

    #[test]
    fn second_poll_is_quiet_modulo_churn() {
        // Two back-to-back polls with no intentional spawns: the second is a
        // delta and must not be larger than the initial full discovery. We
        // don't assert empty because /proc has natural churn (timers,
        // kernel threads).
        let mut w = ProcWalker::new();
        let first = w.poll();
        let second = w.poll();
        assert!(
            second.len() <= first.len(),
            "second poll should not exceed first (got {} vs {})",
            second.len(),
            first.len()
        );
    }
}