Skip to main content

fsmon/
proc_cache.rs

1//! Proc Connector Process Cache + Process Tree
2//!
3//! Two data structures:
4//! - **ProcCache**: PID → {cmd, user, ppid, tgid} from Exec events (existing)
5//! - **PidTree**: PID → {ppid, cmd} for ancestor lookups (new, handles Fork/Exec/Exit)
6//!
7//! The PidTree is populated from three sources:
8//! 1. Startup snapshot: `/proc/*/status` → seed existing processes
9//! 2. Fork events: parent→child relationship (no cmd yet)
10//! 3. Exec events: update cmd for the child
11
12use std::time::Duration;
13
14use moka::sync::Cache;
15use proc_connector::{NetlinkMessageIter, ProcConnector, ProcEvent};
16
17use crate::utils::uid_to_username;
18
19// ---- ProcCache (existing) ----
20
/// Cached details about one process, stored per PID in the process info cache.
///
/// Entries are written while handling an Exec event: `cmd` is read from
/// `/proc/<pid>/comm`, the other fields from `/proc/<pid>/status`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ProcInfo {
    /// Command name, or `"unknown"` when it could not be read.
    pub cmd: String,
    /// User name resolved from the first `Uid:` value, or `"unknown"`.
    pub user: String,
    /// Parent PID (0 when it could not be determined).
    pub ppid: u32,
    /// Thread-group ID (0 when it could not be determined).
    pub tgid: u32,
    /// Timestamp in nanoseconds carried by the event that produced the entry.
    pub start_time_ns: u64,
}
30
/// Maximum number of entries held by the process info cache (2^16).
/// Chosen to cover typical active PID ranges with headroom.
pub const PROC_CACHE_CAP: u64 = 1 << 16;

/// Time-to-live, in seconds, for process info entries (ten minutes).
/// The cache is built with a time-to-live policy, so an entry is dropped once
/// it reaches this age regardless of whether the process is still running.
pub const PROC_CACHE_TTL_SECS: u64 = 10 * 60;

/// Maximum number of entries for the process tree cache (2^16).
pub const PID_TREE_CAP: u64 = 1 << 16;

/// Time-to-live, in seconds, for process tree entries (ten minutes).
pub const PID_TREE_TTL_SECS: u64 = 10 * 60;
43
44/// Shared PID → ProcInfo cache (thread-safe, bounded, TTL-based eviction).
45pub type ProcCache = Cache<u32, ProcInfo>;
46
47/// Create a new ProcCache with the configured capacity and TTL.
48pub fn new_cache() -> ProcCache {
49    new_cache_with(CacheParams::default())
50}
51
52/// Create a ProcCache with explicit capacity and TTL overrides.
53pub fn new_cache_with(params: CacheParams) -> ProcCache {
54    Cache::builder()
55        .max_capacity(params.capacity)
56        .time_to_live(Duration::from_secs(params.ttl_secs))
57        .build()
58}
59
/// Sizing parameters shared by the process caches (ProcCache and PidTree).
///
/// The type is plain data, so it derives the usual value-type traits; callers
/// can copy, compare and debug-print parameter sets.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct CacheParams {
    /// Maximum number of entries the cache may hold.
    pub capacity: u64,
    /// Time-to-live of each entry, in seconds.
    pub ttl_secs: u64,
}
65
66impl Default for CacheParams {
67    fn default() -> Self {
68        Self {
69            capacity: PROC_CACHE_CAP,
70            ttl_secs: PROC_CACHE_TTL_SECS,
71        }
72    }
73}
74
75// ---- PidTree ----
76
/// A node in the process tree.
///
/// A node created from a Fork event has an empty `cmd`; handling the
/// subsequent Exec event replaces the node with one carrying the command name.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PidNode {
    /// Parent PID; 0 marks the top of the chain (or an unknown parent).
    pub ppid: u32,
    /// Command name; empty until an Exec event (or the startup snapshot) fills it.
    pub cmd: String,
    /// Timestamp in nanoseconds from the event that produced the node;
    /// 0 for nodes seeded by the startup snapshot.
    pub start_time_ns: u64,
}
84
85/// Shared process tree: PID → parent PID + cmd (bounded, TTL-based eviction).
86pub type PidTree = Cache<u32, PidNode>;
87
88/// Create a new PidTree with the configured capacity and TTL.
89pub fn new_pid_tree() -> PidTree {
90    new_pid_tree_with(CacheParams::default())
91}
92
93/// Create a PidTree with explicit capacity and TTL overrides.
94pub fn new_pid_tree_with(params: CacheParams) -> PidTree {
95    Cache::builder()
96        .max_capacity(params.capacity)
97        .time_to_live(Duration::from_secs(params.ttl_secs))
98        .build()
99}
100
101/// Snapshot all existing processes from /proc on daemon start.
102/// Reads `/proc/*/status` to seed the tree with current PIDs and their ppid/cmd.
103pub fn snapshot_process_tree(tree: &PidTree) {
104    let dir = match std::fs::read_dir("/proc") {
105        Ok(d) => d,
106        Err(e) => {
107            eprintln!("[WARNING] Cannot read /proc for process tree snapshot: {e}");
108            return;
109        }
110    };
111    for entry in dir.flatten() {
112        let name = entry.file_name();
113        let name_str = name.to_string_lossy();
114        let pid: u32 = match name_str.parse() {
115            Ok(p) => p,
116            Err(_) => continue,
117        };
118        let status = match std::fs::read_to_string(format!("/proc/{}/status", pid)) {
119            Ok(s) => s,
120            Err(_) => continue,
121        };
122        let mut ppid = 0u32;
123        let mut cmd = String::new();
124        for line in status.lines() {
125            if let Some(val) = line.strip_prefix("PPid:") {
126                ppid = val.trim().parse().unwrap_or(0);
127            } else if let Some(val) = line.strip_prefix("Name:") {
128                cmd = val.trim().to_string();
129            }
130        }
131        tree.insert(pid, PidNode { ppid, cmd, start_time_ns: 0 });
132    }
133}
134
135pub fn read_proc_start_time_ns(pid: u32) -> u64 {
136    let stat = match std::fs::read_to_string(format!("/proc/{pid}/stat")) {
137        Ok(s) => s,
138        Err(_) => return 0,
139    };
140    let after_comm = match stat.rfind(") ") {
141        Some(pos) => pos + 2,
142        None => return 0,
143    };
144    let mut rest = &stat[after_comm..];
145    for _ in 0..19 {
146        if let Some(pos) = rest.find(' ') {
147            rest = &rest[pos + 1..];
148        } else {
149            return 0;
150        }
151    }
152    let starttime_jiffies: u64 = match rest.split_whitespace().next() {
153        Some(s) => s.parse().unwrap_or(0),
154        None => return 0,
155    };
156    if starttime_jiffies == 0 {
157        return 0;
158    }
159    let clk_tck = unsafe { libc::sysconf(libc::_SC_CLK_TCK) };
160    if clk_tck <= 0 {
161        return 0;
162    }
163    (starttime_jiffies as u128 * 1_000_000_000 / clk_tck as u128) as u64
164}
165
166/// Check if `pid` is a descendant of any process whose cmd == `target_cmd`.
167/// Walks up the tree via ppid until hitting root (pid=1, pid=0, self-loop, or cycle).
168pub fn is_descendant(tree: &PidTree, pid: u32, target_cmd: &str) -> bool {
169    let mut current = pid;
170    let mut visited = std::collections::HashSet::new();
171    while let Some(node) = tree.get(&current) {
172        if !visited.insert(current) {
173            break; // cycle detected
174        }
175        if node.cmd == target_cmd {
176            return true;
177        }
178        if node.ppid == 0 || current == node.ppid {
179            break;
180        }
181        current = node.ppid;
182    }
183    false
184}
185
186/// Build a chain string from the process tree.
187/// Format: "102|touch|root;101|sh|root;100|openclaw|root;1|systemd|root"
188/// Falls back to reading /proc if a PID is not in the tree.
189pub fn build_chain(tree: &PidTree, cache: &ProcCache, pid: u32) -> String {
190    let mut parts: Vec<String> = Vec::new();
191    let mut current = pid;
192    let mut visited = std::collections::HashSet::new();
193    loop {
194        // Try tree first for ppid, then cache for user
195        let (ppid, cmd, user) = if let Some(node) = tree.get(&current) {
196            let user = cache
197                .get(&current)
198                .map(|info| info.user.clone())
199                .unwrap_or_else(|| "unknown".to_string());
200            (node.ppid, node.cmd.clone(), user)
201        } else {
202            // Fallback to /proc/{pid}/status
203            let status = match std::fs::read_to_string(format!("/proc/{}/status", current)) {
204                Ok(s) => s,
205                Err(_) => {
206                    parts.push(format!("{}|unknown|unknown", current));
207                    break;
208                }
209            };
210            let cmd = status
211                .lines()
212                .find(|l| l.starts_with("Name:"))
213                .and_then(|l| l.split(':').nth(1))
214                .map(|s| s.trim().to_string())
215                .unwrap_or_else(|| "unknown".to_string());
216            let ppid = status
217                .lines()
218                .find(|l| l.starts_with("PPid:"))
219                .and_then(|l| l.split_whitespace().nth(1))
220                .and_then(|s| s.parse::<u32>().ok())
221                .unwrap_or(0);
222            let user = status
223                .lines()
224                .find(|l| l.starts_with("Uid:"))
225                .and_then(|l| l.split_whitespace().nth(1))
226                .and_then(|uid_str| uid_str.parse::<u32>().ok())
227                .and_then(uid_to_username)
228                .unwrap_or_else(|| "unknown".to_string());
229            (ppid, cmd, user)
230        };
231
232        parts.push(format!("{}|{}|{}", current, cmd, user));
233        if ppid == 0 || current == ppid {
234            break;
235        }
236        if !visited.insert(current) {
237            break; // cycle detected
238        }
239        current = ppid;
240    }
241    parts.join(";")
242}
243
244// ---- Proc Connector ----
245
246pub fn try_create_connector() -> Option<ProcConnector> {
247    let conn = match ProcConnector::new() {
248        Ok(c) => c,
249        Err(e) => {
250            eprintln!(
251                "[WARNING] Failed to create proc connector: {e}. \
252                       Process tree tracking will be unavailable."
253            );
254            return None;
255        }
256    };
257    if let Err(e) = conn.set_nonblocking() {
258        eprintln!("[WARNING] Failed to set proc connector non-blocking: {e}");
259        return None;
260    }
261    Some(conn)
262}
263
264/// Process proc connector events.
265/// Handles Exec (update ProcCache + PidTree cmd), Fork (insert PidTree),
266/// and Exit (optional, no cleanup needed for correct lookups).
267pub fn handle_proc_events(cache: &ProcCache, tree: &PidTree, data: &[u8], n: usize) -> bool {
268    let mut processed = false;
269    for msg in NetlinkMessageIter::new(data, n) {
270        match msg {
271            Ok(Some(ProcEvent::Exec { pid, timestamp_ns, .. })) => {
272                let cmd = std::fs::read_to_string(format!("/proc/{}/comm", pid))
273                    .ok()
274                    .map(|s| s.trim().to_string())
275                    .unwrap_or_else(|| "unknown".to_string());
276
277                let (user, ppid, tgid) =
278                    read_proc_info(pid).unwrap_or_else(|| ("unknown".to_string(), 0, 0));
279
280                cache.insert(
281                    pid,
282                    ProcInfo {
283                        cmd: cmd.clone(),
284                        user,
285                        ppid,
286                        tgid,
287                        start_time_ns: timestamp_ns,
288                    },
289                );
290
291                // Also update PidTree with the resolved cmd/ppid
292                tree.insert(
293                    pid,
294                    PidNode {
295                        ppid,
296                        cmd,
297                        start_time_ns: timestamp_ns,
298                    },
299                );
300
301                processed = true;
302            }
303            Ok(Some(ProcEvent::Fork {
304                child_pid,
305                parent_pid,
306                timestamp_ns,
307                ..
308            })) => {
309                // Pre-populate tree: we know the parent but not cmd yet
310                tree.insert(
311                    child_pid,
312                    PidNode {
313                        ppid: parent_pid,
314                        cmd: String::new(),
315                        start_time_ns: timestamp_ns,
316                    },
317                );
318                processed = true;
319            }
320            Ok(Some(ProcEvent::Exit { .. })) => {
321                // Keep the node — it's still valid for historical chain lookups
322                // of events that happened before this process exited.
323                processed = true;
324            }
325            Ok(Some(_)) => {} // Uid, Gid, Sid, etc. — ignore
326            Ok(None) => {}    // Control message (NLMSG_NOOP, NLMSG_DONE, NLMSG_ERROR-ACK)
327            Err(proc_connector::Error::Overrun) => {
328                eprintln!("[WARNING] proc connector overrun — some exec events may have been lost");
329            }
330            Err(proc_connector::Error::Truncated) => {
331                eprintln!("[WARNING] proc connector truncated message, continuing...");
332            }
333            Err(e) => {
334                eprintln!("proc connector parse error: {e}");
335            }
336        }
337    }
338    processed
339}
340
341fn read_proc_info(pid: u32) -> Option<(String, u32, u32)> {
342    let status = std::fs::read_to_string(format!("/proc/{}/status", pid)).ok()?;
343    let mut user = String::new();
344    let mut ppid = 0u32;
345    let mut tgid = 0u32;
346    for line in status.lines() {
347        if let Some(val) = line.strip_prefix("Uid:") {
348            let uid: u32 = val.split_whitespace().next()?.parse().ok()?;
349            user = uid_to_username(uid).unwrap_or_else(|| "unknown".to_string());
350        } else if let Some(val) = line.strip_prefix("PPid:") {
351            ppid = val.trim().parse().ok()?;
352        } else if let Some(val) = line.strip_prefix("Tgid:") {
353            tgid = val.trim().parse().ok()?;
354        }
355    }
356    Some((user, ppid, tgid))
357}
358
#[cfg(test)]
mod tests {
    use super::*;

    /// Tree node fixture with no recorded start time.
    fn node(ppid: u32, cmd: &str) -> PidNode {
        PidNode {
            ppid,
            cmd: cmd.to_string(),
            start_time_ns: 0,
        }
    }

    /// Cache entry fixture with no recorded start time.
    fn info(cmd: &str, user: &str, ppid: u32, tgid: u32) -> ProcInfo {
        ProcInfo {
            cmd: cmd.to_string(),
            user: user.to_string(),
            ppid,
            tgid,
            start_time_ns: 0,
        }
    }

    #[test]
    fn test_proc_cache_insert_and_get() {
        let cache = new_cache();
        cache.insert(12345, info("test_process", "testuser", 1, 12345));

        let found = cache.get(&12345).unwrap();
        assert_eq!(found.cmd, "test_process");
        assert_eq!(found.ppid, 1);
        assert_eq!(found.tgid, 12345);
    }

    #[test]
    fn test_is_descendant() {
        let tree = new_pid_tree();
        tree.insert(1, node(0, "systemd"));
        tree.insert(100, node(1, "openclaw"));
        tree.insert(101, node(100, "sh"));
        tree.insert(102, node(101, "")); // Fork, no Exec yet

        for pid in [102, 101, 100] {
            assert!(is_descendant(&tree, pid, "openclaw"));
        }
        assert!(!is_descendant(&tree, 102, "nginx"));
        assert!(!is_descendant(&tree, 1, "openclaw"));
    }

    #[test]
    fn test_is_descendant_unknown_pid() {
        let tree = new_pid_tree();
        tree.insert(1, node(0, "systemd"));
        assert!(!is_descendant(&tree, 99999, "systemd"));
    }

    #[test]
    fn test_is_descendant_cycle() {
        // Parent links form a loop 1→2→3→1; the walk must still terminate.
        let tree = new_pid_tree();
        for (pid, ppid, cmd) in [(1, 2, "a"), (2, 3, "b"), (3, 1, "c")] {
            tree.insert(pid, node(ppid, cmd));
        }
        // No node is named "nginx", so the answer is false once the cycle is hit.
        assert!(!is_descendant(&tree, 1, "nginx"));
    }

    #[test]
    fn test_build_chain_cycle() {
        // Parent links form a loop 1→2→3→1; the walk must still terminate.
        let tree = new_pid_tree();
        let cache = new_cache();
        for (pid, ppid, cmd) in [(1, 2, "a"), (2, 3, "b"), (3, 1, "c")] {
            tree.insert(pid, node(ppid, cmd));
        }
        for (pid, ppid, cmd) in [(1, 2, "a"), (2, 3, "b"), (3, 1, "c")] {
            cache.insert(pid, info(cmd, "u", ppid, pid));
        }

        let chain = build_chain(&tree, &cache, 1);
        // A partial chain is expected rather than an endless one.
        assert!(!chain.is_empty());
        assert!(chain.starts_with("1|"));
    }

    #[test]
    fn test_build_chain_from_tree() {
        let tree = new_pid_tree();
        let cache = new_cache();
        let processes = [
            (1, 0, "systemd"),
            (100, 1, "openclaw"),
            (101, 100, "sh"),
            (102, 101, "touch"),
        ];
        for (pid, ppid, cmd) in processes {
            tree.insert(pid, node(ppid, cmd));
            cache.insert(pid, info(cmd, "root", ppid, pid));
        }

        let chain = build_chain(&tree, &cache, 102);
        assert_eq!(
            chain,
            "102|touch|root;101|sh|root;100|openclaw|root;1|systemd|root"
        );
    }

    #[test]
    fn test_build_chain_single() {
        let tree = new_pid_tree();
        let cache = new_cache();
        tree.insert(1, node(0, "systemd"));
        cache.insert(1, info("systemd", "root", 0, 1));

        let chain = build_chain(&tree, &cache, 1);
        assert_eq!(chain, "1|systemd|root");
    }

    #[test]
    fn test_snapshot_pid1() {
        // PID 1 always exists on Linux
        let tree = new_pid_tree();
        snapshot_process_tree(&tree);
        assert!(tree.contains_key(&1), "PID 1 should exist after snapshot");
        if let Some(init) = tree.get(&1) {
            assert!(!init.cmd.is_empty(), "PID 1 should have a cmd");
            assert_eq!(init.ppid, 0, "PID 1\'s ppid should be 0");
        }
    }

    #[test]
    fn test_read_proc_start_time_ns_pid1() {
        // PID 1 always exists on Linux — should have a non-zero start time.
        let ns = read_proc_start_time_ns(1);
        assert!(ns > 0, "PID 1 start_time_ns should be > 0, got {ns}");
    }

    #[test]
    fn test_read_proc_start_time_ns_nonexistent() {
        // A non-existent PID should return 0.
        // Use PID 0x7FFFFFFF (max valid PID on most systems is lower).
        let ns = read_proc_start_time_ns(0x7FFFFFFF);
        assert_eq!(ns, 0, "non-existent PID should return 0, got {ns}");
    }
}