Skip to main content

trusty_common/
sys_metrics.rs

1//! Process resident-memory (RSS) and CPU sampling for daemon `/health`.
2//!
3//! Why: Every trusty-* daemon wants to report its own RSS and CPU usage on
4//!      its health endpoint, and the sampling logic (resolve our PID, refresh
5//!      only this process, convert units) is identical across them.
6//!      Centralising it here avoids three near-identical copies drifting.
//! What: [`SysMetrics`] wraps a `sysinfo::System` scoped to the current
//!      process. [`SysMetrics::sample`] refreshes and returns
//!      `(rss_mb, cpu_pct)`. CPU usage is a delta between two refreshes;
//!      [`SysMetrics::new`] performs the first (priming) refresh, so every
//!      `sample` call reports the usage observed since the previous
//!      refresh. A sample taken almost immediately after the previous
//!      refresh may still read `0.0` because the delta window is too short.
//!      Callers polling `/health` every ~2 s get meaningful CPU readings
//!      without any background task.
13//! Test: see the `tests` module — `sample_does_not_panic` exercises the
14//!      refresh path; `rss_is_plausible` asserts the test process reports a
15//!      non-trivial, non-absurd RSS.
16
17use sysinfo::{Pid, ProcessRefreshKind, ProcessesToUpdate, RefreshKind, System};
18
19/// Per-process RSS + CPU sampler bound to the current process.
20///
21/// Why: holding the `System` between calls is required for CPU measurement —
22///      `sysinfo` derives CPU% from the delta in consumed CPU time between
23///      two refreshes, so the same instance must be reused.
24/// What: stores the long-lived `System` and our own `Pid`. Not `Clone` — it
25///      carries mutable sampling state; share it behind a `Mutex` if multiple
26///      handlers need it.
27/// Test: `sample_does_not_panic`, `rss_is_plausible`.
28pub struct SysMetrics {
29    sys: System,
30    pid: Pid,
31}
32
33impl SysMetrics {
34    /// Construct a sampler for the current process.
35    ///
36    /// Why: the daemon builds one of these at startup and samples it on each
37    ///      `/health` request.
38    /// What: resolves `std::process::id()` into a `sysinfo::Pid` and creates a
39    ///      `System` configured to refresh only process memory + CPU (not the
40    ///      whole machine), then performs one priming refresh so the next
41    ///      `sample` call has a baseline for the CPU delta.
42    /// Test: `sample_does_not_panic`.
43    #[must_use]
44    pub fn new() -> Self {
45        let pid = Pid::from_u32(std::process::id());
46        let mut sys = System::new_with_specifics(
47            RefreshKind::nothing()
48                .with_processes(ProcessRefreshKind::nothing().with_memory().with_cpu()),
49        );
50        // Prime the CPU baseline — the first delta-based reading after this
51        // will be meaningful rather than a spurious 0/huge value.
52        sys.refresh_processes_specifics(
53            ProcessesToUpdate::Some(&[pid]),
54            true,
55            ProcessRefreshKind::nothing().with_memory().with_cpu(),
56        );
57        Self { sys, pid }
58    }
59
60    /// Refresh and return `(rss_mb, cpu_pct)` for the current process.
61    ///
62    /// Why: the `/health` handler calls this once per request. Polling more
63    ///      often than ~once per 500 ms yields noisy CPU readings because the
64    ///      delta window shrinks; `/health` is typically polled every 2 s so
65    ///      this is not a concern in practice.
66    /// What: refreshes this process's memory + CPU stats. Returns RSS in
67    ///      whole megabytes (`bytes / 1_048_576`) and CPU as a percentage
68    ///      where `100.0` means one fully-saturated core (sysinfo's
69    ///      convention — a process on 4 cores can exceed 100). If the process
70    ///      cannot be resolved (extremely rare; only in containers with
71    ///      `/proc` hidden), returns `(0, 0.0)`.
72    /// Test: `sample_does_not_panic`, `rss_is_plausible`.
73    pub fn sample(&mut self) -> (u64, f32) {
74        self.sys.refresh_processes_specifics(
75            ProcessesToUpdate::Some(&[self.pid]),
76            true,
77            ProcessRefreshKind::nothing().with_memory().with_cpu(),
78        );
79        match self.sys.process(self.pid) {
80            Some(proc) => (proc.memory() / (1024 * 1024), proc.cpu_usage()),
81            None => (0, 0.0),
82        }
83    }
84}
85
86impl Default for SysMetrics {
87    fn default() -> Self {
88        Self::new()
89    }
90}
91
/// Total size in bytes of all regular files beneath `dir`, at any depth.
///
/// Why: daemon `/health` reports `disk_bytes` — the on-disk footprint of the
///      data directory. Measuring on demand avoids maintaining a separate
///      running tally.
/// What: visits `dir` and every sub-directory reachable from it, adding up
///      `metadata().len()` for each regular file. Symlinks are skipped, not
///      followed. Entries or directories that cannot be read are ignored
///      instead of aborting the walk. The sum saturates at `u64::MAX`. A
///      `dir` that cannot be listed (e.g. it does not exist) yields `0`.
/// Test: `dir_size_sums_files` creates files of known sizes and asserts the
///      total; `dir_size_missing_dir_is_zero` covers the absent-path case.
#[must_use]
pub fn dir_size_bytes(dir: &std::path::Path) -> u64 {
    // Directories still waiting to be listed; seeded with the root.
    let mut pending = vec![dir.to_path_buf()];
    let mut bytes = 0u64;

    while let Some(current) = pending.pop() {
        // Unlistable directory (missing, not a directory, no permission): skip.
        let Ok(listing) = std::fs::read_dir(&current) else {
            continue;
        };
        for item in listing.flatten() {
            match item.file_type() {
                // Never follow symlinks, whatever they point at.
                Ok(kind) if kind.is_symlink() => {}
                Ok(kind) if kind.is_dir() => pending.push(item.path()),
                Ok(kind) if kind.is_file() => {
                    if let Ok(meta) = item.metadata() {
                        bytes = bytes.saturating_add(meta.len());
                    }
                }
                // Unknown type (sockets, devices, ...) or unreadable entry.
                _ => {}
            }
        }
    }

    bytes
}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Two back-to-back samples must succeed and report a non-negative CPU
    /// figure; the second call goes through the CPU-delta path.
    #[test]
    fn sample_does_not_panic() {
        let mut sampler = SysMetrics::new();
        let _first = sampler.sample();
        let (_, cpu2) = sampler.sample();
        assert!(cpu2 >= 0.0, "cpu usage must be non-negative, got {cpu2}");
    }

    /// Guards the unit conversion: a value of a million "MB" or more would
    /// mean bytes or kilobytes leaked through unconverted.
    #[test]
    fn rss_is_plausible() {
        // Upper bound only — an RSS of 0 is tolerated for sandboxed CI where
        // /proc is restricted and the process cannot be resolved.
        const MAX_PLAUSIBLE_MB: u64 = 1024 * 1024;
        let (rss, _) = SysMetrics::new().sample();
        assert!(
            rss < MAX_PLAUSIBLE_MB,
            "RSS implausibly large ({rss} MB) — unit must be MB"
        );
    }

    /// Files at the top level and in a sub-directory are all counted.
    #[test]
    fn dir_size_sums_files() {
        let root = tempfile::tempdir().expect("tempdir");
        let nested = root.path().join("sub");
        std::fs::create_dir(&nested).unwrap();

        let fixtures = [
            (root.path().join("a.txt"), 100usize),
            (root.path().join("b.txt"), 250),
            (nested.join("c.txt"), 50),
        ];
        for (path, len) in &fixtures {
            std::fs::write(path, vec![0u8; *len]).unwrap();
        }

        assert_eq!(dir_size_bytes(root.path()), 400);
    }

    /// A path that does not exist is reported as zero bytes, not an error.
    #[test]
    fn dir_size_missing_dir_is_zero() {
        let absent = std::path::Path::new("/nonexistent/trusty/path/xyz");
        assert_eq!(dir_size_bytes(absent), 0);
    }
}