trusty_common/sys_metrics.rs
1//! Process resident-memory (RSS) and CPU sampling for daemon `/health`.
2//!
3//! Why: Every trusty-* daemon wants to report its own RSS and CPU usage on
4//! its health endpoint, and the sampling logic (resolve our PID, refresh
5//! only this process, convert units) is identical across them.
6//! Centralising it here avoids three near-identical copies drifting.
7//! What: [`SysMetrics`] wraps a `sysinfo::System` scoped to the current
8//! process. [`SysMetrics::sample`] refreshes and returns
//! `(rss_mb, cpu_pct)`. CPU usage is a delta between two refreshes;
//! [`SysMetrics::new`] performs a priming refresh, so the *first* sample
//! reports usage since construction (it may still read `0.0` if taken
//! almost immediately afterwards) and later samples report the usage
//! observed since the previous call. Callers polling `/health`
//! every ~2 s get meaningful CPU readings without any background task.
13//! Test: see the `tests` module — `sample_does_not_panic` exercises the
14//! refresh path; `rss_is_plausible` asserts the test process reports a
15//! non-trivial, non-absurd RSS.
16
17use sysinfo::{Pid, ProcessRefreshKind, ProcessesToUpdate, RefreshKind, System};
18
19/// Per-process RSS + CPU sampler bound to the current process.
20///
21/// Why: holding the `System` between calls is required for CPU measurement —
22/// `sysinfo` derives CPU% from the delta in consumed CPU time between
23/// two refreshes, so the same instance must be reused.
24/// What: stores the long-lived `System` and our own `Pid`. Not `Clone` — it
25/// carries mutable sampling state; share it behind a `Mutex` if multiple
26/// handlers need it.
27/// Test: `sample_does_not_panic`, `rss_is_plausible`.
28pub struct SysMetrics {
29 sys: System,
30 pid: Pid,
31}
32
33impl SysMetrics {
34 /// Construct a sampler for the current process.
35 ///
36 /// Why: the daemon builds one of these at startup and samples it on each
37 /// `/health` request.
38 /// What: resolves `std::process::id()` into a `sysinfo::Pid` and creates a
39 /// `System` configured to refresh only process memory + CPU (not the
40 /// whole machine), then performs one priming refresh so the next
41 /// `sample` call has a baseline for the CPU delta.
42 /// Test: `sample_does_not_panic`.
43 #[must_use]
44 pub fn new() -> Self {
45 let pid = Pid::from_u32(std::process::id());
46 let mut sys = System::new_with_specifics(
47 RefreshKind::nothing()
48 .with_processes(ProcessRefreshKind::nothing().with_memory().with_cpu()),
49 );
50 // Prime the CPU baseline — the first delta-based reading after this
51 // will be meaningful rather than a spurious 0/huge value.
52 sys.refresh_processes_specifics(
53 ProcessesToUpdate::Some(&[pid]),
54 true,
55 ProcessRefreshKind::nothing().with_memory().with_cpu(),
56 );
57 Self { sys, pid }
58 }
59
60 /// Refresh and return `(rss_mb, cpu_pct)` for the current process.
61 ///
62 /// Why: the `/health` handler calls this once per request. Polling more
63 /// often than ~once per 500 ms yields noisy CPU readings because the
64 /// delta window shrinks; `/health` is typically polled every 2 s so
65 /// this is not a concern in practice.
66 /// What: refreshes this process's memory + CPU stats. Returns RSS in
67 /// whole megabytes (`bytes / 1_048_576`) and CPU as a percentage
68 /// where `100.0` means one fully-saturated core (sysinfo's
69 /// convention — a process on 4 cores can exceed 100). If the process
70 /// cannot be resolved (extremely rare; only in containers with
71 /// `/proc` hidden), returns `(0, 0.0)`.
72 /// Test: `sample_does_not_panic`, `rss_is_plausible`.
73 pub fn sample(&mut self) -> (u64, f32) {
74 self.sys.refresh_processes_specifics(
75 ProcessesToUpdate::Some(&[self.pid]),
76 true,
77 ProcessRefreshKind::nothing().with_memory().with_cpu(),
78 );
79 match self.sys.process(self.pid) {
80 Some(proc) => (proc.memory() / (1024 * 1024), proc.cpu_usage()),
81 None => (0, 0.0),
82 }
83 }
84}
85
86impl Default for SysMetrics {
87 fn default() -> Self {
88 Self::new()
89 }
90}
91
/// Total size in bytes of all regular files beneath `dir`, at any depth.
///
/// Why: the daemon `/health` endpoint reports `disk_bytes`, the on-disk
/// footprint of the data directory. Measuring on demand keeps the figure
/// accurate without a separate accounting layer.
/// What: visits `dir` and every sub-directory using an explicit work stack,
/// adding `metadata().len()` for each regular file (saturating at
/// `u64::MAX`). Symlink entries are never followed, which avoids
/// double-counting and cycles. Anything that cannot be read (a directory
/// listing, an entry, its type, or its metadata) is skipped instead of
/// aborting the walk, so a health endpoint degrades gracefully. Yields `0`
/// when `dir` does not exist or cannot be listed.
/// Test: `dir_size_sums_files` builds files of known sizes and checks the
/// total; `dir_size_missing_dir_is_zero` covers the absent-path case.
#[must_use]
pub fn dir_size_bytes(dir: &std::path::Path) -> u64 {
    let mut bytes = 0u64;
    // Directories still waiting to be listed; seeded with the root.
    let mut pending = vec![dir.to_path_buf()];

    while let Some(current) = pending.pop() {
        let Ok(listing) = std::fs::read_dir(&current) else {
            // Missing or unreadable directory: contributes nothing.
            continue;
        };
        for item in listing.flatten() {
            match item.file_type() {
                // Symlinks are ignored outright, whatever they point at.
                Ok(kind) if kind.is_symlink() => {}
                Ok(kind) if kind.is_dir() => pending.push(item.path()),
                Ok(kind) if kind.is_file() => {
                    if let Ok(meta) = item.metadata() {
                        bytes = bytes.saturating_add(meta.len());
                    }
                }
                // Unknown type (sockets, fifos, ...) or unreadable type.
                _ => {}
            }
        }
    }

    bytes
}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Two back-to-back samples must succeed and give a non-negative CPU%.
    #[test]
    fn sample_does_not_panic() {
        let mut sampler = SysMetrics::new();
        let _ = sampler.sample();
        // The follow-up sample is the one that exercises the CPU-delta path.
        let (_, cpu2) = sampler.sample();
        assert!(cpu2 >= 0.0, "cpu usage must be non-negative, got {cpu2}");
    }

    /// The reported RSS must be in megabytes, i.e. not absurdly large.
    #[test]
    fn rss_is_plausible() {
        let (rss, _) = SysMetrics::new().sample();
        // Only the upper bound is checked: a real test binary normally has
        // RSS > 0, but 0 is tolerated for sandboxed CI with restricted /proc.
        assert!(
            rss < 1024 * 1024,
            "RSS implausibly large ({rss} MB) — unit must be MB"
        );
    }

    /// Files at the top level and in a sub-directory are all counted.
    #[test]
    fn dir_size_sums_files() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let sub = root.join("sub");
        std::fs::create_dir(&sub).unwrap();

        let fixtures = [
            (root.join("a.txt"), 100usize),
            (root.join("b.txt"), 250),
            (sub.join("c.txt"), 50),
        ];
        for (path, len) in &fixtures {
            std::fs::write(path, vec![0u8; *len]).unwrap();
        }

        assert_eq!(dir_size_bytes(root), 400);
    }

    /// A path that does not exist is reported as zero bytes.
    #[test]
    fn dir_size_missing_dir_is_zero() {
        let absent = std::path::Path::new("/nonexistent/trusty/path/xyz");
        assert_eq!(dir_size_bytes(absent), 0);
    }
}