snapdir_core/resources.rs
1//! Best-effort system-resource samplers (CPU, RSS, total RAM).
2//!
3//! Per the library-purity principle this module does **no** terminal I/O and
4//! reads **no** `$HOME`/config/environment for *behavior*: it queries live
5//! **system** state via a few `libc` syscalls (CPU time, resident set size,
6//! physical RAM) so a higher-level controller can make adaptive decisions
7//! (e.g. a concurrency/throughput guardrail in the stores lane). It is purely
8//! advisory runtime telemetry and is **never** consulted by the walk, the
9//! manifest builder, or any snapshot computation — a walk samples identically
10//! whether or not anything reads these numbers.
11//!
12//! Everything here is strictly best-effort: every platform read that fails
13//! yields `None` rather than panicking. Each `unsafe` block performs exactly
14//! one syscall into a plain-old-data struct and checks the return code before
15//! trusting the result; no `unwrap`/`expect` is used on any syscall path.
16//!
17//! The [`CpuSampler`] mirrors the renderer's sampler in
18//! `snapdir-cli`'s `progress` module, and [`resident_set_bytes`] mirrors its
19//! `sample_rss`; [`total_ram_bytes`] is new (the controller needs a
20//! memory-budget denominator that the CLI renderer never sampled).
21
22// This module converts kernel time/size counters between integer syscall
23// outputs and `f64` to derive a CPU percentage. The lossy/sign casts are
24// inherent to an *advisory* utilization number (never a correctness path), so
25// the pedantic cast lints are allowed module-wide rather than peppered onto
26// every arithmetic line, matching the CLI progress engine's convention.
27#![allow(
28 clippy::cast_precision_loss,
29 clippy::cast_possible_truncation,
30 clippy::cast_sign_loss,
31 clippy::cast_lossless
32)]
33
34use std::time::Instant;
35
36/// Reads cumulative process CPU time (user + system) in seconds via
37/// `getrusage(RUSAGE_SELF)`. Returns `None` on failure; never panics.
38fn rusage_cpu_secs() -> Option<f64> {
39 // SAFETY: rusage is plain POD; we pass a valid &mut to a single syscall and
40 // only read the struct after confirming the return code is 0.
41 unsafe {
42 let mut ru: libc::rusage = std::mem::zeroed();
43 if libc::getrusage(libc::RUSAGE_SELF, std::ptr::addr_of_mut!(ru)) != 0 {
44 return None;
45 }
46 let secs = |tv: libc::timeval| tv.tv_sec as f64 + tv.tv_usec as f64 / 1_000_000.0;
47 Some(secs(ru.ru_utime) + secs(ru.ru_stime))
48 }
49}
50
/// Samples process CPU utilization as a percentage of total machine capacity.
///
/// The busy fraction of each window (`cpu_delta / wall_delta`) is divided by
/// the number of available cores, so `100%` means "every available core fully
/// busy" and one saturated core on an `N`-core machine reads roughly
/// `100 / N`. Readings are clamped to `[0, 100 × cores]`; a value above `100`
/// would mean the process consumed more CPU time than `cores` cores could
/// supply during the window, so the upper bound is a loose sanity cap.
///
/// Stateful: each [`poll`](CpuSampler::poll) measures the CPU consumed since the
/// previous poll over the elapsed wall-clock window. The first poll only
/// establishes a baseline and returns `None`.
///
/// Cloning a sampler copies its current baseline, so the clone's next poll
/// measures from the same instant as the original's would.
#[derive(Debug, Clone)]
pub struct CpuSampler {
    /// `(instant, cumulative_cpu_seconds)` captured at the previous poll, or
    /// `None` until the first poll has established a baseline.
    prev: Option<(Instant, f64)>,
    /// Available parallelism (logical cores), used to normalize the percentage.
    cores: f64,
}
65
66impl CpuSampler {
67 /// Creates a sampler. `cores` comes from
68 /// [`std::thread::available_parallelism`], falling back to `1` if the count
69 /// is unavailable.
70 #[must_use]
71 pub fn new() -> Self {
72 let cores = std::thread::available_parallelism().map_or(1.0, |n| n.get() as f64);
73 Self { prev: None, cores }
74 }
75
76 /// Polls CPU usage. The first call establishes a baseline and returns
77 /// `None`; subsequent calls return `Some(pct)` over the elapsed window —
78 /// `(cpu_delta / wall_delta) / cores * 100`, clamped to `[0, 100 × cores]` —
79 /// or `None` if `getrusage` is unavailable or no wall time has elapsed.
80 pub fn poll(&mut self) -> Option<f64> {
81 let now = Instant::now();
82 let cpu = rusage_cpu_secs()?;
83 match self.prev {
84 None => {
85 self.prev = Some((now, cpu));
86 None
87 }
88 Some((prev_t, prev_cpu)) => {
89 let wall = now.duration_since(prev_t).as_secs_f64();
90 self.prev = Some((now, cpu));
91 if wall <= 0.0 {
92 return None;
93 }
94 let cpu_delta = (cpu - prev_cpu).max(0.0);
95 let pct = (cpu_delta / wall) / self.cores * 100.0;
96 Some(pct.clamp(0.0, 100.0 * self.cores))
97 }
98 }
99 }
100}
101
102impl Default for CpuSampler {
103 fn default() -> Self {
104 Self::new()
105 }
106}
107
108/// Returns the current process resident set size (RSS) in bytes, best-effort.
109///
110/// - Linux: 2nd field (resident pages) of `/proc/self/statm` × page size.
111/// - macOS: mach `task_info(MACH_TASK_BASIC_INFO)` `.resident_size`.
112/// - Other targets: `None`.
113///
114/// Returns `None` on any error; never panics.
115#[must_use]
116pub fn resident_set_bytes() -> Option<u64> {
117 #[cfg(target_os = "linux")]
118 {
119 let statm = std::fs::read_to_string("/proc/self/statm").ok()?;
120 let resident_pages: u64 = statm.split_whitespace().nth(1)?.parse().ok()?;
121 // SAFETY: sysconf is a pure query with no pointer args; we check its
122 // return for the documented `-1` failure sentinel before using it.
123 let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
124 if page_size <= 0 {
125 return None;
126 }
127 Some(resident_pages.saturating_mul(page_size as u64))
128 }
129 #[cfg(target_os = "macos")]
130 {
131 // SAFETY: task_info writes into a correctly-sized info struct; we pass
132 // the matching flavor + count and only read `resident_size` after the
133 // kern_return_t is KERN_SUCCESS. `mach_task_self_` is the static that
134 // the deprecated `mach_task_self()` helper merely reads; use it
135 // directly to avoid pulling the `mach2` crate for one port handle.
136 #[allow(deprecated)]
137 unsafe {
138 let mut info: libc::mach_task_basic_info = std::mem::zeroed();
139 let mut count: libc::mach_msg_type_number_t =
140 (std::mem::size_of::<libc::mach_task_basic_info>()
141 / std::mem::size_of::<libc::integer_t>())
142 as libc::mach_msg_type_number_t;
143 let kr = libc::task_info(
144 libc::mach_task_self_,
145 libc::MACH_TASK_BASIC_INFO,
146 std::ptr::addr_of_mut!(info).cast(),
147 std::ptr::addr_of_mut!(count),
148 );
149 if kr == libc::KERN_SUCCESS {
150 Some(info.resident_size)
151 } else {
152 None
153 }
154 }
155 }
156 #[cfg(not(any(target_os = "linux", target_os = "macos")))]
157 {
158 None
159 }
160}
161
162/// Returns the total physical RAM of the machine in bytes, best-effort.
163///
164/// - Linux: `sysconf(_SC_PHYS_PAGES) × sysconf(_SC_PAGE_SIZE)`.
165/// - macOS: `sysctlbyname("hw.memsize", …)`.
166/// - Other targets: `None`.
167///
168/// Returns `None` on any error; never panics. The adaptive controller uses this
169/// as the denominator of its memory-budget guardrail.
170#[must_use]
171pub fn total_ram_bytes() -> Option<u64> {
172 #[cfg(target_os = "linux")]
173 {
174 // SAFETY: both are pure sysconf queries with no pointer args; we check
175 // each for the documented `-1`/`0` failure sentinels before using them.
176 let pages = unsafe { libc::sysconf(libc::_SC_PHYS_PAGES) };
177 let page_size = unsafe { libc::sysconf(libc::_SC_PAGE_SIZE) };
178 if pages <= 0 || page_size <= 0 {
179 return None;
180 }
181 Some((pages as u64).saturating_mul(page_size as u64))
182 }
183 #[cfg(target_os = "macos")]
184 {
185 let mut memsize: u64 = 0;
186 let mut len: libc::size_t = std::mem::size_of::<u64>();
187 // The sysctl name is a NUL-terminated C string.
188 let name = c"hw.memsize";
189 // SAFETY: we pass a valid NUL-terminated name, a correctly-sized output
190 // buffer (`&mut u64`) with its matching length, and no new-value
191 // buffer; the result is only trusted when the call returns 0 and the
192 // kernel reported the full 8-byte width.
193 let rc = unsafe {
194 libc::sysctlbyname(
195 name.as_ptr(),
196 std::ptr::addr_of_mut!(memsize).cast(),
197 std::ptr::addr_of_mut!(len),
198 std::ptr::null_mut(),
199 0,
200 )
201 };
202 if rc == 0 && len == std::mem::size_of::<u64>() && memsize > 0 {
203 Some(memsize)
204 } else {
205 None
206 }
207 }
208 #[cfg(not(any(target_os = "linux", target_os = "macos")))]
209 {
210 None
211 }
212}
213
#[cfg(test)]
mod tests {
    use super::*;

    /// Upper plausibility bound for a process RSS: one tebibyte.
    const RSS_CEILING: u64 = 1024 * 1024 * 1024 * 1024;
    /// Upper plausibility bound for machine RAM: one pebibyte.
    const RAM_CEILING: u64 = 1024 * RSS_CEILING;

    /// Smoke test: the RSS and CPU samplers never panic and, when they do
    /// produce a value, it lies within sane bounds.
    #[test]
    fn resources_samplers_are_bounded_and_safe() {
        // RSS is allowed to be unavailable, but a reported value must be
        // positive and below the plausibility ceiling.
        if let Some(rss) = resident_set_bytes() {
            assert!(rss > 0, "rss should be positive when sampled: {rss}");
            assert!(rss < RSS_CEILING, "rss implausibly large: {rss}");
        }

        // The first poll only records a baseline; a later poll may yield
        // nothing or a finite, non-negative value of at most 100 × cores.
        let cores = match std::thread::available_parallelism() {
            Ok(count) => count.get() as f64,
            Err(_) => 1.0,
        };
        let mut sampler = CpuSampler::new();
        assert!(
            sampler.poll().is_none(),
            "first poll must be a baseline (None)"
        );

        // Burn a few cycles so the second window has a chance of being > 0.
        let acc = (0..1_000_000u64).fold(0u64, u64::wrapping_add);
        std::hint::black_box(acc);

        if let Some(pct) = sampler.poll() {
            assert!(pct >= 0.0, "cpu pct negative: {pct}");
            assert!(pct.is_finite(), "cpu pct not finite: {pct}");
            assert!(
                pct <= 100.0 * cores + 1e-6,
                "cpu pct exceeds capacity: {pct} (cores {cores})"
            );
        }
    }

    /// Total RAM must be readable and sane on the supported dev/CI platforms;
    /// elsewhere it merely must not panic.
    #[test]
    fn resources_total_ram_is_positive() {
        let ram = total_ram_bytes();
        #[cfg(any(target_os = "linux", target_os = "macos"))]
        {
            // Supported platform: a value is mandatory and must be > 0 and
            // not absurdly large.
            let n = ram.expect("total_ram_bytes must be Some on linux/macos");
            assert!(n > 0, "total ram should be positive: {n}");
            assert!(n < RAM_CEILING, "total ram implausibly large: {n}");
        }
        #[cfg(not(any(target_os = "linux", target_os = "macos")))]
        {
            // Unknown OS: `None` is acceptable, but any reported value must
            // still be positive.
            if let Some(n) = ram {
                assert!(n > 0, "total ram should be positive when sampled: {n}");
            }
        }
    }
}