Skip to main content

snapdir_core/
resources.rs

//! Best-effort system-resource samplers (CPU, RSS, total RAM).
//!
//! Per the library-purity principle this module does **no** terminal I/O and
//! reads **no** `$HOME`/config/environment for *behavior*: it queries live
//! **system** state via a few `libc` syscalls (CPU time, resident set size,
//! physical RAM) so a higher-level controller can make adaptive decisions
//! (e.g. a concurrency/throughput guardrail in the stores lane). It is purely
//! advisory runtime telemetry and is **never** consulted by the walk, the
//! manifest builder, or any snapshot computation — a walk samples identically
//! whether or not anything reads these numbers.
//!
//! Everything here is strictly best-effort: every platform read that fails
//! yields `None` rather than panicking. Each `unsafe` block performs exactly
//! one syscall into a plain-old-data struct and checks the return code before
//! trusting the result; no `unwrap`/`expect` is used on any syscall path.
//!
//! The [`CpuSampler`] mirrors the renderer's sampler in
//! `snapdir-cli`'s `progress` module, and [`resident_set_bytes`] mirrors its
//! `sample_rss`; [`total_ram_bytes`] is new (the controller needs a
//! memory-budget denominator that the CLI renderer never sampled).
21
// This module converts kernel time/size counters between integer syscall
// outputs and `f64` to derive a CPU percentage. The lossy/sign casts are
// inherent to an *advisory* utilization number (never a correctness path), so
// the pedantic cast lints are allowed module-wide rather than peppered onto
// every arithmetic line, matching the CLI progress engine's convention.
27#![allow(
28    clippy::cast_precision_loss,
29    clippy::cast_possible_truncation,
30    clippy::cast_sign_loss,
31    clippy::cast_lossless
32)]
33
34use std::time::Instant;
35
36/// Reads cumulative process CPU time (user + system) in seconds via
37/// `getrusage(RUSAGE_SELF)`. Returns `None` on failure; never panics.
38fn rusage_cpu_secs() -> Option<f64> {
39    // SAFETY: rusage is plain POD; we pass a valid &mut to a single syscall and
40    // only read the struct after confirming the return code is 0.
41    unsafe {
42        let mut ru: libc::rusage = std::mem::zeroed();
43        if libc::getrusage(libc::RUSAGE_SELF, std::ptr::addr_of_mut!(ru)) != 0 {
44            return None;
45        }
46        let secs = |tv: libc::timeval| tv.tv_sec as f64 + tv.tv_usec as f64 / 1_000_000.0;
47        Some(secs(ru.ru_utime) + secs(ru.ru_stime))
48    }
49}
50
/// Samples process CPU utilization as a percentage of total machine capacity.
///
/// The raw "cores' worth of CPU" figure (`cpu_delta / wall_delta`) is divided
/// by the number of available cores, so `100%` means "every available core
/// fully busy" and a single saturated core on an `N`-core machine reads as
/// roughly `100 / N` percent. Results are clamped to `[0, 100 × cores]`; that
/// upper bound is looser than the nominal 100% ceiling, so a reading slightly
/// above `100` is passed through rather than cut off.
///
/// Stateful: each [`poll`](CpuSampler::poll) measures the CPU consumed since the
/// previous poll over the elapsed wall-clock window. The first poll only
/// establishes a baseline and returns `None`.
#[derive(Debug, Clone)]
pub struct CpuSampler {
    /// `(instant, cumulative_cpu_seconds)` captured at the previous poll, or
    /// `None` while no baseline has been recorded yet.
    prev: Option<(Instant, f64)>,
    /// Available parallelism (logical cores), used to normalize the percentage.
    cores: f64,
}
65
66impl CpuSampler {
67    /// Creates a sampler. `cores` comes from
68    /// [`std::thread::available_parallelism`], falling back to `1` if the count
69    /// is unavailable.
70    #[must_use]
71    pub fn new() -> Self {
72        let cores = std::thread::available_parallelism().map_or(1.0, |n| n.get() as f64);
73        Self { prev: None, cores }
74    }
75
76    /// Polls CPU usage. The first call establishes a baseline and returns
77    /// `None`; subsequent calls return `Some(pct)` over the elapsed window —
78    /// `(cpu_delta / wall_delta) / cores * 100`, clamped to `[0, 100 × cores]` —
79    /// or `None` if `getrusage` is unavailable or no wall time has elapsed.
80    pub fn poll(&mut self) -> Option<f64> {
81        let now = Instant::now();
82        let cpu = rusage_cpu_secs()?;
83        match self.prev {
84            None => {
85                self.prev = Some((now, cpu));
86                None
87            }
88            Some((prev_t, prev_cpu)) => {
89                let wall = now.duration_since(prev_t).as_secs_f64();
90                self.prev = Some((now, cpu));
91                if wall <= 0.0 {
92                    return None;
93                }
94                let cpu_delta = (cpu - prev_cpu).max(0.0);
95                let pct = (cpu_delta / wall) / self.cores * 100.0;
96                Some(pct.clamp(0.0, 100.0 * self.cores))
97            }
98        }
99    }
100}
101
102impl Default for CpuSampler {
103    fn default() -> Self {
104        Self::new()
105    }
106}
107
108/// Returns the current process resident set size (RSS) in bytes, best-effort.
109///
110/// - Linux: 2nd field (resident pages) of `/proc/self/statm` × page size.
111/// - macOS: mach `task_info(MACH_TASK_BASIC_INFO)` `.resident_size`.
112/// - Other targets: `None`.
113///
114/// Returns `None` on any error; never panics.
115#[must_use]
116pub fn resident_set_bytes() -> Option<u64> {
117    #[cfg(target_os = "linux")]
118    {
119        let statm = std::fs::read_to_string("/proc/self/statm").ok()?;
120        let resident_pages: u64 = statm.split_whitespace().nth(1)?.parse().ok()?;
121        // SAFETY: sysconf is a pure query with no pointer args; we check its
122        // return for the documented `-1` failure sentinel before using it.
123        let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
124        if page_size <= 0 {
125            return None;
126        }
127        Some(resident_pages.saturating_mul(page_size as u64))
128    }
129    #[cfg(target_os = "macos")]
130    {
131        // SAFETY: task_info writes into a correctly-sized info struct; we pass
132        // the matching flavor + count and only read `resident_size` after the
133        // kern_return_t is KERN_SUCCESS. `mach_task_self_` is the static that
134        // the deprecated `mach_task_self()` helper merely reads; use it
135        // directly to avoid pulling the `mach2` crate for one port handle.
136        #[allow(deprecated)]
137        unsafe {
138            let mut info: libc::mach_task_basic_info = std::mem::zeroed();
139            let mut count: libc::mach_msg_type_number_t =
140                (std::mem::size_of::<libc::mach_task_basic_info>()
141                    / std::mem::size_of::<libc::integer_t>())
142                    as libc::mach_msg_type_number_t;
143            let kr = libc::task_info(
144                libc::mach_task_self_,
145                libc::MACH_TASK_BASIC_INFO,
146                std::ptr::addr_of_mut!(info).cast(),
147                std::ptr::addr_of_mut!(count),
148            );
149            if kr == libc::KERN_SUCCESS {
150                Some(info.resident_size)
151            } else {
152                None
153            }
154        }
155    }
156    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
157    {
158        None
159    }
160}
161
162/// Returns the total physical RAM of the machine in bytes, best-effort.
163///
164/// - Linux: `sysconf(_SC_PHYS_PAGES) × sysconf(_SC_PAGE_SIZE)`.
165/// - macOS: `sysctlbyname("hw.memsize", …)`.
166/// - Other targets: `None`.
167///
168/// Returns `None` on any error; never panics. The adaptive controller uses this
169/// as the denominator of its memory-budget guardrail.
170#[must_use]
171pub fn total_ram_bytes() -> Option<u64> {
172    #[cfg(target_os = "linux")]
173    {
174        // SAFETY: both are pure sysconf queries with no pointer args; we check
175        // each for the documented `-1`/`0` failure sentinels before using them.
176        let pages = unsafe { libc::sysconf(libc::_SC_PHYS_PAGES) };
177        let page_size = unsafe { libc::sysconf(libc::_SC_PAGE_SIZE) };
178        if pages <= 0 || page_size <= 0 {
179            return None;
180        }
181        Some((pages as u64).saturating_mul(page_size as u64))
182    }
183    #[cfg(target_os = "macos")]
184    {
185        let mut memsize: u64 = 0;
186        let mut len: libc::size_t = std::mem::size_of::<u64>();
187        // The sysctl name is a NUL-terminated C string.
188        let name = c"hw.memsize";
189        // SAFETY: we pass a valid NUL-terminated name, a correctly-sized output
190        // buffer (`&mut u64`) with its matching length, and no new-value
191        // buffer; the result is only trusted when the call returns 0 and the
192        // kernel reported the full 8-byte width.
193        let rc = unsafe {
194            libc::sysctlbyname(
195                name.as_ptr(),
196                std::ptr::addr_of_mut!(memsize).cast(),
197                std::ptr::addr_of_mut!(len),
198                std::ptr::null_mut(),
199                0,
200            )
201        };
202        if rc == 0 && len == std::mem::size_of::<u64>() && memsize > 0 {
203            Some(memsize)
204        } else {
205            None
206        }
207    }
208    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
209    {
210        None
211    }
212}
213
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises the RSS sampler and the CPU sampler: neither may panic, and
    /// any value they do report must fall inside a plausible range.
    #[test]
    fn resources_samplers_are_bounded_and_safe() {
        // Upper plausibility bound for a process RSS: one tebibyte.
        const ONE_TIB: u64 = 1 << 40;

        // resident_set_bytes: `None` is acceptable; a `Some` must be sane.
        if let Some(rss) = resident_set_bytes() {
            assert!(rss > 0, "rss should be positive when sampled: {rss}");
            assert!(rss < ONE_TIB, "rss implausibly large: {rss}");
        }

        // CpuSampler: the first poll only records a baseline, so it is `None`.
        let parallelism = std::thread::available_parallelism();
        let cores = parallelism.map_or(1.0, |count| count.get() as f64);
        let mut sampler = CpuSampler::new();
        assert!(
            sampler.poll().is_none(),
            "first poll must be a baseline (None)"
        );

        // Burn a little CPU so the second window has a chance of being > 0;
        // `black_box` keeps the optimizer from deleting the work.
        let acc = (0..1_000_000u64).fold(0u64, u64::wrapping_add);
        std::hint::black_box(acc);

        // A later poll is either `None` or a finite, non-negative percentage
        // that never exceeds 100 × cores.
        if let Some(pct) = sampler.poll() {
            assert!(pct >= 0.0, "cpu pct negative: {pct}");
            assert!(pct.is_finite(), "cpu pct not finite: {pct}");
            assert!(
                pct <= 100.0 * cores + 1e-6,
                "cpu pct exceeds capacity: {pct} (cores {cores})"
            );
        }
    }

    /// On the supported platforms total RAM must be readable and plausible;
    /// elsewhere the sampler merely must not panic.
    #[test]
    fn resources_total_ram_is_positive() {
        let ram = total_ram_bytes();
        #[cfg(any(target_os = "linux", target_os = "macos"))]
        {
            // Upper plausibility bound for machine RAM: one pebibyte.
            const ONE_PIB: u64 = 1 << 50;

            // This is the dev/CI platform, so a missing reading is a failure.
            let n = ram.expect("total_ram_bytes must be Some on linux/macos");
            assert!(n > 0, "total ram should be positive: {n}");
            assert!(n < ONE_PIB, "total ram implausibly large: {n}");
        }
        #[cfg(not(any(target_os = "linux", target_os = "macos")))]
        {
            // On an unknown OS `None` is fine, but a `Some` must still be > 0.
            if let Some(n) = ram {
                assert!(n > 0, "total ram should be positive when sampled: {n}");
            }
        }
    }
}