Skip to main content

varta_watch/
clock.rs

1//! Configurable monotonic clock for stall-threshold accounting.
2//!
3//! The observer's stall detector decides "this PID has been silent for too
4//! long" by subtracting a recorded `last_beat_ns` from a "now_ns" derived
5//! from a monotonic clock.  Which kernel clock backs that "now_ns" depends
6//! on the deployment profile.
7//!
8//! # Per-platform semantics
9//!
10//! `CLOCK_MONOTONIC` is not a POSIX-mandated numeric constant, and its
11//! behavior across system suspend / sleep differs by kernel. The shipped
12//! clock sources are:
13//!
14//! - **`monotonic` (default, all platforms)** — `CLOCK_MONOTONIC`.
15//!   - Linux (`clk_id = 1`): pauses while the host is suspended.
16//!     NTP-slewable.
17//!   - BSD (`clk_id = 4`): pauses while the host is suspended.
18//!     Linux-compatible semantics.
19//!   - macOS / iOS (`clk_id = 6`): backed by `mach_absolute_time`. Pauses
20//!     during sleep on 10.12 (Sierra) and later — the same observable
21//!     semantics as Linux. The underlying tick rate is host-dependent
22//!     (≈24 MHz on Apple Silicon, ≈1 GHz on Intel); `clock_gettime`
23//!     reports nanoseconds regardless, so downstream stall arithmetic
24//!     is unaffected by the hardware difference.
25//!
26//!   `monotonic` is the right semantic for fleet observability: a
27//!   30-minute host suspend should NOT fire a stall alert across every
28//!   agent on that host.
29//!
30//! - **`boottime` (Linux only)** — `CLOCK_BOOTTIME` (`clk_id = 7`).
31//!   Continues to advance during suspend.  This is the right semantic for
32//!   battery-conscious clinical devices (insulin pumps, holter monitors)
33//!   that aggressively suspend to sleep: a 4-hour suspend IS a 4-hour
34//!   silence and MUST register as a stall on wake-up. Rejected at startup
35//!   on every non-Linux target.
36//!
37//! - **`monotonic-raw` (macOS / iOS only)** — `CLOCK_MONOTONIC_RAW`
38//!   (`clk_id = 4`), backed by `mach_continuous_time`. Continues to
39//!   advance during sleep — the Darwin equivalent of Linux's
40//!   `CLOCK_BOOTTIME`. This is the right choice for macOS-hosted clinical
41//!   devices or any deployment where "wall-clock silence including sleep"
42//!   is the stall semantic. Rejected at startup on every non-macOS
43//!   target; Linux operators should use `boottime` and BSD operators have
44//!   no equivalent. (Note: Linux also defines `CLOCK_MONOTONIC_RAW`, but
45//!   there it merely opts out of NTP slewing — it still pauses during
46//!   suspend. Exposing it on Linux would invite a name collision with
47//!   different semantics, so the variant is structurally macOS-only.)
48//!
49//! See `book/src/architecture/safety-profiles.md` for the deployment
50//! matrix.
51//!
52//! # Implementation
53//!
54//! [`Clock`] is a concrete struct, not a trait.  The single-threaded
55//! observer poll loop calls [`Clock::now_ns`] once per tick; a vtable
56//! indirection would add a per-tick predicted branch with no benefit, and
57//! parameterising every downstream type on a `Clock` generic would
58//! explode the signature surface.  The internal `match self.source` is
59//! one well-predicted branch on each call.
60//!
61//! Raw `extern "C" clock_gettime(2)` is used rather than the `libc` crate
62//! — same pattern as the project's `getrandom` (cerebrum 2026-05-12) and
63//! `sigaction` (main.rs:54) FFI sites.  No registry dependency.
64
65use std::io;
66
/// Kernel `clk_id` for `CLOCK_MONOTONIC`.
///
/// `CLOCK_MONOTONIC` is NOT a POSIX-mandated numeric constant — values
/// differ across kernels. Source-of-truth per platform:
///
/// - Linux:               `<bits/time.h>` — `CLOCK_MONOTONIC = 1`
/// - macOS / iOS:         `<sys/_types/_clock_id.h>` — `_CLOCK_MONOTONIC = 6` (10.12+)
/// - FreeBSD / DragonFly: `<sys/_clock_id.h>` / `<sys/time.h>` — `CLOCK_MONOTONIC = 4`
/// - NetBSD / OpenBSD:    `<sys/time.h>` / `<sys/_time.h>` — `CLOCK_MONOTONIC = 3`
///
/// The BSDs do NOT share one value. On OpenBSD, id 4 is
/// `CLOCK_THREAD_CPUTIME_ID`; passing it here would make the stall detector
/// measure the calling thread's CPU time instead of elapsed time, so the
/// NetBSD / OpenBSD group must stay separate from FreeBSD / DragonFly.
#[cfg(target_os = "linux")]
const CLOCK_MONOTONIC: i32 = 1;
/// Darwin value of `CLOCK_MONOTONIC`.
///
/// NOTE(review): Apple's `clock_gettime(3)` manual describes
/// `CLOCK_MONOTONIC` as continuing to increment while the system is asleep
/// and names `CLOCK_UPTIME_RAW` as the clock that stops during sleep —
/// confirm the pause-on-suspend behaviour assumed for `monotonic` on Darwin.
#[cfg(any(target_os = "macos", target_os = "ios"))]
const CLOCK_MONOTONIC: i32 = 6;
/// FreeBSD-family value of `CLOCK_MONOTONIC`.
#[cfg(any(target_os = "freebsd", target_os = "dragonfly"))]
const CLOCK_MONOTONIC: i32 = 4;
/// NetBSD-family value of `CLOCK_MONOTONIC`.
#[cfg(any(target_os = "netbsd", target_os = "openbsd"))]
const CLOCK_MONOTONIC: i32 = 3;
/// Last-resort default for targets not listed above — most kernels follow
/// Linux.
#[cfg(not(any(
    target_os = "linux",
    target_os = "macos",
    target_os = "ios",
    target_os = "freebsd",
    target_os = "netbsd",
    target_os = "openbsd",
    target_os = "dragonfly",
)))]
const CLOCK_MONOTONIC: i32 = 1;

/// Linux: `<bits/time.h>` — `CLOCK_BOOTTIME` (since 2.6.39). Like
/// `CLOCK_MONOTONIC`, but also includes time the system has been
/// suspended. Linux-only — do NOT use on other targets.
#[cfg(target_os = "linux")]
const CLOCK_BOOTTIME: i32 = 7;

/// Darwin: `<sys/_types/_clock_id.h>` — `_CLOCK_MONOTONIC_RAW = 4` (10.12+),
/// backed by `mach_continuous_time`. Unlike Linux's same-numbered constant
/// (which still pauses during suspend) this advances through sleep — the
/// Darwin equivalent of Linux's `CLOCK_BOOTTIME`. The variant is exposed to
/// operators only on macOS / iOS; using it on any other platform is a hard
/// error at startup.
#[cfg(any(target_os = "macos", target_os = "ios"))]
const CLOCK_MONOTONIC_RAW: i32 = 4;
110
/// Selects which kernel clock backs stall-threshold accounting.
///
/// Only the clock that produces "now_ns" is configurable; wire-format and
/// observer semantics do not depend on the choice.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum ClockSource {
    /// `CLOCK_MONOTONIC` — pauses on system suspend. The SRE default and
    /// the only source available on every supported platform.
    #[default]
    Monotonic,
    /// `CLOCK_BOOTTIME` (Linux only) — keeps advancing through suspend.
    /// Intended for medical / embedded deployments.
    Boottime,
    /// `CLOCK_MONOTONIC_RAW` (macOS / iOS only) — backed by
    /// `mach_continuous_time`, so it advances through sleep. The Darwin
    /// counterpart of Linux's `Boottime`; rejected at startup elsewhere.
    MonotonicRaw,
}

impl std::fmt::Display for ClockSource {
    /// Writes the operator-facing spelling of the variant (the same
    /// spelling the `FromStr` impl accepts).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            Self::Monotonic => "monotonic",
            Self::Boottime => "boottime",
            Self::MonotonicRaw => "monotonic-raw",
        };
        f.write_str(label)
    }
}
139
140impl std::str::FromStr for ClockSource {
141    type Err = ClockSourceParseError;
142
143    fn from_str(s: &str) -> Result<Self, Self::Err> {
144        match s {
145            "monotonic" => Ok(ClockSource::Monotonic),
146            "boottime" => Ok(ClockSource::Boottime),
147            "monotonic-raw" | "monotonic_raw" => Ok(ClockSource::MonotonicRaw),
148            other => Err(ClockSourceParseError {
149                raw: other.to_string(),
150            }),
151        }
152    }
153}
154
/// Error produced when `--clock-source` is given a value that names no
/// known clock source.
#[derive(Debug)]
pub struct ClockSourceParseError {
    /// The rejected value, exactly as the operator supplied it.
    pub raw: String,
}

impl std::fmt::Display for ClockSourceParseError {
    /// Renders the rejected value (debug-quoted) followed by the list of
    /// accepted spellings.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!(
            "unknown clock source {:?}: expected one of `monotonic`, `boottime`, `monotonic-raw`",
            self.raw
        ))
    }
}

// No underlying cause to expose, so the default `Error` methods suffice.
impl std::error::Error for ClockSourceParseError {}
173
174/// Numeric tag used by the self-watchdog `static CLOCK_SOURCE: AtomicU8`
175/// in `main.rs` to communicate the chosen source to the background
176/// watchdog thread without an `Arc`.
177impl ClockSource {
178    /// 0 → `Monotonic`, 1 → `Boottime`, 2 → `MonotonicRaw`. Stable across
179    /// versions; only ever produced by `as_u8` on the same enum.
180    pub fn as_u8(self) -> u8 {
181        match self {
182            ClockSource::Monotonic => 0,
183            ClockSource::Boottime => 1,
184            ClockSource::MonotonicRaw => 2,
185        }
186    }
187
188    /// Inverse of [`Self::as_u8`]; unknown values fall back to `Monotonic`
189    /// (defensive — the only writer is `as_u8` on the same enum).
190    pub fn from_u8(byte: u8) -> Self {
191        match byte {
192            1 => ClockSource::Boottime,
193            2 => ClockSource::MonotonicRaw,
194            _ => ClockSource::Monotonic,
195        }
196    }
197
198    /// Kernel `clk_id` argument for `clock_gettime(2)`.
199    ///
200    /// Returns `None` when the source is unsupported on the current
201    /// platform (e.g. `Boottime` on macOS, `MonotonicRaw` on Linux/BSD).
202    pub fn clk_id(self) -> Option<i32> {
203        match self {
204            ClockSource::Monotonic => Some(CLOCK_MONOTONIC),
205            #[cfg(target_os = "linux")]
206            ClockSource::Boottime => Some(CLOCK_BOOTTIME),
207            #[cfg(not(target_os = "linux"))]
208            ClockSource::Boottime => None,
209            #[cfg(any(target_os = "macos", target_os = "ios"))]
210            ClockSource::MonotonicRaw => Some(CLOCK_MONOTONIC_RAW),
211            #[cfg(not(any(target_os = "macos", target_os = "ios")))]
212            ClockSource::MonotonicRaw => None,
213        }
214    }
215}
216
217/// Failures surfaced by [`Clock::new`].
218#[derive(Debug)]
219pub enum ClockError {
220    /// The requested `ClockSource` has no kernel equivalent on this
221    /// platform.  Currently fires for `Boottime` on every non-Linux
222    /// target.
223    Unsupported {
224        /// The source the operator requested.
225        source: ClockSource,
226        /// `std::env::consts::OS` at compile time, for the error message.
227        platform: &'static str,
228    },
229    /// `clock_gettime(2)` returned an OS-level error.
230    Os(io::Error),
231}
232
233impl std::fmt::Display for ClockError {
234    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
235        match self {
236            ClockError::Unsupported { source, platform } => {
237                let hint = match source {
238                    ClockSource::Boottime => {
239                        " (Linux only; on macOS use `monotonic-raw` for advance-through-sleep semantics)"
240                    }
241                    ClockSource::MonotonicRaw => {
242                        " (macOS / iOS only; on Linux use `boottime` for advance-through-sleep semantics)"
243                    }
244                    ClockSource::Monotonic => "",
245                };
246                write!(
247                    f,
248                    "clock source `{source}` is not supported on `{platform}`{hint}"
249                )
250            }
251            ClockError::Os(e) => write!(f, "clock_gettime: {e}"),
252        }
253    }
254}
255
256impl std::error::Error for ClockError {
257    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
258        match self {
259            ClockError::Unsupported { .. } => None,
260            ClockError::Os(e) => Some(e),
261        }
262    }
263}
264
265impl From<ClockError> for io::Error {
266    fn from(e: ClockError) -> Self {
267        match e {
268            ClockError::Os(inner) => inner,
269            ClockError::Unsupported { .. } => {
270                io::Error::new(io::ErrorKind::Unsupported, e.to_string())
271            }
272        }
273    }
274}
275
// --- Raw clock_gettime FFI ---------------------------------------------------
//
// POSIX specifies `struct timespec` as `tv_sec: time_t, tv_nsec: long`; the
// widths of `time_t` and `long` differ per OS / ABI. Every target this file
// distinguishes (Linux, macOS / iOS, the BSDs, and the catch-all fallback)
// uses the same two-`i64` layout here, so a single declaration serves them
// all.
//
// NOTE(review): the two-`i64` layout matches LP64 targets (e.g. Darwin,
// where `time_t` and `long` are both 64-bit). On ABIs where the
// `clock_gettime` symbol expects a narrower `time_t` or `long`, the fields
// would be misread — confirm that every supported target is 64-bit.

/// Mirror of the C `struct timespec` that `clock_gettime(2)` fills in.
#[repr(C)]
struct Timespec {
    /// Whole seconds (`time_t`).
    tv_sec: i64,
    /// Nanoseconds within the current second (`long`).
    tv_nsec: i64,
}

extern "C" {
    fn clock_gettime(clk_id: i32, tp: *mut Timespec) -> i32;
}

/// Read the requested kernel clock and return nanoseconds since its
/// epoch as a `u64`.
///
/// This helper only exposes the raw clock value; anchoring it to a baseline
/// and clamping it forward-monotonic is the caller's job. It is used both
/// by [`Clock`] (observer hot path) and by the self-watchdog thread in
/// `main.rs`.
///
/// Negative `tv_sec` / `tv_nsec` components are treated as zero, and the
/// combined value saturates at `u64::MAX` instead of overflowing.
///
/// # Errors
///
/// Returns `io::Error::last_os_error()` when `clock_gettime` reports a
/// non-zero status (for example an unknown `clk_id`).
pub fn clock_gettime_raw(clk_id: i32) -> io::Result<u64> {
    const NANOS_PER_SEC: u64 = 1_000_000_000;

    let mut ts = Timespec {
        tv_sec: 0,
        tv_nsec: 0,
    };
    // SAFETY: `ts` is a live, properly aligned `Timespec` owned by this
    // frame, and the pointer derived from `&mut ts` is only used for the
    // duration of this call, so `clock_gettime` has exclusive access to it.
    let status = unsafe { clock_gettime(clk_id, &mut ts) };
    if status != 0 {
        return Err(io::Error::last_os_error());
    }

    // Both components are expected to be non-negative; clamp at zero before
    // widening so a negative value cannot wrap into a huge `u64`.
    let whole_secs = ts.tv_sec.max(0) as u64;
    let frac_nanos = ts.tv_nsec.max(0) as u64;

    let total_ns = whole_secs
        .checked_mul(NANOS_PER_SEC)
        .and_then(|ns| ns.checked_add(frac_nanos));
    Ok(total_ns.unwrap_or(u64::MAX))
}
366
367/// Monotonic clock anchored to an observer-startup baseline.
368///
369/// Mirrors the semantics of `Observer::start.elapsed().as_nanos()` so
370/// downstream stall arithmetic is unchanged when the operator does not
371/// pass `--clock-source`.
372pub struct Clock {
373    source: ClockSource,
374    start_ns: u64,
375}
376
377impl Clock {
378    /// Build a `Clock` backed by `source`.
379    ///
380    /// Performs one `clock_gettime(2)` call to anchor `start_ns`. Returns
381    /// `ClockError::Unsupported` when `source = Boottime` on a non-Linux
382    /// target.
383    pub fn new(source: ClockSource) -> Result<Self, ClockError> {
384        let clk_id = source.clk_id().ok_or(ClockError::Unsupported {
385            source,
386            platform: std::env::consts::OS,
387        })?;
388        let start_ns = clock_gettime_raw(clk_id).map_err(ClockError::Os)?;
389        Ok(Self { source, start_ns })
390    }
391
392    /// One-call probe: surface `Unsupported` / OS errors at startup
393    /// before threading the clock through `Observer`.
394    pub fn probe(source: ClockSource) -> Result<(), ClockError> {
395        Self::new(source).map(|_| ())
396    }
397
398    /// Nanoseconds since this `Clock`'s baseline. Saturates to `u64::MAX`
399    /// on a wildly long-running process (>584 years).
400    pub fn now_ns(&self) -> u64 {
401        let clk_id = match self.source.clk_id() {
402            Some(id) => id,
403            // Unreachable: `new` rejected the unsupported case.
404            None => return 0,
405        };
406        let raw = clock_gettime_raw(clk_id).unwrap_or(self.start_ns);
407        raw.saturating_sub(self.start_ns)
408    }
409
410    /// Inspect the configured source (used by tests and by `main.rs` to
411    /// publish into the watchdog atomic).
412    pub fn source(&self) -> ClockSource {
413        self.source
414    }
415}
416
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    /// Every `ClockSource` variant, in declaration order.
    const ALL_SOURCES: [ClockSource; 3] = [
        ClockSource::Monotonic,
        ClockSource::Boottime,
        ClockSource::MonotonicRaw,
    ];

    /// Each accepted spelling parses to the expected variant.
    #[test]
    fn parse_all_clock_source_variants() {
        let accepted = [
            ("monotonic", ClockSource::Monotonic),
            ("boottime", ClockSource::Boottime),
            ("monotonic-raw", ClockSource::MonotonicRaw),
            // Underscore spelling is accepted as a convenience.
            ("monotonic_raw", ClockSource::MonotonicRaw),
        ];
        for (text, expected) in accepted {
            assert_eq!(ClockSource::from_str(text).unwrap(), expected);
        }
    }

    /// An unknown name is rejected and echoed back in the error.
    #[test]
    fn parse_unknown_value_errors() {
        let err = ClockSource::from_str("wallclock").unwrap_err();
        assert_eq!(err.raw, "wallclock");
    }

    /// `Display` output parses back to the same variant.
    #[test]
    fn display_round_trip() {
        for src in ALL_SOURCES {
            let rendered = src.to_string();
            assert_eq!(ClockSource::from_str(&rendered).unwrap(), src);
        }
    }

    /// The `u8` tag decodes back to the variant that produced it.
    #[test]
    fn as_u8_from_u8_round_trip() {
        for src in ALL_SOURCES {
            let tag = src.as_u8();
            assert_eq!(ClockSource::from_u8(tag), src);
        }
    }

    /// Two back-to-back readings never go backwards.
    #[test]
    fn monotonic_forward_only() {
        let clk = Clock::new(ClockSource::Monotonic).expect("CLOCK_MONOTONIC must be supported");
        let (a, b) = (clk.now_ns(), clk.now_ns());
        assert!(b >= a, "monotonic clock regressed: {a} -> {b}");
    }

    /// On Linux the boottime clock is available and never goes backwards.
    #[cfg(target_os = "linux")]
    #[test]
    fn boottime_forward_only_on_linux() {
        let clk = Clock::new(ClockSource::Boottime).expect("CLOCK_BOOTTIME must work on Linux");
        let (a, b) = (clk.now_ns(), clk.now_ns());
        assert!(b >= a, "boottime clock regressed: {a} -> {b}");
    }

    /// Off Linux, asking for boottime fails with `Unsupported`.
    #[cfg(not(target_os = "linux"))]
    #[test]
    fn boottime_rejected_on_unsupported_platform() {
        match Clock::new(ClockSource::Boottime) {
            Ok(_) => panic!("expected Boottime to be rejected on non-Linux"),
            Err(ClockError::Unsupported { source, .. }) => {
                assert_eq!(source, ClockSource::Boottime);
            }
            Err(other) => panic!("expected Unsupported, got {other:?}"),
        }
    }

    /// On Darwin the raw clock is available and never goes backwards.
    #[cfg(any(target_os = "macos", target_os = "ios"))]
    #[test]
    fn monotonic_raw_forward_only_on_macos() {
        let clk =
            Clock::new(ClockSource::MonotonicRaw).expect("CLOCK_MONOTONIC_RAW must work on macOS");
        let (a, b) = (clk.now_ns(), clk.now_ns());
        assert!(b >= a, "monotonic-raw clock regressed: {a} -> {b}");
    }

    /// Off Darwin, asking for the raw clock fails with `Unsupported`.
    #[cfg(not(any(target_os = "macos", target_os = "ios")))]
    #[test]
    fn monotonic_raw_rejected_on_non_macos() {
        match Clock::new(ClockSource::MonotonicRaw) {
            Ok(_) => panic!("expected MonotonicRaw to be rejected outside macOS / iOS"),
            Err(ClockError::Unsupported { source, .. }) => {
                assert_eq!(source, ClockSource::MonotonicRaw);
            }
            Err(other) => panic!("expected Unsupported, got {other:?}"),
        }
    }

    /// A fresh clock reports an elapsed time close to zero.
    #[test]
    fn now_ns_baseline_starts_near_zero() {
        let clk = Clock::new(ClockSource::Monotonic).unwrap();
        let first = clk.now_ns();
        // The first reading should be tiny on a cold start; the one-second
        // bound leaves generous slack.
        assert!(
            first < 1_000_000_000,
            "first now_ns reading too large: {first}"
        );
    }
}