varta_watch/clock.rs
1//! Configurable monotonic clock for stall-threshold accounting.
2//!
3//! The observer's stall detector decides "this PID has been silent for too
4//! long" by subtracting a recorded `last_beat_ns` from a "now_ns" derived
5//! from a monotonic clock. Which kernel clock backs that "now_ns" depends
6//! on the deployment profile.
7//!
8//! # Per-platform semantics
9//!
10//! `CLOCK_MONOTONIC` is not a POSIX-mandated numeric constant, and its
11//! behavior across system suspend / sleep differs by kernel. The shipped
12//! clock sources are:
13//!
14//! - **`monotonic` (default, all platforms)** — `CLOCK_MONOTONIC`.
15//! - Linux (`clk_id = 1`): pauses while the host is suspended.
16//! NTP-slewable.
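//! - BSD: pauses while the host is suspended. Linux-compatible semantics.
//! The numeric id differs per BSD: FreeBSD / DragonFly headers define
//! `CLOCK_MONOTONIC` as `4`, NetBSD / OpenBSD headers define it as `3`.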
19//! - macOS / iOS (`clk_id = 6`): backed by `mach_absolute_time`. Pauses
20//! during sleep on 10.12 (Sierra) and later — the same observable
21//! semantics as Linux. The underlying tick rate is host-dependent
22//! (≈24 MHz on Apple Silicon, ≈1 GHz on Intel); `clock_gettime`
23//! reports nanoseconds regardless, so downstream stall arithmetic
24//! is unaffected by the hardware difference.
25//!
26//! `monotonic` is the right semantic for fleet observability: a
27//! 30-minute host suspend should NOT fire a stall alert across every
28//! agent on that host.
29//!
30//! - **`boottime` (Linux only)** — `CLOCK_BOOTTIME` (`clk_id = 7`).
31//! Continues to advance during suspend. This is the right semantic for
32//! battery-conscious clinical devices (insulin pumps, holter monitors)
33//! that aggressively suspend to sleep: a 4-hour suspend IS a 4-hour
34//! silence and MUST register as a stall on wake-up. Rejected at startup
35//! on every non-Linux target.
36//!
37//! - **`monotonic-raw` (macOS / iOS only)** — `CLOCK_MONOTONIC_RAW`
38//! (`clk_id = 4`), backed by `mach_continuous_time`. Continues to
39//! advance during sleep — the Darwin equivalent of Linux's
40//! `CLOCK_BOOTTIME`. This is the right choice for macOS-hosted clinical
41//! devices or any deployment where "wall-clock silence including sleep"
42//! is the stall semantic. Rejected at startup on every non-macOS
43//! target; Linux operators should use `boottime` and BSD operators have
44//! no equivalent. (Note: Linux also defines `CLOCK_MONOTONIC_RAW`, but
45//! there it merely opts out of NTP slewing — it still pauses during
46//! suspend. Exposing it on Linux would invite a name collision with
47//! different semantics, so the variant is structurally macOS-only.)
48//!
49//! See `book/src/architecture/safety-profiles.md` for the deployment
50//! matrix.
51//!
52//! # Implementation
53//!
54//! [`Clock`] is a concrete struct, not a trait. The single-threaded
55//! observer poll loop calls [`Clock::now_ns`] once per tick; a vtable
56//! indirection would add a per-tick predicted branch with no benefit, and
57//! parameterising every downstream type on a `Clock` generic would
58//! explode the signature surface. The internal `match self.source` is
59//! one well-predicted branch on each call.
60//!
61//! Raw `extern "C" clock_gettime(2)` is used rather than the `libc` crate
62//! — same pattern as the project's `getrandom` (cerebrum 2026-05-12) and
63//! `sigaction` (main.rs:54) FFI sites. No registry dependency.
64
65use std::io;
66
/// `CLOCK_MONOTONIC` is NOT a POSIX-mandated numeric constant — values
/// differ across kernels. Source-of-truth per platform:
///
/// - Linux: `<bits/time.h>` — `CLOCK_MONOTONIC = 1`
/// - macOS/iOS: `<sys/_types/_clock_id.h>` — `_CLOCK_MONOTONIC = 6` (10.12+)
/// - FreeBSD/DragonFly: `CLOCK_MONOTONIC = 4`
/// - NetBSD/OpenBSD: `CLOCK_MONOTONIC = 3`. These two must NOT share the
///   FreeBSD value: on OpenBSD `4` is `CLOCK_THREAD_CPUTIME_ID`, so the
///   stall detector would silently measure thread CPU time instead of
///   elapsed time, and on NetBSD `4` is not a valid clock id at all.
#[cfg(target_os = "linux")]
const CLOCK_MONOTONIC: i32 = 1;
// NOTE(review): the module docs describe Darwin's `CLOCK_MONOTONIC` as pausing
// during sleep, but Apple's clock_gettime(3) man page appears to say it keeps
// incrementing while the system is asleep and that `CLOCK_UPTIME_RAW` (8) is
// the clock that pauses — confirm on a real host before relying on the
// documented suspend semantics.
#[cfg(any(target_os = "macos", target_os = "ios"))]
const CLOCK_MONOTONIC: i32 = 6;
#[cfg(any(target_os = "freebsd", target_os = "dragonfly"))]
const CLOCK_MONOTONIC: i32 = 4;
#[cfg(any(target_os = "netbsd", target_os = "openbsd"))]
const CLOCK_MONOTONIC: i32 = 3;
#[cfg(not(any(
    target_os = "linux",
    target_os = "macos",
    target_os = "ios",
    target_os = "freebsd",
    target_os = "netbsd",
    target_os = "openbsd",
    target_os = "dragonfly",
)))]
const CLOCK_MONOTONIC: i32 = 1; // Last-resort default — most kernels follow Linux.
95
/// Linux: `<bits/time.h>` — `CLOCK_BOOTTIME` (since 2.6.39). Like
/// `CLOCK_MONOTONIC`, but also includes time the system has been
/// suspended. Linux-only — do NOT use on other targets.
///
/// Compiled only when `target_os = "linux"`; `ClockSource::clk_id` maps
/// `ClockSource::Boottime` to this value there and to `None` elsewhere.
#[cfg(target_os = "linux")]
const CLOCK_BOOTTIME: i32 = 7;
101
/// Darwin: `<sys/_types/_clock_id.h>` — `_CLOCK_MONOTONIC_RAW = 4` (10.12+),
/// backed by `mach_continuous_time`. Unlike Linux's same-numbered constant
/// (which still pauses during suspend) this advances through sleep — the
/// Darwin equivalent of Linux's `CLOCK_BOOTTIME`. The variant is exposed to
/// operators only on macOS / iOS; using it on any other platform is a hard
/// error at startup.
///
/// Compiled only for macOS / iOS; `ClockSource::clk_id` maps
/// `ClockSource::MonotonicRaw` to this value there and to `None` elsewhere.
#[cfg(any(target_os = "macos", target_os = "ios"))]
const CLOCK_MONOTONIC_RAW: i32 = 4;
110
/// Kernel clock backing stall-threshold accounting.
///
/// Wire-format and observer semantics are unchanged; only the kernel
/// clock that drives "now_ns" is configurable.
///
/// The operator-facing spellings are defined by the `Display` / `FromStr`
/// impls, and per-platform availability by [`ClockSource::clk_id`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum ClockSource {
    /// `CLOCK_MONOTONIC` — pauses on system suspend. SRE default,
    /// available on every supported platform. This is also the value
    /// produced by `ClockSource::default()`.
    #[default]
    Monotonic,
    /// `CLOCK_BOOTTIME` (Linux only) — advances through suspend.
    /// Medical / embedded deployment.
    Boottime,
    /// `CLOCK_MONOTONIC_RAW` (macOS / iOS only) — backed by
    /// `mach_continuous_time`; advances through sleep. Darwin equivalent
    /// of Linux's `Boottime`. Rejected at startup on non-Darwin targets.
    MonotonicRaw,
}
129
130impl std::fmt::Display for ClockSource {
131 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
132 match self {
133 ClockSource::Monotonic => f.write_str("monotonic"),
134 ClockSource::Boottime => f.write_str("boottime"),
135 ClockSource::MonotonicRaw => f.write_str("monotonic-raw"),
136 }
137 }
138}
139
140impl std::str::FromStr for ClockSource {
141 type Err = ClockSourceParseError;
142
143 fn from_str(s: &str) -> Result<Self, Self::Err> {
144 match s {
145 "monotonic" => Ok(ClockSource::Monotonic),
146 "boottime" => Ok(ClockSource::Boottime),
147 "monotonic-raw" | "monotonic_raw" => Ok(ClockSource::MonotonicRaw),
148 other => Err(ClockSourceParseError {
149 raw: other.to_string(),
150 }),
151 }
152 }
153}
154
/// Parse error surfaced when `--clock-source` is given an unknown value.
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so callers
/// and tests can compare or duplicate the error without going through its
/// rendered message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClockSourceParseError {
    /// The raw value the operator supplied.
    pub raw: String,
}

impl std::fmt::Display for ClockSourceParseError {
    /// Renders the rejected value (debug-quoted, so whitespace and empty
    /// input stay visible) followed by the accepted spellings.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "unknown clock source {:?}: expected one of `monotonic`, `boottime`, `monotonic-raw`",
            self.raw
        )
    }
}

impl std::error::Error for ClockSourceParseError {}
173
174/// Numeric tag used by the self-watchdog `static CLOCK_SOURCE: AtomicU8`
175/// in `main.rs` to communicate the chosen source to the background
176/// watchdog thread without an `Arc`.
177impl ClockSource {
178 /// 0 → `Monotonic`, 1 → `Boottime`, 2 → `MonotonicRaw`. Stable across
179 /// versions; only ever produced by `as_u8` on the same enum.
180 pub fn as_u8(self) -> u8 {
181 match self {
182 ClockSource::Monotonic => 0,
183 ClockSource::Boottime => 1,
184 ClockSource::MonotonicRaw => 2,
185 }
186 }
187
188 /// Inverse of [`Self::as_u8`]; unknown values fall back to `Monotonic`
189 /// (defensive — the only writer is `as_u8` on the same enum).
190 pub fn from_u8(byte: u8) -> Self {
191 match byte {
192 1 => ClockSource::Boottime,
193 2 => ClockSource::MonotonicRaw,
194 _ => ClockSource::Monotonic,
195 }
196 }
197
198 /// Kernel `clk_id` argument for `clock_gettime(2)`.
199 ///
200 /// Returns `None` when the source is unsupported on the current
201 /// platform (e.g. `Boottime` on macOS, `MonotonicRaw` on Linux/BSD).
202 pub fn clk_id(self) -> Option<i32> {
203 match self {
204 ClockSource::Monotonic => Some(CLOCK_MONOTONIC),
205 #[cfg(target_os = "linux")]
206 ClockSource::Boottime => Some(CLOCK_BOOTTIME),
207 #[cfg(not(target_os = "linux"))]
208 ClockSource::Boottime => None,
209 #[cfg(any(target_os = "macos", target_os = "ios"))]
210 ClockSource::MonotonicRaw => Some(CLOCK_MONOTONIC_RAW),
211 #[cfg(not(any(target_os = "macos", target_os = "ios")))]
212 ClockSource::MonotonicRaw => None,
213 }
214 }
215}
216
/// Failures surfaced by [`Clock::new`].
#[derive(Debug)]
pub enum ClockError {
    /// The requested `ClockSource` has no kernel equivalent on this
    /// platform, i.e. [`ClockSource::clk_id`] returned `None`. That is
    /// `Boottime` on every non-Linux target and `MonotonicRaw` on every
    /// target other than macOS / iOS.
    Unsupported {
        /// The source the operator requested.
        source: ClockSource,
        /// `std::env::consts::OS` (fixed at compile time), for the error message.
        platform: &'static str,
    },
    /// `clock_gettime(2)` returned an OS-level error.
    Os(io::Error),
}
232
233impl std::fmt::Display for ClockError {
234 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
235 match self {
236 ClockError::Unsupported { source, platform } => {
237 let hint = match source {
238 ClockSource::Boottime => {
239 " (Linux only; on macOS use `monotonic-raw` for advance-through-sleep semantics)"
240 }
241 ClockSource::MonotonicRaw => {
242 " (macOS / iOS only; on Linux use `boottime` for advance-through-sleep semantics)"
243 }
244 ClockSource::Monotonic => "",
245 };
246 write!(
247 f,
248 "clock source `{source}` is not supported on `{platform}`{hint}"
249 )
250 }
251 ClockError::Os(e) => write!(f, "clock_gettime: {e}"),
252 }
253 }
254}
255
256impl std::error::Error for ClockError {
257 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
258 match self {
259 ClockError::Unsupported { .. } => None,
260 ClockError::Os(e) => Some(e),
261 }
262 }
263}
264
265impl From<ClockError> for io::Error {
266 fn from(e: ClockError) -> Self {
267 match e {
268 ClockError::Os(inner) => inner,
269 ClockError::Unsupported { .. } => {
270 io::Error::new(io::ErrorKind::Unsupported, e.to_string())
271 }
272 }
273 }
274}
275
276// --- Raw clock_gettime FFI ---------------------------------------------------
277//
278// Per-platform `struct timespec`. POSIX specifies `tv_sec: time_t,
279// tv_nsec: long`; `time_t` and `long` widths differ per OS.
280
// Exactly one of the four `Timespec` definitions below is compiled, chosen
// by `target_os`. All four currently declare the same two 64-bit fields.
//
// NOTE(review): fixed 64-bit fields presumably match the C `struct timespec`
// only on LP64 targets — confirm `time_t` / `long` widths before building
// for a 32-bit target.

/// Linux layout of the `struct timespec` passed to `clock_gettime`.
#[cfg(target_os = "linux")]
#[repr(C)]
struct Timespec {
    /// Whole seconds.
    tv_sec: i64,
    /// Nanoseconds within the second.
    tv_nsec: i64,
}

/// macOS / iOS layout of the `struct timespec` passed to `clock_gettime`.
#[cfg(any(target_os = "macos", target_os = "ios"))]
#[repr(C)]
struct Timespec {
    /// `time_t` on Darwin is `__darwin_time_t = long = i64` on 64-bit.
    tv_sec: i64,
    /// `long` on Darwin is i64 on 64-bit (LP64). `<sys/_types/_timespec.h>`
    /// defines `tv_nsec` as `long`, matching `tv_sec` width.
    tv_nsec: i64,
}

/// BSD-family layout of the `struct timespec` passed to `clock_gettime`.
#[cfg(any(
    target_os = "freebsd",
    target_os = "netbsd",
    target_os = "openbsd",
    target_os = "dragonfly",
))]
#[repr(C)]
struct Timespec {
    /// Whole seconds.
    tv_sec: i64,
    /// Nanoseconds within the second.
    tv_nsec: i64,
}

/// Fallback layout for any target not listed above; mirrors the Linux
/// definition.
#[cfg(not(any(
    target_os = "linux",
    target_os = "macos",
    target_os = "ios",
    target_os = "freebsd",
    target_os = "netbsd",
    target_os = "openbsd",
    target_os = "dragonfly",
)))]
#[repr(C)]
struct Timespec {
    /// Whole seconds.
    tv_sec: i64,
    /// Nanoseconds within the second.
    tv_nsec: i64,
}
324
// Direct binding to the C library's `clock_gettime`. `clk_id` selects the
// kernel clock and `tp` points at the `Timespec` that receives the reading.
// The only caller in this file, `clock_gettime_raw`, treats any non-zero
// return as failure and reads the cause via `io::Error::last_os_error()`.
extern "C" {
    fn clock_gettime(clk_id: i32, tp: *mut Timespec) -> i32;
}
328
329/// Read the requested kernel clock and return nanoseconds since its
330/// epoch as a `u64`.
331///
332/// The caller is responsible for clamping forward-monotonic over a baseline;
333/// this helper just exposes the raw clock value. Used both by [`Clock`]
334/// (observer hot path) and by the self-watchdog thread in `main.rs`.
335pub fn clock_gettime_raw(clk_id: i32) -> io::Result<u64> {
336 let mut tp = Timespec {
337 tv_sec: 0,
338 tv_nsec: 0,
339 };
340 // SAFETY: `tp` is a valid, exclusively-owned `Timespec` and remains in
341 // scope for the duration of the call. `clock_gettime` writes to `tp`
342 // only on success; the caller has exclusive `&mut` access through the
343 // raw pointer here.
344 let rc = unsafe { clock_gettime(clk_id, &mut tp as *mut Timespec) };
345 if rc != 0 {
346 return Err(io::Error::last_os_error());
347 }
348 // `tv_sec` and `tv_nsec` are non-negative for any reasonable clock_id.
349 // Cast carefully and saturate to u64::MAX on overflow.
350 let sec = if tp.tv_sec < 0 {
351 0u64
352 } else {
353 tp.tv_sec as u64
354 };
355 let nsec = if tp.tv_nsec < 0 {
356 0u64
357 } else {
358 tp.tv_nsec as u64
359 };
360 let total = sec
361 .checked_mul(1_000_000_000)
362 .and_then(|s| s.checked_add(nsec))
363 .unwrap_or(u64::MAX);
364 Ok(total)
365}
366
367/// Monotonic clock anchored to an observer-startup baseline.
368///
369/// Mirrors the semantics of `Observer::start.elapsed().as_nanos()` so
370/// downstream stall arithmetic is unchanged when the operator does not
371/// pass `--clock-source`.
372pub struct Clock {
373 source: ClockSource,
374 start_ns: u64,
375}
376
377impl Clock {
378 /// Build a `Clock` backed by `source`.
379 ///
380 /// Performs one `clock_gettime(2)` call to anchor `start_ns`. Returns
381 /// `ClockError::Unsupported` when `source = Boottime` on a non-Linux
382 /// target.
383 pub fn new(source: ClockSource) -> Result<Self, ClockError> {
384 let clk_id = source.clk_id().ok_or(ClockError::Unsupported {
385 source,
386 platform: std::env::consts::OS,
387 })?;
388 let start_ns = clock_gettime_raw(clk_id).map_err(ClockError::Os)?;
389 Ok(Self { source, start_ns })
390 }
391
392 /// One-call probe: surface `Unsupported` / OS errors at startup
393 /// before threading the clock through `Observer`.
394 pub fn probe(source: ClockSource) -> Result<(), ClockError> {
395 Self::new(source).map(|_| ())
396 }
397
398 /// Nanoseconds since this `Clock`'s baseline. Saturates to `u64::MAX`
399 /// on a wildly long-running process (>584 years).
400 pub fn now_ns(&self) -> u64 {
401 let clk_id = match self.source.clk_id() {
402 Some(id) => id,
403 // Unreachable: `new` rejected the unsupported case.
404 None => return 0,
405 };
406 let raw = clock_gettime_raw(clk_id).unwrap_or(self.start_ns);
407 raw.saturating_sub(self.start_ns)
408 }
409
410 /// Inspect the configured source (used by tests and by `main.rs` to
411 /// publish into the watchdog atomic).
412 pub fn source(&self) -> ClockSource {
413 self.source
414 }
415}
416
// Unit tests: string / byte-tag round-trips for `ClockSource`, plus
// per-platform acceptance or rejection of each source by `Clock::new`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    // Every documented spelling parses to its variant.
    #[test]
    fn parse_all_clock_source_variants() {
        assert_eq!(
            ClockSource::from_str("monotonic").unwrap(),
            ClockSource::Monotonic
        );
        assert_eq!(
            ClockSource::from_str("boottime").unwrap(),
            ClockSource::Boottime
        );
        assert_eq!(
            ClockSource::from_str("monotonic-raw").unwrap(),
            ClockSource::MonotonicRaw
        );
        // Underscore spelling is accepted as a convenience.
        assert_eq!(
            ClockSource::from_str("monotonic_raw").unwrap(),
            ClockSource::MonotonicRaw
        );
    }

    // An unknown name is rejected and echoed back verbatim in the error.
    #[test]
    fn parse_unknown_value_errors() {
        let e = ClockSource::from_str("wallclock").unwrap_err();
        assert_eq!(e.raw, "wallclock");
    }

    // `Display` output must be parseable back into the same variant.
    #[test]
    fn display_round_trip() {
        for src in [
            ClockSource::Monotonic,
            ClockSource::Boottime,
            ClockSource::MonotonicRaw,
        ] {
            let s = format!("{src}");
            assert_eq!(ClockSource::from_str(&s).unwrap(), src);
        }
    }

    // The one-byte tag encoding is lossless for every variant.
    #[test]
    fn as_u8_from_u8_round_trip() {
        for src in [
            ClockSource::Monotonic,
            ClockSource::Boottime,
            ClockSource::MonotonicRaw,
        ] {
            assert_eq!(ClockSource::from_u8(src.as_u8()), src);
        }
    }

    // Two back-to-back readings must not go backwards.
    #[test]
    fn monotonic_forward_only() {
        let clk = Clock::new(ClockSource::Monotonic).expect("CLOCK_MONOTONIC must be supported");
        let a = clk.now_ns();
        let b = clk.now_ns();
        assert!(b >= a, "monotonic clock regressed: {a} -> {b}");
    }

    // Linux only: `Boottime` constructs and does not go backwards.
    #[cfg(target_os = "linux")]
    #[test]
    fn boottime_forward_only_on_linux() {
        let clk = Clock::new(ClockSource::Boottime).expect("CLOCK_BOOTTIME must work on Linux");
        let a = clk.now_ns();
        let b = clk.now_ns();
        assert!(b >= a, "boottime clock regressed: {a} -> {b}");
    }

    // Everywhere else: `Boottime` must be refused with `Unsupported`.
    #[cfg(not(target_os = "linux"))]
    #[test]
    fn boottime_rejected_on_unsupported_platform() {
        match Clock::new(ClockSource::Boottime) {
            Err(ClockError::Unsupported { source, .. }) => {
                assert_eq!(source, ClockSource::Boottime);
            }
            Err(other) => panic!("expected Unsupported, got {other:?}"),
            Ok(_) => panic!("expected Boottime to be rejected on non-Linux"),
        }
    }

    // macOS / iOS only: `MonotonicRaw` constructs and does not go backwards.
    #[cfg(any(target_os = "macos", target_os = "ios"))]
    #[test]
    fn monotonic_raw_forward_only_on_macos() {
        let clk =
            Clock::new(ClockSource::MonotonicRaw).expect("CLOCK_MONOTONIC_RAW must work on macOS");
        let a = clk.now_ns();
        let b = clk.now_ns();
        assert!(b >= a, "monotonic-raw clock regressed: {a} -> {b}");
    }

    // Everywhere else: `MonotonicRaw` must be refused with `Unsupported`.
    #[cfg(not(any(target_os = "macos", target_os = "ios")))]
    #[test]
    fn monotonic_raw_rejected_on_non_macos() {
        match Clock::new(ClockSource::MonotonicRaw) {
            Err(ClockError::Unsupported { source, .. }) => {
                assert_eq!(source, ClockSource::MonotonicRaw);
            }
            Err(other) => panic!("expected Unsupported, got {other:?}"),
            Ok(_) => panic!("expected MonotonicRaw to be rejected outside macOS / iOS"),
        }
    }

    // Readings are relative to the construction-time baseline, not to the
    // kernel clock's own epoch.
    #[test]
    fn now_ns_baseline_starts_near_zero() {
        let clk = Clock::new(ClockSource::Monotonic).unwrap();
        let first = clk.now_ns();
        // First call shouldn't be wildly in the future. The bound is a
        // generous one second so a slow or loaded test host cannot trip it.
        assert!(
            first < 1_000_000_000,
            "first now_ns reading too large: {first}"
        );
    }
}