clock_bound_shm/
lib.rs

//! ClockBound Shared Memory
//!
//! This crate implements the low-level IPC functionality to share ClockErrorBound data and clock
//! status over a shared memory segment. This crate is meant to be used by the C and Rust versions
//! of the ClockBound client library.

// TODO: prevent clippy from checking for dead code. The writer module is only re-exported publicly
// if the "writer" feature is selected. There may be a better way to do that and re-enable the lint.
#![allow(dead_code)]

// Re-exports reader and writer. The writer is conditionally included under the "writer" feature.
pub use crate::reader::ShmReader;
#[cfg(feature = "writer")]
pub use crate::writer::{ShmWrite, ShmWriter};

pub mod common;
mod reader;
mod shm_header;
mod writer;

use errno::Errno;
use nix::sys::time::{TimeSpec, TimeValLike};
use std::ffi::CStr;

use common::{clock_gettime_safe, CLOCK_MONOTONIC, CLOCK_REALTIME};

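/// Grace period after `as_of` during which the clock status read from the shared memory segment
/// is still trusted even if the ClockBound daemon has not refreshed the data, so that a short
/// restart of the daemon does not immediately degrade the reported status.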
const CLOCKBOUND_RESTART_GRACE_PERIOD: TimeSpec = TimeSpec::new(5, 0);

/// Convenience macro to build a ShmError::SyscallError with extra info from errno and custom
/// origin information.
#[macro_export]
macro_rules! syserror {
    ($origin:expr) => {
        Err($crate::ShmError::SyscallError(
            ::errno::errno(),
            ::std::ffi::CStr::from_bytes_with_nul(concat!($origin, "\0").as_bytes()).unwrap(),
        ))
    };
}
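
// A minimal sketch of how `syserror!` is expected to be used at the call site of a failing system
// call. This example is illustrative only: it simulates the failure by setting errno through the
// `errno` crate rather than performing a real syscall, and the "clock_gettime" origin string is
// just an assumed label.
#[cfg(test)]
mod t_syserror_example {
    use super::*;
    use errno::{set_errno, Errno};

    fn simulated_failing_syscall() -> Result<(), ShmError> {
        // Pretend a syscall failed with EBADF (9 on Linux) and report it through `syserror!`.
        set_errno(Errno(9));
        syserror!("clock_gettime")
    }

    #[test]
    fn syserror_captures_errno_and_origin() {
        match simulated_failing_syscall() {
            Err(ShmError::SyscallError(err, origin)) => {
                assert_eq!(err.0, 9);
                assert_eq!(origin.to_str().unwrap(), "clock_gettime");
            }
            other => panic!("expected a SyscallError, got {:?}", other),
        }
    }
}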

/// Error condition returned by all low-level ClockBound APIs.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ShmError {
    /// A system call failed.
    /// Variant includes the Errno struct with error details, and an indication of the origin of
    /// the system call that errored.
    SyscallError(Errno, &'static CStr),

    /// The shared memory segment is not initialized.
    SegmentNotInitialized,

    /// The shared memory segment is initialized but malformed.
    SegmentMalformed,

    /// Failed causality check when comparing timestamps.
    CausalityBreach,

    /// The shared memory segment version is not supported.
    SegmentVersionNotSupported,
}

/// Definition of mutually exclusive clock status exposed to the reader.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum ClockStatus {
    /// The status of the clock is unknown.
    Unknown = 0,

    /// The clock is kept accurate by the synchronization daemon.
    Synchronized = 1,

    /// The clock is free running and not updated by the synchronization daemon.
    FreeRunning = 2,

    /// The clock has been disrupted and the accuracy of time cannot be bounded.
    Disrupted = 3,
}
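
// Note: the discriminant values are pinned explicitly and the enum uses #[repr(C)] because the
// status travels through the shared memory segment and is consumed by both the Rust and C client
// libraries, so the numeric encoding needs to stay identical on both sides.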

/// Structure that holds the ClockErrorBound data captured at a specific point in time and valid
/// until a subsequent point in time.
///
/// The ClockErrorBound structure supports calculating the actual bound on clock error at any time,
/// using its `now()` method. The internal fields are not meant to be accessed directly.
///
/// Note that the timestamps in between which this ClockErrorBound data is valid are captured using
/// a CLOCK_MONOTONIC_COARSE clock. The monotonic clock id is required to correctly measure the
/// duration during which clock drift possibly accrues, and avoid events when the clock is set,
/// smeared or affected by leap seconds.
///
/// The structure is shared across the Shared Memory segment and has a C representation to enforce
/// this specific layout.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct ClockErrorBound {
    /// The CLOCK_MONOTONIC_COARSE timestamp recorded when the bound on clock error was
    /// calculated. The current implementation relies on Chrony tracking data, which accounts for
    /// the dispersion between the last clock processing event, and the reading of tracking data.
    as_of: TimeSpec,

    /// The CLOCK_MONOTONIC_COARSE timestamp beyond which the bound on clock error should not be
    /// trusted. This is a useful signal that the communication with the synchronization daemon
    /// has failed, for example.
    void_after: TimeSpec,

    /// An absolute upper bound on the accuracy of the `CLOCK_REALTIME` clock with regards to true
    /// time at the instant represented by `as_of`.
    bound_nsec: i64,

    /// Disruption marker.
    ///
    /// This value is incremented (by an unspecified delta) each time the clock has been disrupted.
    /// This count value is specific to a particular VM/EC2 instance.
    pub disruption_marker: u64,

    /// Maximum drift rate of the clock between updates of the synchronization daemon. The value
    /// stored in `bound_nsec` should increase by the following to account for the clock drift
    /// since `bound_nsec` was computed:
    /// `bound_nsec += max_drift_ppb * (now - as_of)`
    max_drift_ppb: u32,

    /// The synchronization daemon status indicates whether the daemon is synchronized,
    /// free-running, etc.
    clock_status: ClockStatus,

    /// Clock disruption support enabled flag.
    ///
    /// This indicates whether or not the ClockBound daemon was started with a
    /// configuration that supports detecting clock disruptions.
    pub clock_disruption_support_enabled: bool,

    /// Padding.
    _padding: [u8; 7],
}
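
// Note on layout (assuming a 64-bit Linux target, where TimeSpec is 16 bytes and the #[repr(C)]
// ClockStatus enum is 4 bytes): the fields plus the explicit 7 bytes of padding add up to
// 64 bytes, giving the structure a fixed, 8-byte aligned footprint in the shared memory segment.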

impl Default for ClockErrorBound {
    /// Get a default ClockErrorBound struct
    /// Equivalent to zero'ing this bit of memory
    fn default() -> Self {
        ClockErrorBound {
            as_of: TimeSpec::new(0, 0),
            void_after: TimeSpec::new(0, 0),
            bound_nsec: 0,
            disruption_marker: 0,
            max_drift_ppb: 0,
            clock_status: ClockStatus::Unknown,
            clock_disruption_support_enabled: false,
            _padding: [0u8; 7],
        }
    }
}

impl ClockErrorBound {
    /// Create a new ClockErrorBound struct.
    pub fn new(
        as_of: TimeSpec,
        void_after: TimeSpec,
        bound_nsec: i64,
        disruption_marker: u64,
        max_drift_ppb: u32,
        clock_status: ClockStatus,
        clock_disruption_support_enabled: bool,
    ) -> ClockErrorBound {
        ClockErrorBound {
            as_of,
            void_after,
            bound_nsec,
            disruption_marker,
            max_drift_ppb,
            clock_status,
            clock_disruption_support_enabled,
            _padding: [0u8; 7],
        }
    }

    /// The ClockErrorBound equivalent of clock_gettime(), but with a bound on accuracy.
    ///
    /// Returns a pair of (earliest, latest) timespec between which current time exists. The
    /// interval width is twice the clock error bound (ceb) such that:
    ///   (earliest, latest) = ((now - ceb), (now + ceb))
    /// The function also returns a clock status to assert whether the clock is being synchronized,
    /// free-running, or ...
    pub fn now(&self) -> Result<(TimeSpec, TimeSpec, ClockStatus), ShmError> {
        // Read the clock, start with the REALTIME one to be as close as possible to the event the
        // caller is interested in. The monotonic clock should be read after. It is correct for the
        // process to be preempted between the two calls: a delayed read of the monotonic clock
        // will make the bound on clock error more pessimistic, but remains correct.
        let real = clock_gettime_safe(CLOCK_REALTIME)?;
        let mono = clock_gettime_safe(CLOCK_MONOTONIC)?;

        self.compute_bound_at(real, mono)
    }

    /// Compute the bound on clock error at a given point in time.
    ///
    /// The time at which the bound is computed is defined by the (real, mono) pair of timestamps
    /// read from the realtime and monotonic clock respectively, *roughly* at the same time. The
    /// details of correctly working around the "rough" alignment of the timestamps are not
    /// something we want to leave to the user of ClockBound, hence this method is private.
    /// Although `now()` may be its only caller, decoupling the two makes writing unit tests a bit
    /// easier.
    fn compute_bound_at(
        &self,
        real: TimeSpec,
        mono: TimeSpec,
    ) -> Result<(TimeSpec, TimeSpec, ClockStatus), ShmError> {
        // Sanity checks:
        // - `now()` should operate on a consistent snapshot of the shared memory segment, and
        //   causality between mono and as_of should be enforced.
        // - an extremely high value of `max_drift_ppb` is a sign of something going wrong.
        if self.max_drift_ppb >= 1_000_000_000 {
            return Err(ShmError::SegmentMalformed);
        }

        // If the ClockErrorBound data has not been updated "recently", the status of the clock
        // cannot be guaranteed. Things are ambiguous: the synchronization daemon may be dead, or
        // its interaction with the clockbound daemon may be broken, or ... In any case, we signal
        // the caller that guarantees are gone. We could return an Err here, but choose to leverage
        // ClockStatus instead, putting the responsibility on the caller to check the clock status
        // value being returned.
        // TODO: this may not be the most ergonomic decision, putting a pin here to revisit this
        // decision once the client code is fleshed out.
        let clock_status = match self.clock_status {
            // If the status in the shared memory segment is Unknown or Disrupted, return that
            // status.
            ClockStatus::Unknown | ClockStatus::Disrupted => self.clock_status,

            // If the status is Synchronized or FreeRunning, the expectation from the client is
            // that the data is usable. However, if the clockbound daemon died or has not updated
            // the shared memory segment in a while, the status written to the shared memory
            // segment may not be reliable anymore.
            ClockStatus::Synchronized | ClockStatus::FreeRunning => {
                if mono < self.as_of + CLOCKBOUND_RESTART_GRACE_PERIOD {
                    // Allow for a restart of the daemon: for a short period of time, the status
                    // is trusted to be correct.
                    self.clock_status
                } else if mono < self.void_after {
                    // Beyond the grace period but before void_after, fall back to a free running
                    // status.
                    ClockStatus::FreeRunning
                } else {
                    // If beyond void_after, no guarantee is provided anymore.
                    ClockStatus::Unknown
                }
            }
        };

        // Calculate the duration that has elapsed between the instant when the CEB parameters were
        // snapshotted from the SHM segment (approximated by `as_of`), and the instant when the
        // request to calculate the CEB was actually made (approximated by `mono`). This duration
        // is used to compute the growth of the error bound due to local dispersion between polling
        // chrony and now.
        //
        // To avoid miscalculation in case the synchronization daemon is restarted, a
        // CLOCK_MONOTONIC is used, since it is designed to not jump. Because we want this to be
        // fast, and the exact accuracy is not critical here, we use CLOCK_MONOTONIC_COARSE on
        // platforms that support it.
        //
        // But ... there is a catch. When validating causality of these events (that is, `as_of`
        // should always be older than `mono`), we observed this check to sometimes fail, with
        // `mono` being older by a handful of nanoseconds. The root cause is not completely
        // understood, but points to the clock resolution and/or update strategy and/or propagation
        // of the updates through the VDSO memory page. See this for details:
        // https://t.corp.amazon.com/P101954401.
        //
        // The following implementation is a mitigation.
        //   1. if as_of <= mono, calculate the duration (happy path)
        //   2. if as_of - epsilon < mono < as_of, set the duration to 0
        //   3. if mono < as_of - epsilon, return an error
        //
        // In short, this relaxes the sanity check a bit to accept some imprecision in the clock
        // reading routines.
        //
        // What is a good value for `epsilon`?
        // The CLOCK_MONOTONIC_COARSE resolution is a function of the HZ kernel variable defining
        // the last kernel tick that drives this clock (e.g. HZ=250 leads to a 4 millisecond
        // resolution). We could use the `clock_getres()` system call to retrieve this value but
        // this makes diagnosing over different platform / OS configurations more complex. Instead,
        // we settle on an arbitrary default value of 1 millisecond.
        let causality_blur = self.as_of - TimeSpec::new(0, 1_000_000);

        let duration = if mono >= self.as_of {
            // Happy path, no causality doubt
            mono - self.as_of
        } else if mono > causality_blur {
            // Causality is "almost" broken. We are within a range that could be due to the clock
            // precision. Let's approximate this to equality between mono and as_of.
            TimeSpec::new(0, 0)
        } else {
            // Causality is breached.
            return Err(ShmError::CausalityBreach);
        };

        // Inflate the bound on clock error with the maximum drift the clock may be experiencing
        // between the snapshot being read and ~now.
        let duration_sec = duration.num_nanoseconds() as f64 / 1_000_000_000_f64;
        let updated_bound = TimeSpec::nanoseconds(
            self.bound_nsec + (duration_sec * self.max_drift_ppb as f64) as i64,
        );
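
        // Worked example with the numbers used in the unit tests below: with bound_nsec set to
        // 10_000 nanoseconds, max_drift_ppb set to 1_000 and 2 seconds elapsed since `as_of`, the
        // drift term is 2.0 * 1_000 = 2_000 nanoseconds, so the updated bound is 12_000
        // nanoseconds.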

        // Build the (earliest, latest) interval within which true time exists.
        let earliest = real - updated_bound;
        let latest = real + updated_bound;

        Ok((earliest, latest, clock_status))
    }
}
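
// A minimal caller-side sketch of `now()`, added for illustration. It uses a default (zeroed)
// ClockErrorBound rather than one read from the shared memory segment, so the drift term is zero,
// the returned interval collapses onto the realtime reading, and the status stays Unknown. Real
// clients would obtain the ClockErrorBound from the shared memory segment (via the reader) rather
// than constructing it directly.
#[cfg(test)]
mod t_now_example {
    use super::*;

    #[test]
    fn now_returns_ordered_interval_and_status() {
        let ceb = ClockErrorBound::default();

        let (earliest, latest, status) = ceb
            .now()
            .expect("Failed to read clocks and compute bound");

        // With bound_nsec == 0 and max_drift_ppb == 0 the interval has zero width.
        assert!(earliest <= latest);
        assert_eq!(status, ClockStatus::Unknown);
    }
}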

#[cfg(test)]
mod t_lib {
    use super::*;

    // Convenience macro to build ClockErrorBound for unit tests
    macro_rules! clockbound {
        (($asof_tv_sec:literal, $asof_tv_nsec:literal), ($after_tv_sec:literal, $after_tv_nsec:literal)) => {
            ClockErrorBound::new(
                TimeSpec::new($asof_tv_sec, $asof_tv_nsec), // as_of
                TimeSpec::new($after_tv_sec, $after_tv_nsec), // void_after
                10000,                                      // bound_nsec
                0,                                          // disruption_marker
                1000,                                       // max_drift_ppb
                ClockStatus::Synchronized,                  // clock_status
                true,                                       // clock_disruption_support_enabled
            )
        };
    }

    /// Assert the bound on clock error is computed correctly
    #[test]
    fn compute_bound_ok() {
        let ceb = clockbound!((0, 0), (10, 0));
        let real = TimeSpec::new(2, 0);
        let mono = TimeSpec::new(2, 0);

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // 2 seconds have passed since the bound was snapshotted, hence 2 microsec of drift on top
        // of the default 10 microsec put in the ClockErrorBound data
        assert_eq!(earliest.tv_sec(), 1);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 12_000);
        assert_eq!(latest.tv_sec(), 2);
        assert_eq!(latest.tv_nsec(), 12_000);
        assert_eq!(status, ClockStatus::Synchronized);
    }

    /// Assert the bound on clock error is computed correctly, with realtime and monotonic clocks
    /// disagreeing on time
    #[test]
    fn compute_bound_ok_when_real_ahead() {
        let ceb = clockbound!((0, 0), (10, 0));
        let real = TimeSpec::new(20, 0); // realtime clock way ahead
        let mono = TimeSpec::new(4, 0);

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // 4 seconds have passed since the bound was snapshotted, hence 4 microsec of drift on top
        // of the default 10 microsec put in the ClockErrorBound data
        assert_eq!(earliest.tv_sec(), 19);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 14_000);
        assert_eq!(latest.tv_sec(), 20);
        assert_eq!(latest.tv_nsec(), 14_000);
        assert_eq!(status, ClockStatus::Synchronized);
    }

    /// Assert the clock status is FreeRunning if the ClockErrorBound data is past the grace
    /// period
    #[test]
    fn compute_bound_force_free_running_status() {
        let ceb = clockbound!((0, 0), (100, 0));
        let real = TimeSpec::new(8, 0);
        let mono = TimeSpec::new(8, 0);

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // 8 seconds have passed since the bound was snapshotted, hence 8 microsec of drift on top
        // of the default 10 microsec put in the ClockErrorBound data
        assert_eq!(earliest.tv_sec(), 7);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 18_000);
        assert_eq!(latest.tv_sec(), 8);
        assert_eq!(latest.tv_nsec(), 18_000);
        assert_eq!(status, ClockStatus::FreeRunning);
    }

    /// Assert the clock status is Unknown if the ClockErrorBound data is past void_after
    #[test]
    fn compute_bound_unknown_status_if_expired() {
        let ceb = clockbound!((0, 0), (5, 0));
        let real = TimeSpec::new(10, 0);
        let mono = TimeSpec::new(10, 0); // Past void_after

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // 10 seconds have passed since the bound was snapshotted, hence 10 microsec of drift on
        // top of the default 10 microsec put in the ClockErrorBound data
        assert_eq!(earliest.tv_sec(), 9);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 20_000);
        assert_eq!(latest.tv_sec(), 10);
        assert_eq!(latest.tv_nsec(), 20_000);
        assert_eq!(status, ClockStatus::Unknown);
    }

    /// Assert errors are returned if the ClockErrorBound data is malformed with a bad drift
    #[test]
    fn compute_bound_bad_drift() {
        let mut ceb = clockbound!((0, 0), (10, 0));
        let real = TimeSpec::new(5, 0);
        let mono = TimeSpec::new(5, 0);
        ceb.max_drift_ppb = 2_000_000_000;

        assert!(ceb.compute_bound_at(real, mono).is_err());
    }

    /// Assert errors are returned if the ClockErrorBound data snapshot has been taken after
    /// reading clocks at 'now'
    #[test]
    fn compute_bound_causality_break() {
        let ceb = clockbound!((5, 0), (10, 0));
        let real = TimeSpec::new(1, 0);
        let mono = TimeSpec::new(1, 0);

        let res = ceb.compute_bound_at(real, mono);

        assert!(res.is_err());
    }
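
    /// Illustrative extra check: assert the causality blur mitigation treats a monotonic reading
    /// that is only marginally older than `as_of` as equality, so no extra drift is added to the
    /// bound.
    #[test]
    fn compute_bound_within_causality_blur() {
        let ceb = clockbound!((5, 0), (10, 0));
        let real = TimeSpec::new(5, 0);
        // 100 nanoseconds older than `as_of`, well within the causality blur window.
        let mono = TimeSpec::new(4, 999_999_900);

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // The elapsed duration is clamped to zero, so the bound stays at the default 10 microsec.
        assert_eq!(earliest.tv_sec(), 4);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 10_000);
        assert_eq!(latest.tv_sec(), 5);
        assert_eq!(latest.tv_nsec(), 10_000);
        assert_eq!(status, ClockStatus::Synchronized);
    }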
}