clock_bound_shm/
lib.rs

//! ClockBound Shared Memory
//!
//! This crate implements the low-level IPC functionality to share ClockErrorBound data and clock
//! status over a shared memory segment. This crate is meant to be used by the C and Rust versions
//! of the ClockBound client library.

// TODO: prevent clippy from checking for dead code. The writer module is only re-exported publicly
// if the "writer" feature is selected. There may be a better way to do that and re-enable the lint.
#![allow(dead_code)]

// Re-exports reader and writer. The writer is conditionally included under the "writer" feature.
pub use crate::reader::ShmReader;
#[cfg(feature = "writer")]
pub use crate::writer::{ShmWrite, ShmWriter};

pub mod common;
mod reader;
mod shm_header;
mod writer;

use errno::Errno;
use nix::sys::time::{TimeSpec, TimeValLike};
use std::ffi::CStr;

use common::{clock_gettime_safe, CLOCK_MONOTONIC, CLOCK_REALTIME};

const CLOCKBOUND_RESTART_GRACE_PERIOD: TimeSpec = TimeSpec::new(5, 0);

/// Convenience macro to build a ShmError::SyscallError with extra info from errno and custom
/// origin information.
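///
/// A minimal usage sketch, assuming the `libc` crate is available as a dependency; the
/// `read_clock` helper is hypothetical and only illustrates how a failing system call can be
/// converted into a `ShmError::SyscallError`.
///
/// ```ignore
/// use clock_bound_shm::{syserror, ShmError};
///
/// fn read_clock(clock_id: libc::clockid_t) -> Result<libc::timespec, ShmError> {
///     let mut ts = libc::timespec { tv_sec: 0, tv_nsec: 0 };
///     // clock_gettime returns -1 and sets errno on failure.
///     if unsafe { libc::clock_gettime(clock_id, &mut ts) } != 0 {
///         // Capture errno and the name of the failing call in the error.
///         return syserror!("clock_gettime");
///     }
///     Ok(ts)
/// }
/// ```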
#[macro_export]
macro_rules! syserror {
    ($origin:expr) => {
        Err($crate::ShmError::SyscallError(
            ::errno::errno(),
            ::std::ffi::CStr::from_bytes_with_nul(concat!($origin, "\0").as_bytes()).unwrap(),
        ))
    };
}

/// Error condition returned by all low-level ClockBound APIs.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ShmError {
    /// A system call failed.
    /// The variant includes the Errno struct with the error details, and an indication of the
    /// origin of the system call that failed.
    SyscallError(Errno, &'static CStr),

    /// The shared memory segment is not initialized.
    SegmentNotInitialized,

    /// The shared memory segment is initialized but malformed.
    SegmentMalformed,

    /// Failed causality check when comparing timestamps.
    CausalityBreach,

    /// The shared memory segment version is not supported.
    SegmentVersionNotSupported,
}

/// Definition of mutually exclusive clock status exposed to the reader.
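///
/// A small sketch of how a caller might decide whether error-bounded timestamps can be trusted
/// for a given status, following the variant documentation below. The `trust_bound` helper is
/// hypothetical.
///
/// ```ignore
/// use clock_bound_shm::ClockStatus;
///
/// fn trust_bound(status: ClockStatus) -> bool {
///     match status {
///         // Error-bounded timestamps can be trusted in these states.
///         ClockStatus::Synchronized | ClockStatus::FreeRunning => true,
///         // The bound cannot be trusted when the status is unknown or the clock was disrupted.
///         ClockStatus::Unknown | ClockStatus::Disrupted => false,
///     }
/// }
/// ```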
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum ClockStatus {
    /// The status of the clock is unknown.
    /// In this clock status, error-bounded timestamps should not be trusted.
    Unknown = 0,

    /// The clock is kept accurate by the synchronization daemon.
    /// In this clock status, error-bounded timestamps can be trusted.
    Synchronized = 1,

    /// The clock is free running and not updated by the synchronization daemon.
    /// In this clock status, error-bounded timestamps can be trusted.
    FreeRunning = 2,

    /// The clock has been disrupted and the accuracy of time cannot be bounded.
    /// In this clock status, error-bounded timestamps should not be trusted.
    Disrupted = 3,
}

/// Structure that holds the ClockErrorBound data captured at a specific point in time and valid
/// until a subsequent point in time.
///
/// The ClockErrorBound structure supports calculating the actual bound on clock error at any time,
/// using its `now()` method. The internal fields are not meant to be accessed directly.
///
/// Note that the timestamps between which this ClockErrorBound data is valid are captured using
/// a CLOCK_MONOTONIC_COARSE clock. A monotonic clock id is required to correctly measure the
/// duration during which clock drift possibly accrues, and to avoid being affected by events
/// where the clock is set, smeared or adjusted for leap seconds.
///
/// The structure is shared across the Shared Memory segment and has a C representation to enforce
/// this specific layout.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct ClockErrorBound {
    /// The CLOCK_MONOTONIC_COARSE timestamp recorded when the bound on clock error was
    /// calculated. The current implementation relies on Chrony tracking data, which accounts for
    /// the dispersion between the last clock processing event and the reading of tracking data.
    as_of: TimeSpec,

    /// The CLOCK_MONOTONIC_COARSE timestamp beyond which the bound on clock error should not be
    /// trusted. This is a useful signal that communication with the synchronization daemon
    /// has failed, for example.
    void_after: TimeSpec,

    /// An absolute upper bound on the accuracy of the `CLOCK_REALTIME` clock with regard to true
    /// time at the instant represented by `as_of`.
    bound_nsec: i64,

    /// Disruption marker.
    ///
    /// This value is incremented (by an unspecified delta) each time the clock has been disrupted.
    /// This count value is specific to a particular VM/EC2 instance.
    pub disruption_marker: u64,

    /// Maximum drift rate of the clock between updates of the synchronization daemon. The value
    /// stored in `bound_nsec` should increase by the following to account for the clock drift
    /// since `bound_nsec` was computed:
    /// `bound_nsec += max_drift_ppb * (now - as_of)`
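    ///
    /// As an illustration (the numbers mirror this crate's unit tests, not defaults): with
    /// `max_drift_ppb = 1000`, i.e. at most 1000 nanoseconds of drift per second of elapsed time,
    /// and 2 seconds elapsed since `as_of`, the bound grows by 2_000 nanoseconds.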
    max_drift_ppb: u32,

    /// The clock status reported by the synchronization daemon, indicating whether the clock is
    /// synchronized, free-running, etc.
    clock_status: ClockStatus,

    /// Clock disruption support enabled flag.
    ///
    /// This indicates whether or not the ClockBound daemon was started with a
    /// configuration that supports detecting clock disruptions.
    pub clock_disruption_support_enabled: bool,

    /// Padding.
    _padding: [u8; 7],
}

impl Default for ClockErrorBound {
    /// Get a default ClockErrorBound struct.
    /// Equivalent to zeroing this bit of memory.
    fn default() -> Self {
        ClockErrorBound {
            as_of: TimeSpec::new(0, 0),
            void_after: TimeSpec::new(0, 0),
            bound_nsec: 0,
            disruption_marker: 0,
            max_drift_ppb: 0,
            clock_status: ClockStatus::Unknown,
            clock_disruption_support_enabled: false,
            _padding: [0u8; 7],
        }
    }
}

impl ClockErrorBound {
    /// Create a new ClockErrorBound struct.
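    ///
    /// A construction sketch mirroring the values used in the unit tests at the bottom of this
    /// module; the numbers are illustrative only.
    ///
    /// ```ignore
    /// use clock_bound_shm::{ClockErrorBound, ClockStatus};
    /// use nix::sys::time::TimeSpec;
    ///
    /// let ceb = ClockErrorBound::new(
    ///     TimeSpec::new(0, 0),       // as_of
    ///     TimeSpec::new(10, 0),      // void_after
    ///     10_000,                    // bound_nsec: 10 microseconds
    ///     0,                         // disruption_marker
    ///     1_000,                     // max_drift_ppb
    ///     ClockStatus::Synchronized, // clock_status
    ///     true,                      // clock_disruption_support_enabled
    /// );
    /// ```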
    pub fn new(
        as_of: TimeSpec,
        void_after: TimeSpec,
        bound_nsec: i64,
        disruption_marker: u64,
        max_drift_ppb: u32,
        clock_status: ClockStatus,
        clock_disruption_support_enabled: bool,
    ) -> ClockErrorBound {
        ClockErrorBound {
            as_of,
            void_after,
            bound_nsec,
            disruption_marker,
            max_drift_ppb,
            clock_status,
            clock_disruption_support_enabled,
            _padding: [0u8; 7],
        }
    }

    /// The ClockErrorBound equivalent of clock_gettime(), but with a bound on accuracy.
    ///
    /// Returns a pair of (earliest, latest) timespec between which current time exists. The
    /// interval width is twice the clock error bound (ceb) such that:
    ///   (earliest, latest) = ((now - ceb), (now + ceb))
    /// The function also returns a clock status indicating whether the clock is synchronized,
    /// free-running, etc.
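    ///
    /// A usage sketch: a real client obtains the ClockErrorBound data from the shared memory
    /// segment via `ShmReader`; a default value is used here purely to illustrate the call.
    ///
    /// ```ignore
    /// use clock_bound_shm::ClockErrorBound;
    ///
    /// let ceb = ClockErrorBound::default();
    /// let (earliest, latest, status) = ceb.now().expect("failed to read clocks");
    /// // True time lies within [earliest, latest], provided `status` says the bound can be
    /// // trusted.
    /// assert!(earliest <= latest);
    /// ```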
    pub fn now(&self) -> Result<(TimeSpec, TimeSpec, ClockStatus), ShmError> {
        // Read the clocks, starting with the REALTIME one to be as close as possible to the event
        // the caller is interested in. The monotonic clock should be read after. It is fine for
        // the process to be preempted between the two calls: a delayed read of the monotonic
        // clock makes the bound on clock error more pessimistic, but it remains correct.
        let real = clock_gettime_safe(CLOCK_REALTIME)?;
        let mono = clock_gettime_safe(CLOCK_MONOTONIC)?;

        self.compute_bound_at(real, mono)
    }

    /// Compute the bound on clock error at a given point in time.
    ///
    /// The time at which the bound is computed is defined by the (real, mono) pair of timestamps
    /// read from the realtime and monotonic clock respectively, *roughly* at the same time. The
    /// details of correctly working around the "rough" alignment of the timestamps are not
    /// something we want to leave to the user of ClockBound, hence this method is private.
    /// Although `now()` may be its only caller, decoupling the two makes writing unit tests a bit
    /// easier.
    fn compute_bound_at(
        &self,
        real: TimeSpec,
        mono: TimeSpec,
    ) -> Result<(TimeSpec, TimeSpec, ClockStatus), ShmError> {
        // Sanity checks:
        // - `now()` should operate on a consistent snapshot of the shared memory segment, and
        //   causality between mono and as_of should be enforced.
        // - an extremely high value of `max_drift_ppb` is a sign of something going wrong
        if self.max_drift_ppb >= 1_000_000_000 {
            return Err(ShmError::SegmentMalformed);
        }

        // If the ClockErrorBound data has not been updated "recently", the status of the clock
        // cannot be guaranteed. Things are ambiguous: the synchronization daemon may be dead, its
        // interaction with the clockbound daemon may be broken, or ... In any case, we signal the
        // caller that guarantees are gone. We could return an Err here, but we choose to leverage
        // ClockStatus instead, putting the responsibility on the caller to check the clock status
        // value being returned.
        // TODO: this may not be the most ergonomic decision, putting a pin here to revisit this
        // decision once the client code is fleshed out.
        let clock_status = match self.clock_status {
            // If the status in the shared memory segment is Unknown or Disrupted, return that
            // status.
            ClockStatus::Unknown | ClockStatus::Disrupted => self.clock_status,

            // If the status is Synchronized or FreeRunning, the expectation from the client is
            // that the data is usable. However, if the clockbound daemon died or has not updated
            // the shared memory segment in a while, the status written to the shared memory
            // segment may not be reliable anymore.
            ClockStatus::Synchronized | ClockStatus::FreeRunning => {
                if mono < self.as_of + CLOCKBOUND_RESTART_GRACE_PERIOD {
                    // Allow for a restart of the daemon: for a short period of time, the status
                    // is trusted to be correct.
                    self.clock_status
                } else if mono < self.void_after {
                    // Beyond the grace period, fall back to a free running status.
                    ClockStatus::FreeRunning
                } else {
                    // If beyond void_after, no guarantee is provided anymore.
                    ClockStatus::Unknown
                }
            }
        };

        // Calculate the duration that has elapsed between the instant when the CEB parameters
        // were snapshotted from the SHM segment (approximated by `as_of`), and the instant when
        // the calculation of the CEB was actually requested (approximated by `mono`). This
        // duration is used to compute the growth of the error bound due to local dispersion
        // between polling chrony and now.
        //
        // To avoid miscalculation in case the synchronization daemon is restarted, a
        // CLOCK_MONOTONIC is used, since it is designed to not jump. Because we want this to be
        // fast, and the exact accuracy is not critical here, we use CLOCK_MONOTONIC_COARSE on
        // platforms that support it.
        //
        // But ... there is a catch. When validating the causality of these events (that is,
        // `as_of` should always be older than `mono`), we observed this test to sometimes fail,
        // with `mono` being older by a handful of nanoseconds. The root cause is not completely
        // understood, but points to the clock resolution and/or update strategy and/or
        // propagation of the updates through the VDSO memory page. See this for details:
        // https://t.corp.amazon.com/P101954401.
        //
        // The following implementation is a mitigation.
        //   1. if as_of <= mono, calculate the duration (happy path)
        //   2. if as_of - epsilon < mono < as_of, set the duration to 0
        //   3. if mono < as_of - epsilon, return an error
        //
        // In short, this relaxes the sanity check a bit to accept some imprecision in the clock
        // reading routines.
        //
        // What is a good value for `epsilon`?
        // The CLOCK_MONOTONIC_COARSE resolution is a function of the HZ kernel variable defining
        // the last kernel tick that drives this clock (e.g. HZ=250 leads to a 4 millisecond
        // resolution). We could use the `clock_getres()` system call to retrieve this value, but
        // this makes diagnosing over different platform / OS configurations more complex. Instead
        // we settle on an arbitrary default value of 1 millisecond.
        let causality_blur = self.as_of - TimeSpec::new(0, 1_000_000);

        let duration = if mono >= self.as_of {
            // Happy path, no causality doubt
            mono - self.as_of
        } else if mono > causality_blur {
            // Causality is "almost" broken. We are within a range that could be due to the clock
            // precision. Let's approximate this to equality between mono and as_of.
            TimeSpec::new(0, 0)
        } else {
            // Causality is breached.
            return Err(ShmError::CausalityBreach);
        };

        // Inflate the bound on clock error with the maximum drift the clock may be experiencing
        // between the snapshot being read and ~now.
        let duration_sec = duration.num_nanoseconds() as f64 / 1_000_000_000_f64;
        let updated_bound = TimeSpec::nanoseconds(
            self.bound_nsec + (duration_sec * self.max_drift_ppb as f64) as i64,
        );

        // Build the (earliest, latest) interval within which true time exists.
        let earliest = real - updated_bound;
        let latest = real + updated_bound;

        Ok((earliest, latest, clock_status))
    }
}

#[cfg(test)]
mod t_lib {
    use super::*;

    // Convenience macro to build a ClockErrorBound for unit tests
    macro_rules! clockbound {
        (($asof_tv_sec:literal, $asof_tv_nsec:literal), ($after_tv_sec:literal, $after_tv_nsec:literal)) => {
            ClockErrorBound::new(
                TimeSpec::new($asof_tv_sec, $asof_tv_nsec), // as_of
                TimeSpec::new($after_tv_sec, $after_tv_nsec), // void_after
                10000,                                      // bound_nsec
                0,                                          // disruption_marker
                1000,                                       // max_drift_ppb
                ClockStatus::Synchronized,                  // clock_status
                true,                                       // clock_disruption_support_enabled
            )
        };
    }

    /// Assert the bound on clock error is computed correctly
    #[test]
    fn compute_bound_ok() {
        let ceb = clockbound!((0, 0), (10, 0));
        let real = TimeSpec::new(2, 0);
        let mono = TimeSpec::new(2, 0);

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // 2 seconds have passed since the bound was snapshotted, hence 2 microsec of drift on top
        // of the default 10 microsec put in the ClockErrorBound data
        assert_eq!(earliest.tv_sec(), 1);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 12_000);
        assert_eq!(latest.tv_sec(), 2);
        assert_eq!(latest.tv_nsec(), 12_000);
        assert_eq!(status, ClockStatus::Synchronized);
    }

    /// Assert the bound on clock error is computed correctly, with the realtime and monotonic
    /// clocks disagreeing on time
    #[test]
    fn compute_bound_ok_when_real_ahead() {
        let ceb = clockbound!((0, 0), (10, 0));
        let real = TimeSpec::new(20, 0); // realtime clock way ahead
        let mono = TimeSpec::new(4, 0);

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // 4 seconds have passed since the bound was snapshotted, hence 4 microsec of drift on top
        // of the default 10 microsec put in the ClockErrorBound data
        assert_eq!(earliest.tv_sec(), 19);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 14_000);
        assert_eq!(latest.tv_sec(), 20);
        assert_eq!(latest.tv_nsec(), 14_000);
        assert_eq!(status, ClockStatus::Synchronized);
    }

    /// Assert the clock status is FreeRunning if the ClockErrorBound data is past the grace
    /// period
    #[test]
    fn compute_bound_force_free_running_status() {
        let ceb = clockbound!((0, 0), (100, 0));
        let real = TimeSpec::new(8, 0);
        let mono = TimeSpec::new(8, 0);

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // 8 seconds have passed since the bound was snapshotted, hence 8 microsec of drift on top
        // of the default 10 microsec put in the ClockErrorBound data
        assert_eq!(earliest.tv_sec(), 7);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 18_000);
        assert_eq!(latest.tv_sec(), 8);
        assert_eq!(latest.tv_nsec(), 18_000);
        assert_eq!(status, ClockStatus::FreeRunning);
    }

    /// Assert the clock status is Unknown if the ClockErrorBound data is past void_after
    #[test]
    fn compute_bound_unknown_status_if_expired() {
        let ceb = clockbound!((0, 0), (5, 0));
        let real = TimeSpec::new(10, 0);
        let mono = TimeSpec::new(10, 0); // Past void_after

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // 10 seconds have passed since the bound was snapshotted, hence 10 microsec of drift on
        // top of the default 10 microsec put in the ClockErrorBound data
        assert_eq!(earliest.tv_sec(), 9);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 20_000);
        assert_eq!(latest.tv_sec(), 10);
        assert_eq!(latest.tv_nsec(), 20_000);
        assert_eq!(status, ClockStatus::Unknown);
    }

    /// Assert an error is returned if the ClockErrorBound data is malformed with a bad drift
    #[test]
    fn compute_bound_bad_drift() {
        let mut ceb = clockbound!((0, 0), (10, 0));
        let real = TimeSpec::new(5, 0);
        let mono = TimeSpec::new(5, 0);
        ceb.max_drift_ppb = 2_000_000_000;

        assert!(ceb.compute_bound_at(real, mono).is_err());
    }

    /// Assert an error is returned if the ClockErrorBound data snapshot has been taken after
    /// reading the clocks at 'now'
    #[test]
    fn compute_bound_causality_break() {
        let ceb = clockbound!((5, 0), (10, 0));
        let real = TimeSpec::new(1, 0);
        let mono = TimeSpec::new(1, 0);

        let res = ceb.compute_bound_at(real, mono);

        assert!(res.is_err());
    }
}