clock_bound_shm/lib.rs
1//! ClockBound Shared Memory
2//!
3//! This crate implements the low-level IPC functionality to share ClockErrorBound data and clock
4//! status over a shared memory segment. This crate is meant to be used by the C and Rust versions
5//! of the ClockBound client library.
6
// TODO: prevent clippy from checking for dead code. The writer module is only re-exported publicly
// if the `writer` feature is selected. There may be a better way to do that and re-enable the lint.
9#![allow(dead_code)]
10
11// Re-exports reader and writer. The writer is conditionally included under the "writer" feature.
12pub use crate::reader::ShmReader;
13#[cfg(feature = "writer")]
14pub use crate::writer::{ShmWrite, ShmWriter};
15
16pub mod common;
17mod reader;
18mod shm_header;
19mod writer;
20
21use errno::Errno;
22use nix::sys::time::{TimeSpec, TimeValLike};
23use std::ffi::CStr;
24
25use common::{clock_gettime_safe, CLOCK_MONOTONIC, CLOCK_REALTIME};
26
/// Grace period (5 seconds, 0 nanoseconds) added to `ClockErrorBound::as_of` when deciding whether
/// a `Synchronized` or `FreeRunning` status read from the segment is still reported unchanged.
/// It exists to tolerate a short restart of the daemon that updates the segment.
const CLOCKBOUND_RESTART_GRACE_PERIOD: TimeSpec = TimeSpec::new(5, 0);
28
/// Builds an `Err(ShmError::SyscallError(..))` for a system call that just failed.
///
/// The macro captures the current `errno` value and pairs it with `$origin`, a string literal
/// naming where the failure happened. The literal is NUL-terminated at compile time and exposed
/// as a `&'static CStr`.
///
/// # Panics
///
/// Panics if `$origin` contains an interior NUL byte, since the resulting bytes would not form a
/// valid C string.
#[macro_export]
macro_rules! syserror {
    ($origin:expr) => {{
        // Read errno first so nothing executed afterwards can clobber it.
        let errno = ::errno::errno();
        let origin =
            ::std::ffi::CStr::from_bytes_with_nul(concat!($origin, "\0").as_bytes()).unwrap();
        Err($crate::ShmError::SyscallError(errno, origin))
    }};
}
40
41/// Error condition returned by all low-level ClockBound APIs.
42#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
43pub enum ShmError {
44 /// A system call failed.
45 /// Variant includes the Errno struct with error details, and an indication on the origin of
46 /// the system call that error'ed.
47 SyscallError(Errno, &'static CStr),
48
49 /// The shared memory segment is not initialized.
50 SegmentNotInitialized,
51
52 /// The shared memory segment is initialized but malformed.
53 SegmentMalformed,
54
55 /// Failed causality check when comparing timestamps.
56 CausalityBreach,
57
58 /// The shared memory segment version is not supported.
59 SegmentVersionNotSupported,
60}
61
/// Mutually exclusive clock states exposed to the reader.
///
/// The enum has a C representation and explicit discriminants.
#[derive(Clone, Copy, Debug, PartialEq)]
#[repr(C)]
pub enum ClockStatus {
    /// The clock status is unknown.
    /// Error-bounded timestamps should not be trusted in this state.
    Unknown = 0,

    /// The synchronization daemon keeps the clock accurate.
    /// Error-bounded timestamps can be trusted in this state.
    Synchronized = 1,

    /// The clock is free running: the synchronization daemon is not updating it.
    /// Error-bounded timestamps can be trusted in this state.
    FreeRunning = 2,

    /// The clock has been disrupted and the accuracy of time cannot be bounded.
    /// Error-bounded timestamps should not be trusted in this state.
    Disrupted = 3,
}
82
83/// Structure that holds the ClockErrorBound data captured at a specific point in time and valid
84/// until a subsequent point in time.
85///
86/// The ClockErrorBound structure supports calculating the actual bound on clock error at any time,
87/// using its `now()` method. The internal fields are not meant to be accessed directly.
88///
89/// Note that the timestamps in between which this ClockErrorBound data is valid are captured using
90/// a CLOCK_MONOTONIC_COARSE clock. The monotonic clock id is required to correctly measure the
91/// duration during which clock drift possibly accrues, and avoid events when the clock is set,
92/// smeared or affected by leap seconds.
93///
94/// The structure is shared across the Shared Memory segment and has a C representation to enforce
95/// this specific layout.
96#[repr(C)]
97#[derive(Debug, Copy, Clone, PartialEq)]
98pub struct ClockErrorBound {
99 /// The CLOCK_MONOTONIC_COARSE timestamp recorded when the bound on clock error was
100 /// calculated. The current implementation relies on Chrony tracking data, which accounts for
101 /// the dispersion between the last clock processing event, and the reading of tracking data.
102 as_of: TimeSpec,
103
104 /// The CLOCK_MONOTONIC_COARSE timestamp beyond which the bound on clock error should not be
105 /// trusted. This is a useful signal that the communication with the synchronization daemon is
106 /// has failed, for example.
107 void_after: TimeSpec,
108
109 /// An absolute upper bound on the accuracy of the `CLOCK_REALTIME` clock with regards to true
110 /// time at the instant represented by `as_of`.
111 bound_nsec: i64,
112
113 /// Disruption marker.
114 ///
115 /// This value is incremented (by an unspecified delta) each time the clock has been disrupted.
116 /// This count value is specific to a particular VM/EC2 instance.
117 pub disruption_marker: u64,
118
119 /// Maximum drift rate of the clock between updates of the synchronization daemon. The value
120 /// stored in `bound_nsec` should increase by the following to account for the clock drift
121 /// since `bound_nsec` was computed:
122 /// `bound_nsec += max_drift_ppb * (now - as_of)`
123 max_drift_ppb: u32,
124
125 /// The synchronization daemon status indicates whether the daemon is synchronized,
126 /// free-running, etc.
127 clock_status: ClockStatus,
128
129 /// Clock disruption support enabled flag.
130 ///
131 /// This indicates whether or not the ClockBound daemon was started with a
132 /// configuration that supports detecting clock disruptions.
133 pub clock_disruption_support_enabled: bool,
134
135 /// Padding.
136 _padding: [u8; 7],
137}
138
139impl Default for ClockErrorBound {
140 /// Get a default ClockErrorBound struct
141 /// Equivalent to zero'ing this bit of memory
142 fn default() -> Self {
143 ClockErrorBound {
144 as_of: TimeSpec::new(0, 0),
145 void_after: TimeSpec::new(0, 0),
146 bound_nsec: 0,
147 disruption_marker: 0,
148 max_drift_ppb: 0,
149 clock_status: ClockStatus::Unknown,
150 clock_disruption_support_enabled: false,
151 _padding: [0u8; 7],
152 }
153 }
154}
155
156impl ClockErrorBound {
157 /// Create a new ClockErrorBound struct.
158 pub fn new(
159 as_of: TimeSpec,
160 void_after: TimeSpec,
161 bound_nsec: i64,
162 disruption_marker: u64,
163 max_drift_ppb: u32,
164 clock_status: ClockStatus,
165 clock_disruption_support_enabled: bool,
166 ) -> ClockErrorBound {
167 ClockErrorBound {
168 as_of,
169 void_after,
170 bound_nsec,
171 disruption_marker,
172 max_drift_ppb,
173 clock_status,
174 clock_disruption_support_enabled,
175 _padding: [0u8; 7],
176 }
177 }
178
179 /// The ClockErrorBound equivalent of clock_gettime(), but with bound on accuracy.
180 ///
181 /// Returns a pair of (earliest, latest) timespec between which current time exists. The
182 /// interval width is twice the clock error bound (ceb) such that:
183 /// (earliest, latest) = ((now - ceb), (now + ceb))
184 /// The function also returns a clock status to assert that the clock is being synchronized, or
185 /// free-running, or ...
186 pub fn now(&self) -> Result<(TimeSpec, TimeSpec, ClockStatus), ShmError> {
187 // Read the clock, start with the REALTIME one to be as close as possible to the event the
188 // caller is interested in. The monotonic clock should be read after. It is correct for the
189 // process be preempted between the two calls: a delayed read of the monotonic clock will
190 // make the bound on clock error more pessimistic, but remains correct.
191 let real = clock_gettime_safe(CLOCK_REALTIME)?;
192 let mono = clock_gettime_safe(CLOCK_MONOTONIC)?;
193
194 self.compute_bound_at(real, mono)
195 }
196
197 /// Compute the bound on clock error at a given point in time.
198 ///
199 /// The time at which the bound is computed is defined by the (real, mono) pair of timestamps
200 /// read from the realtime and monotonic clock respectively, *roughly* at the same time. The
201 /// details to correctly work around the "rough" alignment of the timestamps is not something
202 /// we want to leave to the user of ClockBound, hence this method is private. Although `now()`
203 /// may be it only caller, decoupling the two make writing unit tests a bit easier.
204 fn compute_bound_at(
205 &self,
206 real: TimeSpec,
207 mono: TimeSpec,
208 ) -> Result<(TimeSpec, TimeSpec, ClockStatus), ShmError> {
209 // Sanity checks:
210 // - `now()` should operate on a consistent snapshot of the shared memory segment, and
211 // causality between mono and as_of should be enforced.
212 // - a extremely high value of the `max_drift_ppb` is a sign of something going wrong
213 if self.max_drift_ppb >= 1_000_000_000 {
214 return Err(ShmError::SegmentMalformed);
215 }
216
217 // If the ClockErrorBound data has not been updated "recently", the status of the clock
218 // cannot be guaranteed. Things are ambiguous, the synchronization daemon may be dead, or
219 // its interaction with the clockbound daemon is broken, or ... In any case, we signal the
220 // caller that guarantees are gone. We could return an Err here, but choosing to leverage
221 // ClockStatus instead, and putting the responsibility on the caller to check the clock
222 // status value being returned.
223 // TODO: this may not be the most ergonomic decision, putting a pin here to revisit this
224 // decision once the client code is fleshed out.
225 let clock_status = match self.clock_status {
226 // If the status in the shared memory segment is Unknown or Disrupted, returns that
227 // status.
228 ClockStatus::Unknown | ClockStatus::Disrupted => self.clock_status,
229
230 // If the status is Synchronized or FreeRunning, the expectation from the client is
231 // that the data is useable. However, if the clockbound daemon died or has not update
232 // the shared memory segment in a while, the status written to the shared memory
233 // segment may not be reliable anymore.
234 ClockStatus::Synchronized | ClockStatus::FreeRunning => {
235 if mono < self.as_of + CLOCKBOUND_RESTART_GRACE_PERIOD {
236 // Allow for a restart of the daemon, for a short period of time, the status is
237 // trusted to be correct.
238 self.clock_status
239 } else if mono < self.void_after {
240 // Beyond the grace period, for a free running status.
241 ClockStatus::FreeRunning
242 } else {
243 // If beyond void_after, no guarantee is provided anymore.
244 ClockStatus::Unknown
245 }
246 }
247 };
248
249 // Calculate the duration that has elapsed between the instant when the CEB parameters were
250 // snapshot'ed from the SHM segment (approximated by `as_of`), and the instant when the
251 // request to calculate the CEB was actually requested (approximated by `mono`). This
252 // duration is used to compute the growth of the error bound due to local dispersion
253 // between polling chrony and now.
254 //
255 // To avoid miscalculation in case the synchronization daemon is restarted, a
256 // CLOCK_MONOTONIC is used, since it is designed to not jump. Because we want this to be
257 // fast, and the exact accuracy is not critical here, we use CLOCK_MONOTONIC_COARSE on
258 // platforms that support it.
259 //
260 // But ... there is a catch. When validating causality of these events that is, `as_of`
261 // should always be older than `mono`, we observed this test to sometimes fail, with `mono`
262 // being older by a handful of nanoseconds. The root cause is not completely understood,
263 // but points to the clock resolution and/or update strategy and/or propagation of the
264 // updates through the VDSO memory page. See this for details:
265 // https://t.corp.amazon.com/P101954401.
266 //
267 // The following implementation is a mitigation.
268 // 1. if as_of <= mono is younger than as_of, calculate the duration (happy path)
269 // 2. if as_of - epsilon < mono < as_of, set the duration to 0
270 // 3. if mono < as_of - epsilon, return an error
271 //
272 // In short, this relaxes the sanity check a bit to accept some imprecision in the clock
273 // reading routines.
274 //
275 // What is a good value for `epsilon`?
276 // The CLOCK_MONOTONIC_COARSE resolution is a function of the HZ kernel variable defining
277 // the last kernel tick that drives this clock (e.g. HZ=250 leads to a 4 millisecond
278 // resolution). We could use the `clock_getres()` system call to retrieve this value but
279 // this makes diagnosing over different platform / OS configurations more complex. Instead
280 // settling on an arbitrary default value of 1 millisecond.
281 let causality_blur = self.as_of - TimeSpec::new(0, 1000);
282
283 let duration = if mono >= self.as_of {
284 // Happy path, no causality doubt
285 mono - self.as_of
286 } else if mono > causality_blur {
287 // Causality is "almost" broken. We are within a range that could be due to the clock
288 // precision. Let's approximate this to equality between mono and as_of.
289 TimeSpec::new(0, 0)
290 } else {
291 // Causality is breached.
292 return Err(ShmError::CausalityBreach);
293 };
294
295 // Inflate the bound on clock error with the maximum drift the clock may be experiencing
296 // between the snapshot being read and ~now.
297 let duration_sec = duration.num_nanoseconds() as f64 / 1_000_000_000_f64;
298 let updated_bound = TimeSpec::nanoseconds(
299 self.bound_nsec + (duration_sec * self.max_drift_ppb as f64) as i64,
300 );
301
302 // Build the (earliest, latest) interval within which true time exists.
303 let earliest = real - updated_bound;
304 let latest = real + updated_bound;
305
306 Ok((earliest, latest, clock_status))
307 }
308}
309
#[cfg(test)]
mod t_lib {
    use super::*;

    // Convenience macro to build a ClockErrorBound for unit tests. Only `as_of` and `void_after`
    // vary; the bound is 10 microseconds, the max drift 1000 ppb and the status Synchronized.
    macro_rules! clockbound {
        (($asof_tv_sec:literal, $asof_tv_nsec:literal), ($after_tv_sec:literal, $after_tv_nsec:literal)) => {
            ClockErrorBound::new(
                TimeSpec::new($asof_tv_sec, $asof_tv_nsec),   // as_of
                TimeSpec::new($after_tv_sec, $after_tv_nsec), // void_after
                10000,                                        // bound_nsec
                0,                                            // disruption_marker
                1000,                                         // max_drift_ppb
                ClockStatus::Synchronized,                    // clock_status
                true, // clock_disruption_support_enabled
            )
        };
    }

    /// Assert the bound on clock error is computed correctly
    #[test]
    fn compute_bound_ok() {
        let ceb = clockbound!((0, 0), (10, 0));
        let real = TimeSpec::new(2, 0);
        let mono = TimeSpec::new(2, 0);

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // 2 seconds have passed since the bound was snapshot, hence 2 microsec of drift on top of
        // the default 10 microsec put in the ClockErrorBound data
        assert_eq!(earliest.tv_sec(), 1);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 12_000);
        assert_eq!(latest.tv_sec(), 2);
        assert_eq!(latest.tv_nsec(), 12_000);
        assert_eq!(status, ClockStatus::Synchronized);
    }

    /// Assert the bound on clock error is computed correctly, with realtime and monotonic clocks
    /// disagreeing on time
    #[test]
    fn compute_bound_ok_when_real_ahead() {
        let ceb = clockbound!((0, 0), (10, 0));
        let real = TimeSpec::new(20, 0); // realtime clock way ahead
        let mono = TimeSpec::new(4, 0);

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // 4 seconds have passed since the bound was snapshot, hence 4 microsec of drift on top of
        // the default 10 microsec put in the ClockErrorBound data
        assert_eq!(earliest.tv_sec(), 19);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 14_000);
        assert_eq!(latest.tv_sec(), 20);
        assert_eq!(latest.tv_nsec(), 14_000);
        assert_eq!(status, ClockStatus::Synchronized);
    }

    /// Assert the clock status is FreeRunning if the ClockErrorBound data is past the grace
    /// period
    #[test]
    fn compute_bound_force_free_running_status() {
        let ceb = clockbound!((0, 0), (100, 0));
        let real = TimeSpec::new(8, 0);
        let mono = TimeSpec::new(8, 0);

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // 8 seconds have passed since the bound was snapshot, hence 8 microsec of drift on top of
        // the default 10 microsec put in the ClockErrorBound data
        assert_eq!(earliest.tv_sec(), 7);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 18_000);
        assert_eq!(latest.tv_sec(), 8);
        assert_eq!(latest.tv_nsec(), 18_000);
        assert_eq!(status, ClockStatus::FreeRunning);
    }

    /// Assert the clock status is Unknown if the ClockErrorBound data is past void_after
    #[test]
    fn compute_bound_unknown_status_if_expired() {
        let ceb = clockbound!((0, 0), (5, 0));
        let real = TimeSpec::new(10, 0);
        let mono = TimeSpec::new(10, 0); // Past void_after

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // 10 seconds have passed since the bound was snapshot, hence 10 microsec of drift on top
        // of the default 10 microsec put in the ClockErrorBound data
        assert_eq!(earliest.tv_sec(), 9);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 20_000);
        assert_eq!(latest.tv_sec(), 10);
        assert_eq!(latest.tv_nsec(), 20_000);
        assert_eq!(status, ClockStatus::Unknown);
    }

    /// Assert a Disrupted or Unknown status stored in the ClockErrorBound data is returned as
    /// is, even when the data is recent
    #[test]
    fn compute_bound_passes_through_untrusted_status() {
        let real = TimeSpec::new(2, 0);
        let mono = TimeSpec::new(2, 0);

        for untrusted in [ClockStatus::Disrupted, ClockStatus::Unknown] {
            let mut ceb = clockbound!((0, 0), (10, 0));
            ceb.clock_status = untrusted;

            let (_, _, status) = ceb
                .compute_bound_at(real, mono)
                .expect("Failed to compute bound");

            assert_eq!(status, untrusted);
        }
    }

    /// Assert a monotonic reading marginally older than `as_of` is tolerated and treated as if
    /// no time had elapsed since the snapshot
    #[test]
    fn compute_bound_within_causality_blur() {
        let ceb = clockbound!((5, 0), (10, 0));
        let real = TimeSpec::new(5, 0);
        let mono = TimeSpec::new(4, 999_999_500); // 500 nanoseconds before as_of

        let (earliest, latest, status) = ceb
            .compute_bound_at(real, mono)
            .expect("Failed to compute bound");

        // No drift is accrued, only the default 10 microsec put in the ClockErrorBound data
        assert_eq!(earliest.tv_sec(), 4);
        assert_eq!(earliest.tv_nsec(), 1_000_000_000 - 10_000);
        assert_eq!(latest.tv_sec(), 5);
        assert_eq!(latest.tv_nsec(), 10_000);
        assert_eq!(status, ClockStatus::Synchronized);
    }

    /// Assert SegmentMalformed is returned if the ClockErrorBound data carries a bad drift
    #[test]
    fn compute_bound_bad_drift() {
        let mut ceb = clockbound!((0, 0), (10, 0));
        let real = TimeSpec::new(5, 0);
        let mono = TimeSpec::new(5, 0);
        ceb.max_drift_ppb = 2_000_000_000;

        assert_eq!(
            ceb.compute_bound_at(real, mono),
            Err(ShmError::SegmentMalformed)
        );
    }

    /// Assert CausalityBreach is returned if the ClockErrorBound data snapshot has been taken
    /// after reading clocks at 'now'
    #[test]
    fn compute_bound_causality_break() {
        let ceb = clockbound!((5, 0), (10, 0));
        let real = TimeSpec::new(1, 0);
        let mono = TimeSpec::new(1, 0);

        let res = ceb.compute_bound_at(real, mono);

        assert_eq!(res, Err(ShmError::CausalityBreach));
    }
}
434}