rt_watchdog/
lib.rs

1mod asm;
2mod deadline;
3#[cfg(test)]
4mod tests;
5
6use std::{
7  cell::UnsafeCell,
8  sync::atomic::{AtomicU32, AtomicUsize, Ordering},
9  time::Duration,
10};
11
12use asm::PLATFORM_SUPPORTED;
13use libc::c_void;
14
15use crate::deadline::sched_attr;
16
17extern "C" {
18  fn rt_watchdog_thread_entry(ctx: &Context) -> !;
19}
20
21#[repr(C)]
22pub struct Context {
23  pub counter: AtomicUsize,
24  fence: AtomicU32,
25
26  stack: Stack,
27}
28
/// 256-byte, 16-byte-aligned buffer used as the realtime thread's stack.
#[repr(C, align(16))]
struct Stack(UnsafeCell<[u8; 256]>);

// The buffer is only ever exposed as a raw pointer (see `Stack::end`), so the
// wrapper is declared shareable and sendable across threads by hand.
unsafe impl Sync for Stack {}
unsafe impl Send for Stack {}

impl Stack {
  /// Returns the address one past the last byte of the buffer.
  ///
  /// This is the value `do_start_watchdog` passes to `clone` as the child
  /// stack pointer.
  fn end(&self) -> *mut c_void {
    let whole_array: *mut [u8; 256] = self.0.get();
    // Stepping a pointer-to-array by one element lands exactly one past the
    // array's final byte, which is still a valid "one past the end" address.
    unsafe { whole_array.add(1).cast::<c_void>() }
  }
}
40
41#[derive(Copy, Clone, Debug)]
42struct DeadlineParams {
43  runtime: Duration,
44  period: Duration,
45}
46
47impl DeadlineParams {
48  fn gen_attr(&self) -> sched_attr {
49    sched_attr {
50      size: std::mem::size_of::<sched_attr>() as u32,
51      sched_policy: deadline::consts::SCHED_DEADLINE,
52      sched_flags: 0,
53      sched_nice: 0,
54      sched_priority: 0,
55      sched_runtime: self.runtime.as_nanos() as u64,
56      sched_deadline: self.period.as_nanos() as u64,
57      sched_period: self.period.as_nanos() as u64,
58    }
59  }
60}
61
/// Selects which watchdog implementation `start_watchdog` may use.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Strategy {
  /// Use the realtime watchdog; start-up panics if it cannot be started.
  RealtimeOnly,
  /// Skip the realtime attempt and use the regular-thread watchdog.
  FallbackOnly,
  /// Try the realtime watchdog first and fall back to the regular-thread
  /// watchdog when it is unavailable.
  RealtimeOrFallback,
}
68
69pub fn start_watchdog(strategy: Strategy, check_interval: Duration) -> &'static Context {
70  unsafe { do_start_watchdog(strategy, check_interval) }
71}
72
73unsafe fn do_start_watchdog(strategy: Strategy, check_interval: Duration) -> &'static Context {
74  let page_size = libc::sysconf(libc::_SC_PAGESIZE);
75  assert!(page_size >= std::mem::size_of::<Context>() as i64);
76  let page_size = page_size as usize;
77  let context_page = libc::mmap(
78    std::ptr::null_mut(),
79    page_size,
80    libc::PROT_READ | libc::PROT_WRITE,
81    libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
82    -1,
83    0,
84  );
85  if context_page.is_null() {
86    panic!("failed to map context page");
87  }
88  let ctx: &'static Context = &*(context_page as *mut Context);
89  let dl_params = DeadlineParams {
90    runtime: Duration::from_micros(50),
91    period: check_interval,
92  };
93
94  if strategy != Strategy::FallbackOnly && detect_platform_support(dl_params) {
95    let rt_watchdog_thread_entry = rt_watchdog_thread_entry as usize;
96    assert!(rt_watchdog_thread_entry & (page_size - 1) == 0);
97
98    // lock code and context data pages so that swapping does not
99    // break real-time guarantees
100    if libc::mlock(context_page, page_size) == 0
101      && libc::mlock(rt_watchdog_thread_entry as *const _, page_size) == 0
102    {
103      let pid = libc::clone(
104        std::mem::transmute(rt_watchdog_thread_entry),
105        ctx.stack.end(),
106        libc::CLONE_VM
107          | libc::CLONE_FS
108          | libc::CLONE_FILES
109          | libc::CLONE_SIGHAND
110          | libc::CLONE_THREAD
111          | libc::CLONE_SYSVSEM,
112        ctx as *const Context as *const c_void as *mut c_void,
113      );
114      if pid < 0 {
115        let err = std::io::Error::last_os_error();
116        panic!("failed to start watchdog thread: {:?}", err);
117      }
118      assert!(pid > 0);
119
120      // set priority
121      let mut attr = dl_params.gen_attr();
122      if libc::syscall(
123        libc::SYS_sched_setattr,
124        pid,
125        &mut attr as *mut sched_attr,
126        0,
127      ) < 0
128      {
129        let err = std::io::Error::last_os_error();
130        panic!("failed to set rt watchdog priority: {:?}", err);
131      }
132
133      // notify the thread to start
134      ctx.fence.store(1, Ordering::SeqCst);
135      let ret = libc::syscall(libc::SYS_futex, &ctx.fence, libc::FUTEX_WAKE, 1, 0, 0, 0);
136      assert!(ret >= 0);
137
138      // wait for ack
139      while ctx.fence.load(Ordering::SeqCst) != 2 {
140        let ret = libc::syscall(libc::SYS_futex, &ctx.fence, libc::FUTEX_WAIT, 1, 0, 0, 0);
141        let errno = *libc::__errno_location();
142        assert!(ret == 0 || (ret < 0 && errno == libc::EAGAIN)); // glibc handles EINTR?
143      }
144
145      return ctx;
146    }
147  }
148
149  if strategy == Strategy::RealtimeOnly {
150    panic!("failed to start realtime watchdog on current platform");
151  }
152
153  eprintln!("falling back to non-realtime watchdog");
154  std::thread::spawn(move || fallback_impl(ctx, check_interval));
155  ctx
156}
157
158fn fallback_impl(ctx: &'static Context, check_interval: Duration) -> ! {
159  let mut last_counter = 0usize;
160
161  loop {
162    std::thread::sleep(check_interval);
163    let counter = ctx.counter.load(Ordering::SeqCst);
164    if counter == last_counter {
165      std::process::abort();
166    }
167    last_counter = counter;
168  }
169}
170
171unsafe fn detect_platform_support(dl_params: DeadlineParams) -> bool {
172  if !PLATFORM_SUPPORTED {
173    return false;
174  }
175
176  // detect availability of SCHED_DEADLINE
177  let sched_res = std::thread::spawn(move || {
178    let mut dl_attr = dl_params.gen_attr();
179    let mut normal_attr = sched_attr {
180      size: std::mem::size_of::<sched_attr>() as u32,
181      sched_policy: deadline::consts::SCHED_NORMAL,
182      ..Default::default()
183    };
184    if libc::syscall(
185      libc::SYS_sched_setattr,
186      0,
187      &mut dl_attr as *mut sched_attr,
188      0,
189    ) < 0
190    {
191      Err(std::io::Error::last_os_error())
192    } else {
193      // reset attr so that thread finalizers are executed normally
194      let ret = libc::syscall(
195        libc::SYS_sched_setattr,
196        0,
197        &mut normal_attr as *mut sched_attr,
198        0,
199      );
200      if ret != 0 {
201        std::process::abort();
202      }
203      Ok(())
204    }
205  })
206  .join()
207  .unwrap();
208  if let Err(e) = sched_res {
209    eprintln!("deadline params are not supported: {:?}", e);
210    return false;
211  }
212  true
213}