vmm_sys_util/linux/poll.rs
1// Copyright 2019 Intel Corporation. All Rights Reserved.
2//
3// Copyright 2017 The Chromium OS Authors. All rights reserved.
4//
5// SPDX-License-Identifier: BSD-3-Clause
6
7//! Traits and structures for working with
8//! [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html)
9
10use std::cell::{Cell, Ref, RefCell};
11use std::cmp::min;
12use std::fs::File;
13use std::io::{stderr, Cursor, Write};
14use std::marker::PhantomData;
15use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
16use std::ptr::null_mut;
17use std::slice;
18use std::thread;
19use std::time::Duration;
20
21use libc::{
22 c_int, epoll_create1, epoll_ctl, epoll_event, epoll_wait, EINTR, EPOLLERR, EPOLLHUP, EPOLLIN,
23 EPOLLOUT, EPOLL_CLOEXEC, EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD,
24};
25
26use crate::errno::{errno_result, Error, Result};
27
// Evaluates the libc-style call `$x` repeatedly until it either succeeds or fails with an
// error other than `EINTR`, then yields the value returned by that final evaluation.
//
// A return value of `-1` is treated as failure; `Error::last()` is only consulted in that case.
macro_rules! handle_eintr_errno {
    ($x:expr) => {{
        loop {
            let res = $x;
            let interrupted = res == -1 && Error::last() == Error::new(EINTR);
            if !interrupted {
                break res;
            }
        }
    }};
}
40
41const POLL_CONTEXT_MAX_EVENTS: usize = 16;
42
43/// A wrapper of raw `libc::epoll_event`.
44///
45/// This should only be used with [`EpollContext`](struct.EpollContext.html).
46pub struct EpollEvents(RefCell<[epoll_event; POLL_CONTEXT_MAX_EVENTS]>);
47
48impl std::fmt::Debug for EpollEvents {
49 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50 write!(f, "EpollEvents {{ ... }}")
51 }
52}
53
54impl EpollEvents {
55 /// Creates a new EpollEvents.
56 pub fn new() -> EpollEvents {
57 EpollEvents(RefCell::new(
58 [epoll_event { events: 0, u64: 0 }; POLL_CONTEXT_MAX_EVENTS],
59 ))
60 }
61}
62
63impl Default for EpollEvents {
64 fn default() -> Self {
65 Self::new()
66 }
67}
68
/// Trait for a token that can be associated with an `fd` in a [`PollContext`](struct.PollContext.html).
///
/// Simple enums that have no or primitive variant data can use the `#[derive(PollToken)]`
/// custom derive to implement this trait.
pub trait PollToken {
    /// Encodes this token as a `u64` that `from_raw_token` can later decode.
    fn as_raw_token(&self) -> u64;

    /// Decodes a raw value produced by `as_raw_token` back into a token.
    ///
    /// It is invalid to give a raw token that was not returned via `as_raw_token` from the same
    /// `Self`. The implementation can expect that this will never happen as a result of its usage
    /// in `PollContext`.
    fn from_raw_token(data: u64) -> Self;
}

// Implements `PollToken` for unsigned integer types: the token is widened to `u64` when encoded
// and narrowed with a plain `as` cast (keeping only the low bits) when decoded.
macro_rules! impl_poll_token_for_unsigned {
    ($($int:ty),+ $(,)?) => {
        $(
            impl PollToken for $int {
                fn as_raw_token(&self) -> u64 {
                    *self as u64
                }

                fn from_raw_token(data: u64) -> Self {
                    data as $int
                }
            }
        )+
    };
}

impl_poll_token_for_unsigned!(usize, u64, u32, u16, u8);

impl PollToken for () {
    // The unit token carries no information, so it always encodes to zero.
    fn as_raw_token(&self) -> u64 {
        0
    }

    // Any raw value decodes to the unit token.
    fn from_raw_token(_data: u64) -> Self {}
}
142
143/// An event returned by [`PollContext::wait`](struct.PollContext.html#method.wait).
144pub struct PollEvent<'a, T> {
145 event: &'a epoll_event,
146 token: PhantomData<T>, // Needed to satisfy usage of T
147}
148
149impl<T> std::fmt::Debug for PollEvent<'_, T> {
150 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
151 f.debug_struct("PollEvent")
152 .field("event", &"?")
153 .field("token", &self.token)
154 .finish()
155 }
156}
157
158impl<T: PollToken> PollEvent<'_, T> {
159 /// Gets the token associated in
160 /// [`PollContext::add`](struct.PollContext.html#method.add) with this event.
161 pub fn token(&self) -> T {
162 T::from_raw_token(self.event.u64)
163 }
164
165 /// Get the raw events returned by the kernel.
166 pub fn raw_events(&self) -> u32 {
167 self.event.events
168 }
169
170 /// Checks if the event is readable.
171 ///
172 /// True if the `fd` associated with this token in
173 /// [`PollContext::add`](struct.PollContext.html#method.add) is readable.
174 pub fn readable(&self) -> bool {
175 self.event.events & (EPOLLIN as u32) != 0
176 }
177
178 /// Checks if the event is writable.
179 ///
180 /// True if the `fd` associated with this token in
181 /// [`PollContext::add`](struct.PollContext.html#method.add) is writable.
182 pub fn writable(&self) -> bool {
183 self.event.events & (EPOLLOUT as u32) != 0
184 }
185
186 /// Checks if the event has been hangup on.
187 ///
188 /// True if the `fd` associated with this token in
189 /// [`PollContext::add`](struct.PollContext.html#method.add) has been hungup on.
190 pub fn hungup(&self) -> bool {
191 self.event.events & (EPOLLHUP as u32) != 0
192 }
193
194 /// Checks if the event has associated error conditions.
195 ///
196 /// True if the `fd` associated with this token in
197 /// [`PollContext::add`](struct.PollContext.html#method.add) has associated error conditions.
198 pub fn has_error(&self) -> bool {
199 self.event.events & (EPOLLERR as u32) != 0
200 }
201}
202
203/// An iterator over a subset of events returned by
204/// [`PollContext::wait`](struct.PollContext.html#method.wait).
205#[derive(Debug)]
206pub struct PollEventIter<'a, I, T>
207where
208 I: Iterator<Item = &'a epoll_event>,
209{
210 mask: u32,
211 iter: I,
212 tokens: PhantomData<[T]>, // Needed to satisfy usage of T
213}
214
215impl<'a, I, T> Iterator for PollEventIter<'a, I, T>
216where
217 I: Iterator<Item = &'a epoll_event>,
218 T: PollToken,
219{
220 type Item = PollEvent<'a, T>;
221 fn next(&mut self) -> Option<Self::Item> {
222 let mask = self.mask;
223 self.iter
224 .find(|event| (event.events & mask) != 0)
225 .map(|event| PollEvent {
226 event,
227 token: PhantomData,
228 })
229 }
230}
231
232/// The list of events returned by [`PollContext::wait`](struct.PollContext.html#method.wait).
233pub struct PollEvents<'a, T> {
234 count: usize,
235 events: Ref<'a, [epoll_event; POLL_CONTEXT_MAX_EVENTS]>,
236 tokens: PhantomData<[T]>, // Needed to satisfy usage of T
237}
238
239impl<T> std::fmt::Debug for PollEvents<'_, T> {
240 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
241 f.debug_struct("PollEventsOwned")
242 .field("count", &self.count)
243 .field("events", &"?")
244 .field("tokens", &self.tokens)
245 .finish()
246 }
247}
248
249impl<T: PollToken> PollEvents<'_, T> {
250 /// Creates owned structure from borrowed [`PollEvents`](struct.PollEvents.html).
251 ///
252 /// Copies the events to an owned structure so the reference to this (and by extension
253 /// [`PollContext`](struct.PollContext.html)) can be dropped.
254 pub fn to_owned(&self) -> PollEventsOwned<T> {
255 PollEventsOwned {
256 count: self.count,
257 events: RefCell::new(*self.events),
258 tokens: PhantomData,
259 }
260 }
261
262 /// Iterates over each event.
263 pub fn iter(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> {
264 PollEventIter {
265 mask: 0xffff_ffff,
266 iter: self.events[..self.count].iter(),
267 tokens: PhantomData,
268 }
269 }
270
271 /// Iterates over each readable event.
272 pub fn iter_readable(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> {
273 PollEventIter {
274 mask: EPOLLIN as u32,
275 iter: self.events[..self.count].iter(),
276 tokens: PhantomData,
277 }
278 }
279
280 /// Iterates over each hungup event.
281 pub fn iter_hungup(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> {
282 PollEventIter {
283 mask: EPOLLHUP as u32,
284 iter: self.events[..self.count].iter(),
285 tokens: PhantomData,
286 }
287 }
288}
289
290/// A deep copy of the event records from [`PollEvents`](struct.PollEvents.html).
291pub struct PollEventsOwned<T> {
292 count: usize,
293 events: RefCell<[epoll_event; POLL_CONTEXT_MAX_EVENTS]>,
294 tokens: PhantomData<T>, // Needed to satisfy usage of T
295}
296
297impl<T> std::fmt::Debug for PollEventsOwned<T> {
298 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
299 f.debug_struct("PollEventsOwned")
300 .field("count", &self.count)
301 .field("events", &"?")
302 .field("tokens", &self.tokens)
303 .finish()
304 }
305}
306
307impl<T: PollToken> PollEventsOwned<T> {
308 /// Creates borrowed structure from owned structure
309 /// [`PollEventsOwned`](struct.PollEventsOwned.html).
310 ///
311 /// Takes a reference to the events so it can be iterated via methods in
312 /// [`PollEvents`](struct.PollEvents.html).
313 pub fn as_ref(&self) -> PollEvents<'_, T> {
314 PollEvents {
315 count: self.count,
316 events: self.events.borrow(),
317 tokens: PhantomData,
318 }
319 }
320}
321
322/// Watching events taken by [`PollContext`](struct.PollContext.html).
323#[derive(Debug, Copy, Clone)]
324pub struct WatchingEvents(u32);
325
326impl WatchingEvents {
327 /// Returns empty `WatchingEvents`.
328 #[inline(always)]
329 pub fn empty() -> WatchingEvents {
330 WatchingEvents(0)
331 }
332
333 /// Creates a new `WatchingEvents` with a specified value.
334 ///
335 /// Builds `WatchingEvents` from raw `epoll_event`.
336 ///
337 /// # Arguments
338 ///
339 /// * `raw`: the events to be created for watching.
340 #[inline(always)]
341 pub fn new(raw: u32) -> WatchingEvents {
342 WatchingEvents(raw)
343 }
344
345 /// Sets read events.
346 ///
347 /// Sets the events to be readable.
348 #[inline(always)]
349 pub fn set_read(self) -> WatchingEvents {
350 WatchingEvents(self.0 | EPOLLIN as u32)
351 }
352
353 /// Sets write events.
354 ///
355 /// Sets the events to be writable.
356 #[inline(always)]
357 pub fn set_write(self) -> WatchingEvents {
358 WatchingEvents(self.0 | EPOLLOUT as u32)
359 }
360
361 /// Gets the underlying epoll events.
362 pub fn get_raw(&self) -> u32 {
363 self.0
364 }
365}
366
/// A wrapper around a Linux [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html)
/// file descriptor.
///
/// It provides similar interface to [`PollContext`](struct.PollContext.html).
/// It is thread safe while PollContext is not. It requires user to pass in a reference of
/// EpollEvents while PollContext does not. Always use PollContext if you don't need to access the
/// same epoll from different threads.
///
/// # Examples
///
/// ```
/// extern crate vmm_sys_util;
/// use vmm_sys_util::eventfd::EventFd;
/// use vmm_sys_util::poll::{EpollContext, EpollEvents};
///
/// let evt = EventFd::new(0).unwrap();
/// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
/// let events = EpollEvents::new();
///
/// evt.write(1).unwrap();
/// ctx.add(&evt, 1).unwrap();
///
/// for event in ctx.wait(&events).unwrap().iter_readable() {
///     assert_eq!(event.token(), 1);
/// }
/// ```
#[derive(Debug)]
pub struct EpollContext<T> {
    // Owns the epoll file descriptor.
    epoll_ctx: File,
    // Needed to satisfy usage of T
    tokens: PhantomData<[T]>,
}
398
399impl<T: PollToken> EpollContext<T> {
400 /// Creates a new `EpollContext`.
401 ///
402 /// Uses [`epoll_create1`](http://man7.org/linux/man-pages/man2/epoll_create.2.html)
403 /// to create a new epoll fd.
404 ///
405 /// # Examples
406 ///
407 /// ```
408 /// extern crate vmm_sys_util;
409 /// use vmm_sys_util::poll::EpollContext;
410 ///
411 /// let ctx: EpollContext<usize> = EpollContext::new().unwrap();
412 /// ```
413 pub fn new() -> Result<EpollContext<T>> {
414 // SAFETY: Safe because we check the return value.
415 let epoll_fd = unsafe { epoll_create1(EPOLL_CLOEXEC) };
416 if epoll_fd < 0 {
417 return errno_result();
418 }
419 Ok(EpollContext {
420 // SAFETY: Safe because we verified that the FD is valid and we trust `epoll_create1`.
421 epoll_ctx: unsafe { File::from_raw_fd(epoll_fd) },
422 tokens: PhantomData,
423 })
424 }
425
426 /// Adds the given `fd` to this context and associates the given
427 /// `token` with the `fd`'s readable events.
428 ///
429 /// A `fd` can only be added once and does not need to be kept open.
430 /// If the `fd` is dropped and there were no duplicated file descriptors
431 /// (i.e. adding the same descriptor with a different FD number) added
432 /// to this context, events will not be reported by `wait` anymore.
433 ///
434 /// # Arguments
435 ///
436 /// * `fd`: the target file descriptor to be added.
437 /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
438 ///
439 /// # Examples
440 ///
441 /// ```
442 /// extern crate vmm_sys_util;
443 /// use vmm_sys_util::eventfd::EventFd;
444 /// use vmm_sys_util::poll::EpollContext;
445 ///
446 /// let evt = EventFd::new(0).unwrap();
447 /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
448 /// ctx.add(&evt, 1).unwrap();
449 /// ```
450 pub fn add(&self, fd: &dyn AsRawFd, token: T) -> Result<()> {
451 self.add_fd_with_events(fd, WatchingEvents::empty().set_read(), token)
452 }
453
454 /// Adds the given `fd` to this context, watching for the specified `events`
455 /// and associates the given 'token' with those events.
456 ///
457 /// A `fd` can only be added once and does not need to be kept open. If the `fd`
458 /// is dropped and there were no duplicated file descriptors (i.e. adding the same
459 /// descriptor with a different FD number) added to this context, events will
460 /// not be reported by `wait` anymore.
461 ///
462 /// # Arguments
463 ///
464 /// * `fd`: the target file descriptor to be added.
465 /// * `events`: specifies the events to be watched.
466 /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
467 ///
468 /// # Examples
469 ///
470 /// ```
471 /// extern crate vmm_sys_util;
472 /// use vmm_sys_util::eventfd::EventFd;
473 /// use vmm_sys_util::poll::{EpollContext, WatchingEvents};
474 ///
475 /// let evt = EventFd::new(0).unwrap();
476 /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
477 /// ctx.add_fd_with_events(&evt, WatchingEvents::empty().set_read(), 1)
478 /// .unwrap();
479 /// ```
480 pub fn add_fd_with_events(
481 &self,
482 fd: &dyn AsRawFd,
483 events: WatchingEvents,
484 token: T,
485 ) -> Result<()> {
486 let mut evt = epoll_event {
487 events: events.get_raw(),
488 u64: token.as_raw_token(),
489 };
490 // SAFETY: Safe because we give a valid epoll FD and FD to watch, as well as a
491 // valid epoll_event structure. Then we check the return value.
492 let ret = unsafe {
493 epoll_ctl(
494 self.epoll_ctx.as_raw_fd(),
495 EPOLL_CTL_ADD,
496 fd.as_raw_fd(),
497 &mut evt,
498 )
499 };
500 if ret < 0 {
501 return errno_result();
502 };
503 Ok(())
504 }
505
506 /// Changes the setting associated with the given `fd` in this context.
507 ///
508 /// If `fd` was previously added to this context, the watched events will be replaced with
509 /// `events` and the token associated with it will be replaced with the given `token`.
510 ///
511 /// # Arguments
512 ///
513 /// * `fd`: the target file descriptor to be performed.
514 /// * `events`: specifies the events to be watched.
515 /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
516 ///
517 /// # Examples
518 ///
519 /// ```
520 /// extern crate vmm_sys_util;
521 /// use vmm_sys_util::eventfd::EventFd;
522 /// use vmm_sys_util::poll::{EpollContext, WatchingEvents};
523 ///
524 /// let evt = EventFd::new(0).unwrap();
525 /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
526 /// ctx.add_fd_with_events(&evt, WatchingEvents::empty().set_read(), 1)
527 /// .unwrap();
528 /// ctx.modify(&evt, WatchingEvents::empty().set_write(), 2)
529 /// .unwrap();
530 /// ```
531 pub fn modify(&self, fd: &dyn AsRawFd, events: WatchingEvents, token: T) -> Result<()> {
532 let mut evt = epoll_event {
533 events: events.0,
534 u64: token.as_raw_token(),
535 };
536 // SAFETY: Safe because we give a valid epoll FD and FD to modify, as well as a valid
537 // epoll_event structure. Then we check the return value.
538 let ret = unsafe {
539 epoll_ctl(
540 self.epoll_ctx.as_raw_fd(),
541 EPOLL_CTL_MOD,
542 fd.as_raw_fd(),
543 &mut evt,
544 )
545 };
546 if ret < 0 {
547 return errno_result();
548 };
549 Ok(())
550 }
551
552 /// Deletes the given `fd` from this context.
553 ///
554 /// If an `fd`'s token shows up in the list of hangup events, it should be removed using this
555 /// method or by closing/dropping (if and only if the fd was never dup()'d/fork()'d) the `fd`.
556 /// Failure to do so will cause the `wait` method to always return immediately, causing ~100%
557 /// CPU load.
558 ///
559 /// # Arguments
560 ///
561 /// * `fd`: the target file descriptor to be removed.
562 ///
563 /// # Examples
564 ///
565 /// ```
566 /// extern crate vmm_sys_util;
567 /// use vmm_sys_util::eventfd::EventFd;
568 /// use vmm_sys_util::poll::EpollContext;
569 ///
570 /// let evt = EventFd::new(0).unwrap();
571 /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
572 /// ctx.add(&evt, 1).unwrap();
573 /// ctx.delete(&evt).unwrap();
574 /// ```
575 pub fn delete(&self, fd: &dyn AsRawFd) -> Result<()> {
576 // SAFETY: Safe because we give a valid epoll FD and FD to stop watching. Then we check
577 // the return value.
578 let ret = unsafe {
579 epoll_ctl(
580 self.epoll_ctx.as_raw_fd(),
581 EPOLL_CTL_DEL,
582 fd.as_raw_fd(),
583 null_mut(),
584 )
585 };
586 if ret < 0 {
587 return errno_result();
588 };
589 Ok(())
590 }
591
592 /// Waits for any events to occur in FDs that were previously added to this context.
593 ///
594 /// The events are level-triggered, meaning that if any events are unhandled (i.e. not reading
595 /// for readable events and not closing for hungup events), subsequent calls to `wait` will
596 /// return immediately. The consequence of not handling an event perpetually while calling
597 /// `wait` is that the callers loop will degenerated to busy loop polling, pinning a CPU to
598 /// ~100% usage.
599 ///
600 /// # Arguments
601 ///
602 /// * `events`: the events to wait for.
603 ///
604 /// # Examples
605 ///
606 /// ```
607 /// extern crate vmm_sys_util;
608 /// use vmm_sys_util::eventfd::EventFd;
609 /// use vmm_sys_util::poll::{EpollContext, EpollEvents};
610 ///
611 /// let evt = EventFd::new(0).unwrap();
612 /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
613 /// let events = EpollEvents::new();
614 ///
615 /// evt.write(1).unwrap();
616 /// ctx.add(&evt, 1).unwrap();
617 ///
618 /// for event in ctx.wait(&events).unwrap().iter_readable() {
619 /// assert_eq!(event.token(), 1);
620 /// }
621 /// ```
622 pub fn wait<'a>(&self, events: &'a EpollEvents) -> Result<PollEvents<'a, T>> {
623 self.wait_timeout(events, Duration::new(i64::MAX as u64, 0))
624 }
625
626 /// Like [`wait`](struct.EpollContext.html#method.wait) except will only block for a
627 /// maximum of the given `timeout`.
628 ///
629 /// This may return earlier than `timeout` with zero events if the duration indicated exceeds
630 /// system limits.
631 ///
632 /// # Arguments
633 ///
634 /// * `events`: the events to wait for.
635 /// * `timeout`: specifies the timeout that will block.
636 ///
637 /// # Examples
638 ///
639 /// ```
640 /// extern crate vmm_sys_util;
641 /// # use std::time::Duration;
642 /// use vmm_sys_util::eventfd::EventFd;
643 /// use vmm_sys_util::poll::{EpollContext, EpollEvents};
644 ///
645 /// let evt = EventFd::new(0).unwrap();
646 /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
647 /// let events = EpollEvents::new();
648 ///
649 /// evt.write(1).unwrap();
650 /// ctx.add(&evt, 1).unwrap();
651 /// for event in ctx
652 /// .wait_timeout(&events, Duration::new(100, 0))
653 /// .unwrap()
654 /// .iter_readable()
655 /// {
656 /// assert_eq!(event.token(), 1);
657 /// }
658 /// ```
659 pub fn wait_timeout<'a>(
660 &self,
661 events: &'a EpollEvents,
662 timeout: Duration,
663 ) -> Result<PollEvents<'a, T>> {
664 let timeout_millis = if timeout.as_secs() as i64 == i64::MAX {
665 // We make the convenient assumption that 2^63 seconds is an effectively unbounded time
666 // frame. This is meant to mesh with `wait` calling us with no timeout.
667 -1
668 } else {
669 // In cases where we the number of milliseconds would overflow an i32, we substitute the
670 // maximum timeout which is ~24.8 days.
671 let millis = timeout
672 .as_secs()
673 .checked_mul(1_000)
674 .and_then(|ms| ms.checked_add(u64::from(timeout.subsec_nanos()) / 1_000_000))
675 .unwrap_or(i32::MAX as u64);
676 min(i32::MAX as u64, millis) as i32
677 };
678 let ret = {
679 let mut epoll_events = events.0.borrow_mut();
680 let max_events = epoll_events.len() as c_int;
681 // SAFETY: Safe because we give an epoll context and a properly sized epoll_events
682 // array pointer, which we trust the kernel to fill in properly.
683 unsafe {
684 handle_eintr_errno!(epoll_wait(
685 self.epoll_ctx.as_raw_fd(),
686 &mut epoll_events[0],
687 max_events,
688 timeout_millis
689 ))
690 }
691 };
692 if ret < 0 {
693 return errno_result();
694 }
695 let epoll_events = events.0.borrow();
696 let events = PollEvents {
697 count: ret as usize,
698 events: epoll_events,
699 tokens: PhantomData,
700 };
701 Ok(events)
702 }
703}
704
705impl<T: PollToken> AsRawFd for EpollContext<T> {
706 fn as_raw_fd(&self) -> RawFd {
707 self.epoll_ctx.as_raw_fd()
708 }
709}
710
711impl<T: PollToken> IntoRawFd for EpollContext<T> {
712 fn into_raw_fd(self) -> RawFd {
713 self.epoll_ctx.into_raw_fd()
714 }
715}
716
717/// Used to poll multiple objects that have file descriptors.
718///
719/// # Example
720///
721/// ```
722/// # use vmm_sys_util::errno::Result;
723/// # use vmm_sys_util::eventfd::EventFd;
724/// # use vmm_sys_util::poll::{PollContext, PollEvents};
725/// let evt1 = EventFd::new(0).unwrap();
726/// let evt2 = EventFd::new(0).unwrap();
727/// evt2.write(1).unwrap();
728///
729/// let ctx: PollContext<u32> = PollContext::new().unwrap();
730/// ctx.add(&evt1, 1).unwrap();
731/// ctx.add(&evt2, 2).unwrap();
732///
733/// let pollevents: PollEvents<u32> = ctx.wait().unwrap();
734/// let tokens: Vec<u32> = pollevents.iter_readable().map(|e| e.token()).collect();
735/// assert_eq!(&tokens[..], &[2]);
736/// ```
737#[derive(Debug)]
738pub struct PollContext<T> {
739 epoll_ctx: EpollContext<T>,
740
741 // We use a RefCell here so that the `wait` method only requires an immutable self reference
742 // while returning the events (encapsulated by PollEvents). Without the RefCell, `wait` would
743 // hold a mutable reference that lives as long as its returned reference (i.e. the PollEvents),
744 // even though that reference is immutable. This is terribly inconvenient for the caller because
745 // the borrow checking would prevent them from using `delete` and `add` while the events are in
746 // scope.
747 events: EpollEvents,
748
749 // Hangup busy loop detection variables. See `check_for_hungup_busy_loop`.
750 check_for_hangup: bool,
751 hangups: Cell<usize>,
752 max_hangups: Cell<usize>,
753}
754
755impl<T: PollToken> PollContext<T> {
756 /// Creates a new `PollContext`.
757 pub fn new() -> Result<PollContext<T>> {
758 Ok(PollContext {
759 epoll_ctx: EpollContext::new()?,
760 events: EpollEvents::new(),
761 check_for_hangup: true,
762 hangups: Cell::new(0),
763 max_hangups: Cell::new(0),
764 })
765 }
766
767 /// Enable/disable of checking for unhandled hangup events.
768 pub fn set_check_for_hangup(&mut self, enable: bool) {
769 self.check_for_hangup = enable;
770 }
771
772 /// Adds the given `fd` to this context and associates the given `token` with the `fd`'s
773 /// readable events.
774 ///
775 /// A `fd` can only be added once and does not need to be kept open. If the `fd` is dropped and
776 /// there were no duplicated file descriptors (i.e. adding the same descriptor with a different
777 /// FD number) added to this context, events will not be reported by `wait` anymore.
778 ///
779 /// # Arguments
780 ///
781 /// * `fd`: the target file descriptor to be added.
782 /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
783 pub fn add(&self, fd: &dyn AsRawFd, token: T) -> Result<()> {
784 self.add_fd_with_events(fd, WatchingEvents::empty().set_read(), token)
785 }
786
787 /// Adds the given `fd` to this context, watching for the specified events and associates the
788 /// given 'token' with those events.
789 ///
790 /// A `fd` can only be added once and does not need to be kept open. If the `fd` is dropped and
791 /// there were no duplicated file descriptors (i.e. adding the same descriptor with a different
792 /// FD number) added to this context, events will not be reported by `wait` anymore.
793 ///
794 /// # Arguments
795 ///
796 /// * `fd`: the target file descriptor to be added.
797 /// * `events`: specifies the events to be watched.
798 /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
799 pub fn add_fd_with_events(
800 &self,
801 fd: &dyn AsRawFd,
802 events: WatchingEvents,
803 token: T,
804 ) -> Result<()> {
805 self.epoll_ctx.add_fd_with_events(fd, events, token)?;
806 self.hangups.set(0);
807 self.max_hangups.set(self.max_hangups.get() + 1);
808 Ok(())
809 }
810
811 /// Changes the setting associated with the given `fd` in this context.
812 ///
813 /// If `fd` was previously added to this context, the watched events will be replaced with
814 /// `events` and the token associated with it will be replaced with the given `token`.
815 ///
816 /// # Arguments
817 ///
818 /// * `fd`: the target file descriptor to be modified.
819 /// * `events`: specifies the events to be watched.
820 /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
821 pub fn modify(&self, fd: &dyn AsRawFd, events: WatchingEvents, token: T) -> Result<()> {
822 self.epoll_ctx.modify(fd, events, token)
823 }
824
825 /// Deletes the given `fd` from this context.
826 ///
827 /// If an `fd`'s token shows up in the list of hangup events, it should be removed using this
828 /// method or by closing/dropping (if and only if the fd was never dup()'d/fork()'d) the `fd`.
829 /// Failure to do so will cause the `wait` method to always return immediately, causing ~100%
830 /// CPU load.
831 ///
832 /// # Arguments
833 ///
834 /// * `fd`: the target file descriptor to be removed.
835 pub fn delete(&self, fd: &dyn AsRawFd) -> Result<()> {
836 self.epoll_ctx.delete(fd)?;
837 self.hangups.set(0);
838 self.max_hangups.set(self.max_hangups.get() - 1);
839 Ok(())
840 }
841
842 // This method determines if the the user of wait is misusing the `PollContext` by leaving FDs
843 // in this `PollContext` that have been shutdown or hungup on. Such an FD will cause `wait` to
844 // return instantly with a hungup event. If that FD is perpetually left in this context, a busy
845 // loop burning ~100% of one CPU will silently occur with no human visible malfunction.
846 //
847 // How do we know if the client of this context is ignoring hangups? A naive implementation
848 // would trigger if consecutive wait calls yield hangup events, but there are legitimate cases
849 // for this, such as two distinct sockets becoming hungup across two consecutive wait calls. A
850 // smarter implementation would only trigger if `delete` wasn't called between waits that
851 // yielded hangups. Sadly `delete` isn't the only way to remove an FD from this context. The
852 // other way is for the client to close the hungup FD, which automatically removes it from this
853 // context. Assuming that the client always uses close, this implementation would too eagerly
854 // trigger.
855 //
856 // The implementation used here keeps an upper bound of FDs in this context using a counter
857 // hooked into add/delete (which is imprecise because close can also remove FDs without us
858 // knowing). The number of consecutive (no add or delete in between) hangups yielded by wait
859 // calls is counted and compared to the upper bound. If the upper bound is exceeded by the
860 // consecutive hangups, the implementation triggers the check and logs.
861 //
862 // This implementation has false negatives because the upper bound can be completely too high,
863 // in the worst case caused by only using close instead of delete. However, this method has the
864 // advantage of always triggering eventually genuine busy loop cases, requires no dynamic
865 // allocations, is fast and constant time to compute, and has no false positives.
866 fn check_for_hungup_busy_loop(&self, new_hangups: usize) {
867 let old_hangups = self.hangups.get();
868 let max_hangups = self.max_hangups.get();
869 if old_hangups <= max_hangups && old_hangups + new_hangups > max_hangups {
870 let mut buf = [0u8; 512];
871 let (res, len) = {
872 let mut buf_cursor = Cursor::new(&mut buf[..]);
873 // Oops, clippy bug. See https://github.com/rust-lang/rust-clippy/issues/9810
874 #[allow(clippy::write_literal)]
875 (
876 writeln!(
877 &mut buf_cursor,
878 "[{}:{}] busy poll wait loop with hungup FDs detected on thread {}\n",
879 file!(),
880 line!(),
881 thread::current().name().unwrap_or("")
882 ),
883 buf_cursor.position() as usize,
884 )
885 };
886
887 if res.is_ok() {
888 let _ = stderr().write_all(&buf[..len]);
889 }
890 // This panic is helpful for tests of this functionality.
891 #[cfg(test)]
892 panic!("hungup busy loop detected");
893 }
894 self.hangups.set(old_hangups + new_hangups);
895 }
896
897 /// Waits for any events to occur in FDs that were previously added to this context.
898 ///
899 /// The events are level-triggered, meaning that if any events are unhandled (i.e. not reading
900 /// for readable events and not closing for hungup events), subsequent calls to `wait` will
901 /// return immediately. The consequence of not handling an event perpetually while calling
902 /// `wait` is that the callers loop will degenerated to busy loop polling, pinning a CPU to
903 /// ~100% usage.
904 ///
905 /// # Panics
906 /// Panics if the returned `PollEvents` structure is not dropped before subsequent `wait` calls.
907 pub fn wait(&self) -> Result<PollEvents<'_, T>> {
908 self.wait_timeout(Duration::new(i64::MAX as u64, 0))
909 }
910
911 /// Like [`wait`](struct.EpollContext.html#method.wait) except will only block for a
912 /// maximum of the given `timeout`.
913 ///
914 /// This may return earlier than `timeout` with zero events if the duration indicated exceeds
915 /// system limits.
916 ///
917 /// # Arguments
918 ///
919 /// * `timeout`: specify the time that will block.
920 pub fn wait_timeout(&self, timeout: Duration) -> Result<PollEvents<'_, T>> {
921 let events = self.epoll_ctx.wait_timeout(&self.events, timeout)?;
922 let hangups = events.iter_hungup().count();
923 if self.check_for_hangup {
924 self.check_for_hungup_busy_loop(hangups);
925 }
926 Ok(events)
927 }
928}
929
930impl<T: PollToken> AsRawFd for PollContext<T> {
931 fn as_raw_fd(&self) -> RawFd {
932 self.epoll_ctx.as_raw_fd()
933 }
934}
935
936impl<T: PollToken> IntoRawFd for PollContext<T> {
937 fn into_raw_fd(self) -> RawFd {
938 self.epoll_ctx.into_raw_fd()
939 }
940}
941
#[cfg(test)]
mod tests {
    use super::*;
    use crate::eventfd::EventFd;
    use std::os::unix::net::UnixStream;
    use std::time::Instant;

    // Two signalled eventfds must both be reported as readable, each identified by its token.
    #[test]
    fn test_poll_context() {
        let first = EventFd::new(0).unwrap();
        let second = EventFd::new(0).unwrap();
        first.write(1).unwrap();
        second.write(1).unwrap();
        let ctx: PollContext<u32> = PollContext::new().unwrap();
        ctx.add(&first, 1).unwrap();
        ctx.add(&second, 2).unwrap();

        let mut seen = 0;
        while seen < 2 {
            let ready = ctx.wait().unwrap();
            for event in ready.iter_readable() {
                seen += 1;
                let source = match event.token() {
                    1 => &first,
                    2 => &second,
                    _ => panic!("unexpected token"),
                };
                // Drain the counter and unregister the FD so it is not reported again.
                source.read().unwrap();
                ctx.delete(source).unwrap();
            }
        }
        assert_eq!(seen, 2);
    }

    // Registers more ready FDs than one event buffer can hold, so several waits are needed.
    #[test]
    fn test_poll_context_overflow() {
        const EVT_COUNT: usize = POLL_CONTEXT_MAX_EVENTS * 2 + 1;
        let ctx: PollContext<usize> = PollContext::new().unwrap();
        let evts: Vec<EventFd> = (0..EVT_COUNT)
            .map(|token| {
                let evt = EventFd::new(0).unwrap();
                evt.write(1).unwrap();
                ctx.add(&evt, token).unwrap();
                evt
            })
            .collect();

        let mut drained = 0;
        while drained < EVT_COUNT {
            let ready = ctx.wait().unwrap();
            for event in ready.iter_readable() {
                evts[event.token()].read().unwrap();
                drained += 1;
            }
        }
    }

    #[test]
    #[should_panic]
    fn test_poll_context_hungup() {
        let (local, remote) = UnixStream::pair().unwrap();
        let ctx: PollContext<u32> = PollContext::new().unwrap();
        ctx.add(&local, 1).unwrap();

        // Causes `local` to receive hangup events, which we purposefully ignore to trip the
        // detection logic in `PollContext`.
        drop(remote);

        // Should easily panic within this many iterations.
        (0..1000).for_each(|_| {
            ctx.wait().unwrap();
        });
    }

    // With nothing registered, the wait must block for at least the requested duration.
    #[test]
    fn test_poll_context_timeout() {
        let mut ctx: PollContext<u32> = PollContext::new().unwrap();
        let timeout = Duration::from_millis(10);
        let started = Instant::now();

        ctx.set_check_for_hangup(false);
        ctx.wait_timeout(timeout).unwrap();
        assert!(started.elapsed() >= timeout);
    }

    // Every accessor must reflect the corresponding bit of a hand-built raw event.
    #[test]
    fn test_poll_event() {
        let all_flags = (EPOLLIN | EPOLLERR | EPOLLOUT | EPOLLHUP) as u32;
        let raw = epoll_event {
            events: all_flags,
            u64: 0x10,
        };
        let ev = PollEvent::<u32> {
            event: &raw,
            token: PhantomData,
        };

        assert_eq!(ev.token(), 0x10);
        assert!(ev.readable());
        assert!(ev.writable());
        assert!(ev.hungup());
        assert!(ev.has_error());
        assert_eq!(ev.raw_events(), all_flags);
    }
}
1048}