vmm_sys_util/linux/
poll.rs

1// Copyright 2019 Intel Corporation. All Rights Reserved.
2//
3// Copyright 2017 The Chromium OS Authors. All rights reserved.
4//
5// SPDX-License-Identifier: BSD-3-Clause
6
7//! Traits and structures for working with
8//! [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html)
9
10use std::cell::{Cell, Ref, RefCell};
11use std::cmp::min;
12use std::fs::File;
13use std::io::{stderr, Cursor, Write};
14use std::marker::PhantomData;
15use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
16use std::ptr::null_mut;
17use std::slice;
18use std::thread;
19use std::time::Duration;
20
21use libc::{
22    c_int, epoll_create1, epoll_ctl, epoll_event, epoll_wait, EINTR, EPOLLERR, EPOLLHUP, EPOLLIN,
23    EPOLLOUT, EPOLL_CLOEXEC, EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD,
24};
25
26use crate::errno::{errno_result, Error, Result};
27
// Evaluates the syscall-style expression `$x` and yields its result, re-evaluating it for as
// long as it returns -1 with the last OS error being `EINTR`.
macro_rules! handle_eintr_errno {
    ($x:expr) => {{
        loop {
            let res = $x;
            // The last error is only consulted when the call actually reported failure.
            let interrupted = res == -1 && Error::last() == Error::new(EINTR);
            if !interrupted {
                break res;
            }
        }
    }};
}
40
// Number of `epoll_event` slots in an `EpollEvents` buffer; it is also the `maxevents` value
// handed to `epoll_wait`, so one wait call reports at most this many events.
const POLL_CONTEXT_MAX_EVENTS: usize = 16;
42
43/// A wrapper of raw `libc::epoll_event`.
44///
45/// This should only be used with [`EpollContext`](struct.EpollContext.html).
46pub struct EpollEvents(RefCell<[epoll_event; POLL_CONTEXT_MAX_EVENTS]>);
47
48impl std::fmt::Debug for EpollEvents {
49    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50        write!(f, "EpollEvents {{ ... }}")
51    }
52}
53
54impl EpollEvents {
55    /// Creates a new EpollEvents.
56    pub fn new() -> EpollEvents {
57        EpollEvents(RefCell::new(
58            [epoll_event { events: 0, u64: 0 }; POLL_CONTEXT_MAX_EVENTS],
59        ))
60    }
61}
62
63impl Default for EpollEvents {
64    fn default() -> Self {
65        Self::new()
66    }
67}
68
/// Trait for a token that can be associated with an `fd` in a [`PollContext`](struct.PollContext.html).
///
/// Simple enums that have no or primitive variant data can use the `#[derive(PollToken)]`
/// custom derive to implement this trait.
pub trait PollToken {
    /// Converts this token into a u64 that can be turned back into a token via `from_raw_token`.
    fn as_raw_token(&self) -> u64;

    /// Converts a raw token as returned from `as_raw_token` back into a token.
    ///
    /// It is invalid to give a raw token that was not returned via `as_raw_token` from the same
    /// `Self`. The implementation can expect that this will never happen as a result of its usage
    /// in `PollContext`.
    fn from_raw_token(data: u64) -> Self;
}

// Implements `PollToken` for unsigned integer types: the token is widened to `u64` on the way
// out and converted back with an `as` cast (which truncates for the narrower types).
macro_rules! impl_poll_token_for_unsigned {
    ($($ty:ty),* $(,)?) => {
        $(
            impl PollToken for $ty {
                fn as_raw_token(&self) -> u64 {
                    *self as u64
                }

                fn from_raw_token(data: u64) -> Self {
                    data as $ty
                }
            }
        )*
    };
}

impl_poll_token_for_unsigned!(usize, u64, u32, u16, u8);

// The unit token carries no information: it always encodes as 0 and decodes from any value.
impl PollToken for () {
    fn as_raw_token(&self) -> u64 {
        0
    }

    fn from_raw_token(_data: u64) -> Self {}
}
142
143/// An event returned by [`PollContext::wait`](struct.PollContext.html#method.wait).
144pub struct PollEvent<'a, T> {
145    event: &'a epoll_event,
146    token: PhantomData<T>, // Needed to satisfy usage of T
147}
148
149impl<T> std::fmt::Debug for PollEvent<'_, T> {
150    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
151        f.debug_struct("PollEvent")
152            .field("event", &"?")
153            .field("token", &self.token)
154            .finish()
155    }
156}
157
158impl<T: PollToken> PollEvent<'_, T> {
159    /// Gets the token associated in
160    /// [`PollContext::add`](struct.PollContext.html#method.add) with this event.
161    pub fn token(&self) -> T {
162        T::from_raw_token(self.event.u64)
163    }
164
165    /// Get the raw events returned by the kernel.
166    pub fn raw_events(&self) -> u32 {
167        self.event.events
168    }
169
170    /// Checks if the event is readable.
171    ///
172    /// True if the `fd` associated with this token in
173    /// [`PollContext::add`](struct.PollContext.html#method.add) is readable.
174    pub fn readable(&self) -> bool {
175        self.event.events & (EPOLLIN as u32) != 0
176    }
177
178    /// Checks if the event is writable.
179    ///
180    /// True if the `fd` associated with this token in
181    /// [`PollContext::add`](struct.PollContext.html#method.add) is writable.
182    pub fn writable(&self) -> bool {
183        self.event.events & (EPOLLOUT as u32) != 0
184    }
185
186    /// Checks if the event has been hangup on.
187    ///
188    /// True if the `fd` associated with this token in
189    /// [`PollContext::add`](struct.PollContext.html#method.add) has been hungup on.
190    pub fn hungup(&self) -> bool {
191        self.event.events & (EPOLLHUP as u32) != 0
192    }
193
194    /// Checks if the event has associated error conditions.
195    ///
196    /// True if the `fd` associated with this token in
197    /// [`PollContext::add`](struct.PollContext.html#method.add) has associated error conditions.
198    pub fn has_error(&self) -> bool {
199        self.event.events & (EPOLLERR as u32) != 0
200    }
201}
202
203/// An iterator over a subset of events returned by
204/// [`PollContext::wait`](struct.PollContext.html#method.wait).
205#[derive(Debug)]
206pub struct PollEventIter<'a, I, T>
207where
208    I: Iterator<Item = &'a epoll_event>,
209{
210    mask: u32,
211    iter: I,
212    tokens: PhantomData<[T]>, // Needed to satisfy usage of T
213}
214
215impl<'a, I, T> Iterator for PollEventIter<'a, I, T>
216where
217    I: Iterator<Item = &'a epoll_event>,
218    T: PollToken,
219{
220    type Item = PollEvent<'a, T>;
221    fn next(&mut self) -> Option<Self::Item> {
222        let mask = self.mask;
223        self.iter
224            .find(|event| (event.events & mask) != 0)
225            .map(|event| PollEvent {
226                event,
227                token: PhantomData,
228            })
229    }
230}
231
232/// The list of events returned by [`PollContext::wait`](struct.PollContext.html#method.wait).
233pub struct PollEvents<'a, T> {
234    count: usize,
235    events: Ref<'a, [epoll_event; POLL_CONTEXT_MAX_EVENTS]>,
236    tokens: PhantomData<[T]>, // Needed to satisfy usage of T
237}
238
239impl<T> std::fmt::Debug for PollEvents<'_, T> {
240    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
241        f.debug_struct("PollEventsOwned")
242            .field("count", &self.count)
243            .field("events", &"?")
244            .field("tokens", &self.tokens)
245            .finish()
246    }
247}
248
249impl<T: PollToken> PollEvents<'_, T> {
250    /// Creates owned structure from borrowed [`PollEvents`](struct.PollEvents.html).
251    ///
252    /// Copies the events to an owned structure so the reference to this (and by extension
253    /// [`PollContext`](struct.PollContext.html)) can be dropped.
254    pub fn to_owned(&self) -> PollEventsOwned<T> {
255        PollEventsOwned {
256            count: self.count,
257            events: RefCell::new(*self.events),
258            tokens: PhantomData,
259        }
260    }
261
262    /// Iterates over each event.
263    pub fn iter(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> {
264        PollEventIter {
265            mask: 0xffff_ffff,
266            iter: self.events[..self.count].iter(),
267            tokens: PhantomData,
268        }
269    }
270
271    /// Iterates over each readable event.
272    pub fn iter_readable(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> {
273        PollEventIter {
274            mask: EPOLLIN as u32,
275            iter: self.events[..self.count].iter(),
276            tokens: PhantomData,
277        }
278    }
279
280    /// Iterates over each hungup event.
281    pub fn iter_hungup(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> {
282        PollEventIter {
283            mask: EPOLLHUP as u32,
284            iter: self.events[..self.count].iter(),
285            tokens: PhantomData,
286        }
287    }
288}
289
290/// A deep copy of the event records from [`PollEvents`](struct.PollEvents.html).
291pub struct PollEventsOwned<T> {
292    count: usize,
293    events: RefCell<[epoll_event; POLL_CONTEXT_MAX_EVENTS]>,
294    tokens: PhantomData<T>, // Needed to satisfy usage of T
295}
296
297impl<T> std::fmt::Debug for PollEventsOwned<T> {
298    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
299        f.debug_struct("PollEventsOwned")
300            .field("count", &self.count)
301            .field("events", &"?")
302            .field("tokens", &self.tokens)
303            .finish()
304    }
305}
306
307impl<T: PollToken> PollEventsOwned<T> {
308    /// Creates borrowed structure from owned structure
309    /// [`PollEventsOwned`](struct.PollEventsOwned.html).
310    ///
311    /// Takes a reference to the events so it can be iterated via methods in
312    /// [`PollEvents`](struct.PollEvents.html).
313    pub fn as_ref(&self) -> PollEvents<'_, T> {
314        PollEvents {
315            count: self.count,
316            events: self.events.borrow(),
317            tokens: PhantomData,
318        }
319    }
320}
321
322/// Watching events taken by [`PollContext`](struct.PollContext.html).
323#[derive(Debug, Copy, Clone)]
324pub struct WatchingEvents(u32);
325
326impl WatchingEvents {
327    /// Returns empty `WatchingEvents`.
328    #[inline(always)]
329    pub fn empty() -> WatchingEvents {
330        WatchingEvents(0)
331    }
332
333    /// Creates a new `WatchingEvents` with a specified value.
334    ///
335    /// Builds `WatchingEvents` from raw `epoll_event`.
336    ///
337    /// # Arguments
338    ///
339    /// * `raw`: the events to be created for watching.
340    #[inline(always)]
341    pub fn new(raw: u32) -> WatchingEvents {
342        WatchingEvents(raw)
343    }
344
345    /// Sets read events.
346    ///
347    /// Sets the events to be readable.
348    #[inline(always)]
349    pub fn set_read(self) -> WatchingEvents {
350        WatchingEvents(self.0 | EPOLLIN as u32)
351    }
352
353    /// Sets write events.
354    ///
355    /// Sets the events to be writable.
356    #[inline(always)]
357    pub fn set_write(self) -> WatchingEvents {
358        WatchingEvents(self.0 | EPOLLOUT as u32)
359    }
360
361    /// Gets the underlying epoll events.
362    pub fn get_raw(&self) -> u32 {
363        self.0
364    }
365}
366
367/// A wrapper of linux [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html).
368///
369/// It provides similar interface to [`PollContext`](struct.PollContext.html).
370/// It is thread safe while PollContext is not. It requires user to pass in a reference of
371/// EpollEvents while PollContext does not. Always use PollContext if you don't need to access the
372/// same epoll from different threads.
373///
374/// # Examples
375///
376/// ```
377/// extern crate vmm_sys_util;
378/// use vmm_sys_util::eventfd::EventFd;
379/// use vmm_sys_util::poll::{EpollContext, EpollEvents};
380///
381/// let evt = EventFd::new(0).unwrap();
382/// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
383/// let events = EpollEvents::new();
384///
385/// evt.write(1).unwrap();
386/// ctx.add(&evt, 1).unwrap();
387///
388/// for event in ctx.wait(&events).unwrap().iter_readable() {
389///     assert_eq!(event.token(), 1);
390/// }
391/// ```
392#[derive(Debug)]
393pub struct EpollContext<T> {
394    epoll_ctx: File,
395    // Needed to satisfy usage of T
396    tokens: PhantomData<[T]>,
397}
398
399impl<T: PollToken> EpollContext<T> {
400    /// Creates a new `EpollContext`.
401    ///
402    /// Uses [`epoll_create1`](http://man7.org/linux/man-pages/man2/epoll_create.2.html)
403    /// to create a new epoll fd.
404    ///
405    /// # Examples
406    ///
407    /// ```
408    /// extern crate vmm_sys_util;
409    /// use vmm_sys_util::poll::EpollContext;
410    ///
411    /// let ctx: EpollContext<usize> = EpollContext::new().unwrap();
412    /// ```
413    pub fn new() -> Result<EpollContext<T>> {
414        // SAFETY: Safe because we check the return value.
415        let epoll_fd = unsafe { epoll_create1(EPOLL_CLOEXEC) };
416        if epoll_fd < 0 {
417            return errno_result();
418        }
419        Ok(EpollContext {
420            // SAFETY: Safe because we verified that the FD is valid and we trust `epoll_create1`.
421            epoll_ctx: unsafe { File::from_raw_fd(epoll_fd) },
422            tokens: PhantomData,
423        })
424    }
425
426    /// Adds the given `fd` to this context and associates the given
427    /// `token` with the `fd`'s readable events.
428    ///
429    /// A `fd` can only be added once and does not need to be kept open.
430    /// If the `fd` is dropped and there were no duplicated file descriptors
431    /// (i.e. adding the same descriptor with a different FD number) added
432    /// to this context, events will not be reported by `wait` anymore.
433    ///
434    /// # Arguments
435    ///
436    /// * `fd`: the target file descriptor to be added.
437    /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
438    ///
439    /// # Examples
440    ///
441    /// ```
442    /// extern crate vmm_sys_util;
443    /// use vmm_sys_util::eventfd::EventFd;
444    /// use vmm_sys_util::poll::EpollContext;
445    ///
446    /// let evt = EventFd::new(0).unwrap();
447    /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
448    /// ctx.add(&evt, 1).unwrap();
449    /// ```
450    pub fn add(&self, fd: &dyn AsRawFd, token: T) -> Result<()> {
451        self.add_fd_with_events(fd, WatchingEvents::empty().set_read(), token)
452    }
453
454    /// Adds the given `fd` to this context, watching for the specified `events`
455    /// and associates the given 'token' with those events.
456    ///
457    /// A `fd` can only be added once and does not need to be kept open. If the `fd`
458    /// is dropped and there were no duplicated file descriptors (i.e. adding the same
459    /// descriptor with a different FD number) added to this context, events will
460    /// not be reported by `wait` anymore.
461    ///
462    /// # Arguments
463    ///
464    /// * `fd`: the target file descriptor to be added.
465    /// * `events`: specifies the events to be watched.
466    /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
467    ///
468    /// # Examples
469    ///
470    /// ```
471    /// extern crate vmm_sys_util;
472    /// use vmm_sys_util::eventfd::EventFd;
473    /// use vmm_sys_util::poll::{EpollContext, WatchingEvents};
474    ///
475    /// let evt = EventFd::new(0).unwrap();
476    /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
477    /// ctx.add_fd_with_events(&evt, WatchingEvents::empty().set_read(), 1)
478    ///     .unwrap();
479    /// ```
480    pub fn add_fd_with_events(
481        &self,
482        fd: &dyn AsRawFd,
483        events: WatchingEvents,
484        token: T,
485    ) -> Result<()> {
486        let mut evt = epoll_event {
487            events: events.get_raw(),
488            u64: token.as_raw_token(),
489        };
490        // SAFETY: Safe because we give a valid epoll FD and FD to watch, as well as a
491        // valid epoll_event structure. Then we check the return value.
492        let ret = unsafe {
493            epoll_ctl(
494                self.epoll_ctx.as_raw_fd(),
495                EPOLL_CTL_ADD,
496                fd.as_raw_fd(),
497                &mut evt,
498            )
499        };
500        if ret < 0 {
501            return errno_result();
502        };
503        Ok(())
504    }
505
506    /// Changes the setting associated with the given `fd` in this context.
507    ///
508    /// If `fd` was previously added to this context, the watched events will be replaced with
509    /// `events` and the token associated with it will be replaced with the given `token`.
510    ///
511    /// # Arguments
512    ///
513    /// * `fd`: the target file descriptor to be performed.
514    /// * `events`: specifies the events to be watched.
515    /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
516    ///
517    /// # Examples
518    ///
519    /// ```
520    /// extern crate vmm_sys_util;
521    /// use vmm_sys_util::eventfd::EventFd;
522    /// use vmm_sys_util::poll::{EpollContext, WatchingEvents};
523    ///
524    /// let evt = EventFd::new(0).unwrap();
525    /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
526    /// ctx.add_fd_with_events(&evt, WatchingEvents::empty().set_read(), 1)
527    ///     .unwrap();
528    /// ctx.modify(&evt, WatchingEvents::empty().set_write(), 2)
529    ///     .unwrap();
530    /// ```
531    pub fn modify(&self, fd: &dyn AsRawFd, events: WatchingEvents, token: T) -> Result<()> {
532        let mut evt = epoll_event {
533            events: events.0,
534            u64: token.as_raw_token(),
535        };
536        // SAFETY: Safe because we give a valid epoll FD and FD to modify, as well as a valid
537        // epoll_event structure. Then we check the return value.
538        let ret = unsafe {
539            epoll_ctl(
540                self.epoll_ctx.as_raw_fd(),
541                EPOLL_CTL_MOD,
542                fd.as_raw_fd(),
543                &mut evt,
544            )
545        };
546        if ret < 0 {
547            return errno_result();
548        };
549        Ok(())
550    }
551
552    /// Deletes the given `fd` from this context.
553    ///
554    /// If an `fd`'s token shows up in the list of hangup events, it should be removed using this
555    /// method or by closing/dropping (if and only if the fd was never dup()'d/fork()'d) the `fd`.
556    /// Failure to do so will cause the `wait` method to always return immediately, causing ~100%
557    /// CPU load.
558    ///
559    /// # Arguments
560    ///
561    /// * `fd`: the target file descriptor to be removed.
562    ///
563    /// # Examples
564    ///
565    /// ```
566    /// extern crate vmm_sys_util;
567    /// use vmm_sys_util::eventfd::EventFd;
568    /// use vmm_sys_util::poll::EpollContext;
569    ///
570    /// let evt = EventFd::new(0).unwrap();
571    /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
572    /// ctx.add(&evt, 1).unwrap();
573    /// ctx.delete(&evt).unwrap();
574    /// ```
575    pub fn delete(&self, fd: &dyn AsRawFd) -> Result<()> {
576        // SAFETY: Safe because we give a valid epoll FD and FD to stop watching. Then we check
577        // the return value.
578        let ret = unsafe {
579            epoll_ctl(
580                self.epoll_ctx.as_raw_fd(),
581                EPOLL_CTL_DEL,
582                fd.as_raw_fd(),
583                null_mut(),
584            )
585        };
586        if ret < 0 {
587            return errno_result();
588        };
589        Ok(())
590    }
591
592    /// Waits for any events to occur in FDs that were previously added to this context.
593    ///
594    /// The events are level-triggered, meaning that if any events are unhandled (i.e. not reading
595    /// for readable events and not closing for hungup events), subsequent calls to `wait` will
596    /// return immediately. The consequence of not handling an event perpetually while calling
597    /// `wait` is that the callers loop will degenerated to busy loop polling, pinning a CPU to
598    /// ~100% usage.
599    ///
600    /// # Arguments
601    ///
602    /// * `events`: the events to wait for.
603    ///
604    /// # Examples
605    ///
606    /// ```
607    /// extern crate vmm_sys_util;
608    /// use vmm_sys_util::eventfd::EventFd;
609    /// use vmm_sys_util::poll::{EpollContext, EpollEvents};
610    ///
611    /// let evt = EventFd::new(0).unwrap();
612    /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
613    /// let events = EpollEvents::new();
614    ///
615    /// evt.write(1).unwrap();
616    /// ctx.add(&evt, 1).unwrap();
617    ///
618    /// for event in ctx.wait(&events).unwrap().iter_readable() {
619    ///     assert_eq!(event.token(), 1);
620    /// }
621    /// ```
622    pub fn wait<'a>(&self, events: &'a EpollEvents) -> Result<PollEvents<'a, T>> {
623        self.wait_timeout(events, Duration::new(i64::MAX as u64, 0))
624    }
625
626    /// Like [`wait`](struct.EpollContext.html#method.wait) except will only block for a
627    /// maximum of the given `timeout`.
628    ///
629    /// This may return earlier than `timeout` with zero events if the duration indicated exceeds
630    /// system limits.
631    ///
632    /// # Arguments
633    ///
634    /// * `events`: the events to wait for.
635    /// * `timeout`: specifies the timeout that will block.
636    ///
637    /// # Examples
638    ///
639    /// ```
640    /// extern crate vmm_sys_util;
641    /// # use std::time::Duration;
642    /// use vmm_sys_util::eventfd::EventFd;
643    /// use vmm_sys_util::poll::{EpollContext, EpollEvents};
644    ///
645    /// let evt = EventFd::new(0).unwrap();
646    /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
647    /// let events = EpollEvents::new();
648    ///
649    /// evt.write(1).unwrap();
650    /// ctx.add(&evt, 1).unwrap();
651    /// for event in ctx
652    ///     .wait_timeout(&events, Duration::new(100, 0))
653    ///     .unwrap()
654    ///     .iter_readable()
655    /// {
656    ///     assert_eq!(event.token(), 1);
657    /// }
658    /// ```
659    pub fn wait_timeout<'a>(
660        &self,
661        events: &'a EpollEvents,
662        timeout: Duration,
663    ) -> Result<PollEvents<'a, T>> {
664        let timeout_millis = if timeout.as_secs() as i64 == i64::MAX {
665            // We make the convenient assumption that 2^63 seconds is an effectively unbounded time
666            // frame. This is meant to mesh with `wait` calling us with no timeout.
667            -1
668        } else {
669            // In cases where we the number of milliseconds would overflow an i32, we substitute the
670            // maximum timeout which is ~24.8 days.
671            let millis = timeout
672                .as_secs()
673                .checked_mul(1_000)
674                .and_then(|ms| ms.checked_add(u64::from(timeout.subsec_nanos()) / 1_000_000))
675                .unwrap_or(i32::MAX as u64);
676            min(i32::MAX as u64, millis) as i32
677        };
678        let ret = {
679            let mut epoll_events = events.0.borrow_mut();
680            let max_events = epoll_events.len() as c_int;
681            // SAFETY: Safe because we give an epoll context and a properly sized epoll_events
682            // array pointer, which we trust the kernel to fill in properly.
683            unsafe {
684                handle_eintr_errno!(epoll_wait(
685                    self.epoll_ctx.as_raw_fd(),
686                    &mut epoll_events[0],
687                    max_events,
688                    timeout_millis
689                ))
690            }
691        };
692        if ret < 0 {
693            return errno_result();
694        }
695        let epoll_events = events.0.borrow();
696        let events = PollEvents {
697            count: ret as usize,
698            events: epoll_events,
699            tokens: PhantomData,
700        };
701        Ok(events)
702    }
703}
704
705impl<T: PollToken> AsRawFd for EpollContext<T> {
706    fn as_raw_fd(&self) -> RawFd {
707        self.epoll_ctx.as_raw_fd()
708    }
709}
710
711impl<T: PollToken> IntoRawFd for EpollContext<T> {
712    fn into_raw_fd(self) -> RawFd {
713        self.epoll_ctx.into_raw_fd()
714    }
715}
716
/// Used to poll multiple objects that have file descriptors.
///
/// Wraps an [`EpollContext`](struct.EpollContext.html) together with its own event buffer and
/// the counters used to detect busy loops caused by unhandled hangups.
///
/// # Example
///
/// ```
/// # use vmm_sys_util::errno::Result;
/// # use vmm_sys_util::eventfd::EventFd;
/// # use vmm_sys_util::poll::{PollContext, PollEvents};
/// let evt1 = EventFd::new(0).unwrap();
/// let evt2 = EventFd::new(0).unwrap();
/// evt2.write(1).unwrap();
///
/// let ctx: PollContext<u32> = PollContext::new().unwrap();
/// ctx.add(&evt1, 1).unwrap();
/// ctx.add(&evt2, 2).unwrap();
///
/// let pollevents: PollEvents<u32> = ctx.wait().unwrap();
/// let tokens: Vec<u32> = pollevents.iter_readable().map(|e| e.token()).collect();
/// assert_eq!(&tokens[..], &[2]);
/// ```
#[derive(Debug)]
pub struct PollContext<T> {
    // The underlying epoll wrapper that every operation is forwarded to.
    epoll_ctx: EpollContext<T>,

    // We use a RefCell here (inside `EpollEvents`) so that the `wait` method only requires an
    // immutable self reference while returning the events (encapsulated by PollEvents). Without
    // the RefCell, `wait` would hold a mutable reference that lives as long as its returned
    // reference (i.e. the PollEvents), even though that reference is immutable. This is terribly
    // inconvenient for the caller because the borrow checking would prevent them from using
    // `delete` and `add` while the events are in scope.
    events: EpollEvents,

    // Hangup busy loop detection variables. See `check_for_hungup_busy_loop`.
    // Whether the hangup check is enabled; toggled via `set_check_for_hangup`.
    check_for_hangup: bool,
    // Hangups accumulated since the last successful add or delete (both reset it to 0).
    hangups: Cell<usize>,
    // Upper bound on the number of FDs in this context: incremented on add, decremented on delete.
    max_hangups: Cell<usize>,
}
754
755impl<T: PollToken> PollContext<T> {
756    /// Creates a new `PollContext`.
757    pub fn new() -> Result<PollContext<T>> {
758        Ok(PollContext {
759            epoll_ctx: EpollContext::new()?,
760            events: EpollEvents::new(),
761            check_for_hangup: true,
762            hangups: Cell::new(0),
763            max_hangups: Cell::new(0),
764        })
765    }
766
    /// Enables or disables checking for unhandled hangup events.
    ///
    /// # Arguments
    ///
    /// * `enable`: the new value stored in the `check_for_hangup` flag.
    pub fn set_check_for_hangup(&mut self, enable: bool) {
        self.check_for_hangup = enable;
    }
771
772    /// Adds the given `fd` to this context and associates the given `token` with the `fd`'s
773    /// readable events.
774    ///
775    /// A `fd` can only be added once and does not need to be kept open. If the `fd` is dropped and
776    /// there were no duplicated file descriptors (i.e. adding the same descriptor with a different
777    /// FD number) added to this context, events will not be reported by `wait` anymore.
778    ///
779    /// # Arguments
780    ///
781    /// * `fd`: the target file descriptor to be added.
782    /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
783    pub fn add(&self, fd: &dyn AsRawFd, token: T) -> Result<()> {
784        self.add_fd_with_events(fd, WatchingEvents::empty().set_read(), token)
785    }
786
787    /// Adds the given `fd` to this context, watching for the specified events and associates the
788    /// given 'token' with those events.
789    ///
790    /// A `fd` can only be added once and does not need to be kept open. If the `fd` is dropped and
791    /// there were no duplicated file descriptors (i.e. adding the same descriptor with a different
792    /// FD number) added to this context, events will not be reported by `wait` anymore.
793    ///
794    /// # Arguments
795    ///
796    /// * `fd`: the target file descriptor to be added.
797    /// * `events`: specifies the events to be watched.
798    /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
799    pub fn add_fd_with_events(
800        &self,
801        fd: &dyn AsRawFd,
802        events: WatchingEvents,
803        token: T,
804    ) -> Result<()> {
805        self.epoll_ctx.add_fd_with_events(fd, events, token)?;
806        self.hangups.set(0);
807        self.max_hangups.set(self.max_hangups.get() + 1);
808        Ok(())
809    }
810
    /// Changes the setting associated with the given `fd` in this context.
    ///
    /// If `fd` was previously added to this context, the watched events will be replaced with
    /// `events` and the token associated with it will be replaced with the given `token`.
    /// The call is forwarded unchanged to the wrapped `EpollContext`; the hangup counters are
    /// not touched.
    ///
    /// # Arguments
    ///
    /// * `fd`: the target file descriptor to be modified.
    /// * `events`: specifies the events to be watched.
    /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
    pub fn modify(&self, fd: &dyn AsRawFd, events: WatchingEvents, token: T) -> Result<()> {
        self.epoll_ctx.modify(fd, events, token)
    }
824
825    /// Deletes the given `fd` from this context.
826    ///
827    /// If an `fd`'s token shows up in the list of hangup events, it should be removed using this
828    /// method or by closing/dropping (if and only if the fd was never dup()'d/fork()'d) the `fd`.
829    /// Failure to do so will cause the `wait` method to always return immediately, causing ~100%
830    /// CPU load.
831    ///
832    /// # Arguments
833    ///
834    /// * `fd`: the target file descriptor to be removed.
835    pub fn delete(&self, fd: &dyn AsRawFd) -> Result<()> {
836        self.epoll_ctx.delete(fd)?;
837        self.hangups.set(0);
838        self.max_hangups.set(self.max_hangups.get() - 1);
839        Ok(())
840    }
841
842    // This method determines if the the user of wait is misusing the `PollContext` by leaving FDs
843    // in this `PollContext` that have been shutdown or hungup on. Such an FD will cause `wait` to
844    // return instantly with a hungup event. If that FD is perpetually left in this context, a busy
845    // loop burning ~100% of one CPU will silently occur with no human visible malfunction.
846    //
847    // How do we know if the client of this context is ignoring hangups? A naive implementation
848    // would trigger if consecutive wait calls yield hangup events, but there are legitimate cases
849    // for this, such as two distinct sockets becoming hungup across two consecutive wait calls. A
850    // smarter implementation would only trigger if `delete` wasn't called between waits that
851    // yielded hangups. Sadly `delete` isn't the only way to remove an FD from this context. The
852    // other way is for the client to close the hungup FD, which automatically removes it from this
853    // context. Assuming that the client always uses close, this implementation would too eagerly
854    // trigger.
855    //
856    // The implementation used here keeps an upper bound of FDs in this context using a counter
857    // hooked into add/delete (which is imprecise because close can also remove FDs without us
858    // knowing). The number of consecutive (no add or delete in between) hangups yielded by wait
859    // calls is counted and compared to the upper bound. If the upper bound is exceeded by the
860    // consecutive hangups, the implementation triggers the check and logs.
861    //
862    // This implementation has false negatives because the upper bound can be completely too high,
863    // in the worst case caused by only using close instead of delete. However, this method has the
864    // advantage of always triggering eventually genuine busy loop cases, requires no dynamic
865    // allocations, is fast and constant time to compute, and has no false positives.
866    fn check_for_hungup_busy_loop(&self, new_hangups: usize) {
867        let old_hangups = self.hangups.get();
868        let max_hangups = self.max_hangups.get();
869        if old_hangups <= max_hangups && old_hangups + new_hangups > max_hangups {
870            let mut buf = [0u8; 512];
871            let (res, len) = {
872                let mut buf_cursor = Cursor::new(&mut buf[..]);
873                // Oops, clippy bug. See https://github.com/rust-lang/rust-clippy/issues/9810
874                #[allow(clippy::write_literal)]
875                (
876                    writeln!(
877                        &mut buf_cursor,
878                        "[{}:{}] busy poll wait loop with hungup FDs detected on thread {}\n",
879                        file!(),
880                        line!(),
881                        thread::current().name().unwrap_or("")
882                    ),
883                    buf_cursor.position() as usize,
884                )
885            };
886
887            if res.is_ok() {
888                let _ = stderr().write_all(&buf[..len]);
889            }
890            // This panic is helpful for tests of this functionality.
891            #[cfg(test)]
892            panic!("hungup busy loop detected");
893        }
894        self.hangups.set(old_hangups + new_hangups);
895    }
896
897    /// Waits for any events to occur in FDs that were previously added to this context.
898    ///
899    /// The events are level-triggered, meaning that if any events are unhandled (i.e. not reading
900    /// for readable events and not closing for hungup events), subsequent calls to `wait` will
901    /// return immediately. The consequence of not handling an event perpetually while calling
902    /// `wait` is that the callers loop will degenerated to busy loop polling, pinning a CPU to
903    /// ~100% usage.
904    ///
905    /// # Panics
906    /// Panics if the returned `PollEvents` structure is not dropped before subsequent `wait` calls.
907    pub fn wait(&self) -> Result<PollEvents<'_, T>> {
908        self.wait_timeout(Duration::new(i64::MAX as u64, 0))
909    }
910
911    /// Like [`wait`](struct.EpollContext.html#method.wait) except will only block for a
912    /// maximum of the given `timeout`.
913    ///
914    /// This may return earlier than `timeout` with zero events if the duration indicated exceeds
915    /// system limits.
916    ///
917    /// # Arguments
918    ///
919    /// * `timeout`: specify the time that will block.
920    pub fn wait_timeout(&self, timeout: Duration) -> Result<PollEvents<'_, T>> {
921        let events = self.epoll_ctx.wait_timeout(&self.events, timeout)?;
922        let hangups = events.iter_hungup().count();
923        if self.check_for_hangup {
924            self.check_for_hungup_busy_loop(hangups);
925        }
926        Ok(events)
927    }
928}
929
930impl<T: PollToken> AsRawFd for PollContext<T> {
931    fn as_raw_fd(&self) -> RawFd {
932        self.epoll_ctx.as_raw_fd()
933    }
934}
935
936impl<T: PollToken> IntoRawFd for PollContext<T> {
937    fn into_raw_fd(self) -> RawFd {
938        self.epoll_ctx.into_raw_fd()
939    }
940}
941
#[cfg(test)]
mod tests {
    use super::*;
    use crate::eventfd::EventFd;
    use std::os::unix::net::UnixStream;
    use std::time::Instant;

    // Registers two already-signalled eventfds, then drains and deletes each one as it is
    // reported readable. Exactly two readable events are expected in total.
    #[test]
    fn test_poll_context() {
        let evt1 = EventFd::new(0).unwrap();
        let evt2 = EventFd::new(0).unwrap();
        evt1.write(1).unwrap();
        evt2.write(1).unwrap();
        let ctx: PollContext<u32> = PollContext::new().unwrap();
        ctx.add(&evt1, 1).unwrap();
        ctx.add(&evt2, 2).unwrap();

        let mut evt_count = 0;
        while evt_count < 2 {
            for event in ctx.wait().unwrap().iter_readable() {
                evt_count += 1;
                match event.token() {
                    1 => {
                        evt1.read().unwrap();
                        ctx.delete(&evt1).unwrap();
                    }
                    2 => {
                        evt2.read().unwrap();
                        ctx.delete(&evt2).unwrap();
                    }
                    _ => panic!("unexpected token"),
                };
            }
        }
        assert_eq!(evt_count, 2);
    }

    // Registers more signalled eventfds than a single `wait` can report
    // (`POLL_CONTEXT_MAX_EVENTS`), so several `wait` calls are needed to see them all.
    #[test]
    fn test_poll_context_overflow() {
        const EVT_COUNT: usize = POLL_CONTEXT_MAX_EVENTS * 2 + 1;
        let ctx: PollContext<usize> = PollContext::new().unwrap();
        let mut evts = Vec::with_capacity(EVT_COUNT);
        for i in 0..EVT_COUNT {
            let evt = EventFd::new(0).unwrap();
            evt.write(1).unwrap();
            ctx.add(&evt, i).unwrap();
            evts.push(evt);
        }
        let mut evt_count = 0;
        while evt_count < EVT_COUNT {
            for event in ctx.wait().unwrap().iter_readable() {
                // The token is the index of the eventfd in `evts`; reading it clears the event.
                evts[event.token()].read().unwrap();
                evt_count += 1;
            }
        }
        // Each eventfd is drained when reported, so none should be counted twice.
        assert_eq!(evt_count, EVT_COUNT);
    }

    // The expected message pins the panic to the busy-loop detector, so an unrelated panic
    // (for example a failing `unwrap`) no longer makes this test pass.
    #[test]
    #[should_panic(expected = "hungup busy loop detected")]
    fn test_poll_context_hungup() {
        let (s1, s2) = UnixStream::pair().unwrap();
        let ctx: PollContext<u32> = PollContext::new().unwrap();
        ctx.add(&s1, 1).unwrap();

        // Causes s1 to receive hangup events, which we purposefully ignore to trip the detection
        // logic in `PollContext`.
        drop(s2);

        // Should easily panic within this many iterations.
        for _ in 0..1000 {
            ctx.wait().unwrap();
        }
    }

    // With no FDs registered, `wait_timeout` must block for at least the requested duration.
    #[test]
    fn test_poll_context_timeout() {
        let mut ctx: PollContext<u32> = PollContext::new().unwrap();
        let dur = Duration::from_millis(10);
        let start_inst = Instant::now();

        ctx.set_check_for_hangup(false);
        ctx.wait_timeout(dur).unwrap();
        assert!(start_inst.elapsed() >= dur);
    }

    // Builds a `PollEvent` over a hand-made `epoll_event` with every flag of interest set and
    // checks that each accessor reports it.
    #[test]
    fn test_poll_event() {
        let event = epoll_event {
            events: (EPOLLIN | EPOLLERR | EPOLLOUT | EPOLLHUP) as u32,
            u64: 0x10,
        };
        let ev = PollEvent::<u32> {
            event: &event,
            token: PhantomData,
        };

        assert_eq!(ev.token(), 0x10);
        assert!(ev.readable());
        assert!(ev.writable());
        assert!(ev.hungup());
        assert!(ev.has_error());
        assert_eq!(
            ev.raw_events(),
            (EPOLLIN | EPOLLERR | EPOLLOUT | EPOLLHUP) as u32
        );
    }
}