// vmm-sys-util 0.11.1
//
// A system utility set
// Documentation
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: BSD-3-Clause

//! Safe wrappers over the
//! [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html) API.

use std::io;
use std::ops::{Deref, Drop};
use std::os::unix::io::{AsRawFd, RawFd};

#[cfg(any(target_os = "linux", target_os = "android"))]
use bitflags::bitflags;
use libc::{
    epoll_create1, epoll_ctl, epoll_event, epoll_wait, EPOLLERR, EPOLLET, EPOLLEXCLUSIVE, EPOLLHUP,
    EPOLLIN, EPOLLONESHOT, EPOLLOUT, EPOLLPRI, EPOLLRDHUP, EPOLLWAKEUP, EPOLL_CLOEXEC,
    EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD,
};

use crate::syscall::SyscallReturnCode;

/// Wrapper over `EPOLL_CTL_*` operations that can be performed on a file descriptor.
///
/// The enum is `#[repr(i32)]` and every discriminant is the matching
/// `libc::EPOLL_CTL_*` constant, so a variant can be cast with `as i32` and passed
/// straight to `epoll_ctl`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(i32)]
pub enum ControlOperation {
    /// Add a file descriptor to the interest list.
    Add = EPOLL_CTL_ADD,
    /// Change the settings associated with a file descriptor that is
    /// already in the interest list.
    Modify = EPOLL_CTL_MOD,
    /// Remove a file descriptor from the interest list.
    Delete = EPOLL_CTL_DEL,
}

// Every flag below is the `libc` constant of the same purpose cast to `u32`, which is
// the type of the `events` field this mask is stored in.
bitflags! {
    /// The type of events we can monitor a file descriptor for.
    ///
    /// Flags can be combined with `|` to build the mask passed to `EpollEvent::new`.
    pub struct EventSet: u32 {
        /// The associated file descriptor is available for read operations.
        const IN = EPOLLIN as u32;
        /// The associated file descriptor is available for write operations.
        const OUT = EPOLLOUT as u32;
        /// Error condition happened on the associated file descriptor.
        const ERROR = EPOLLERR as u32;
        /// This can be used to detect peer shutdown when using Edge Triggered monitoring.
        const READ_HANG_UP = EPOLLRDHUP as u32;
        /// Sets the Edge Triggered behavior for the associated file descriptor.
        /// The default behavior is Level Triggered.
        const EDGE_TRIGGERED = EPOLLET as u32;
        /// Hang up happened on the associated file descriptor. Note that `epoll_wait`
        /// will always wait for this event and it is not necessary to set it in events.
        const HANG_UP = EPOLLHUP as u32;
        /// There is an exceptional condition on that file descriptor. It is mostly used to
        /// set high priority for some data.
        const PRIORITY = EPOLLPRI as u32;
        /// The event is considered as being "processed" from the time when it is returned
        /// by a call to `epoll_wait` until the next call to `epoll_wait` on the same
        /// epoll file descriptor, the closure of that file descriptor, the removal of the
        /// event file descriptor via EPOLL_CTL_DEL, or the clearing of EPOLLWAKEUP
        /// for the event file descriptor via EPOLL_CTL_MOD.
        ///
        /// See `epoll_ctl(2)` for the full semantics of `EPOLLWAKEUP`.
        const WAKE_UP = EPOLLWAKEUP as u32;
        /// Sets the one-shot behavior for the associated file descriptor.
        const ONE_SHOT = EPOLLONESHOT as u32;
        /// Sets an exclusive wake up mode for the epoll file descriptor that is being
        /// attached to the associated file descriptor.
        /// When a wake up event occurs and multiple epoll file descriptors are attached to
        /// the same target file using this mode, one or more of the epoll file descriptors
        /// will receive an event with `epoll_wait`. The default here is for all those file
        /// descriptors to receive an event.
        const EXCLUSIVE = EPOLLEXCLUSIVE as u32;
    }
}

/// Wrapper over
/// [`libc::epoll_event`](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html).
///
/// The wrapped value is reachable read-only through `Deref`.
// We are using `transparent` here to be super sure that this struct and its fields
// have the same alignment as those from the `epoll_event` struct from C.
// The pointer casts between `EpollEvent` and `epoll_event` in `Epoll::ctl` and
// `Epoll::wait` depend on this identical layout.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct EpollEvent(epoll_event);

impl std::fmt::Debug for EpollEvent {
    /// Renders the event as `{ events: <mask>, data: <data> }`, printing both the raw
    /// event mask and the user data as plain integers.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Copy both values out first so the formatter only ever sees plain integers.
        let mask = self.events();
        let payload = self.data();
        f.write_fmt(format_args!("{{ events: {}, data: {} }}", mask, payload))
    }
}

impl Deref for EpollEvent {
    type Target = epoll_event;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl Default for EpollEvent {
    fn default() -> Self {
        EpollEvent(epoll_event {
            events: 0u32,
            u64: 0u64,
        })
    }
}

impl EpollEvent {
    /// Create a new epoll_event instance.
    ///
    /// # Arguments
    ///
    /// `events` - contains an event mask.
    /// `data` - a user data variable. `data` field can be a fd on which
    ///          we want to monitor the events specified by `events`.
    ///
    /// # Examples
    ///
    /// ```
    /// extern crate vmm_sys_util;
    /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
    ///
    /// let event = EpollEvent::new(EventSet::IN, 2);
    /// ```
    pub fn new(events: EventSet, data: u64) -> Self {
        EpollEvent(epoll_event {
            events: events.bits(),
            u64: data,
        })
    }

    /// Returns the `events` from
    /// ['libc::epoll_event'](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html).
    ///
    /// # Examples
    ///
    /// ```
    /// extern crate vmm_sys_util;
    /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
    ///
    /// let event = EpollEvent::new(EventSet::IN, 2);
    /// assert_eq!(event.events(), 1);
    /// ```
    pub fn events(&self) -> u32 {
        self.events
    }

    /// Returns the `EventSet` corresponding to `epoll_event.events`.
    ///
    /// # Panics
    ///
    /// Panics if `libc::epoll_event` contains invalid events.
    ///
    ///
    /// # Examples
    ///
    /// ```
    /// extern crate vmm_sys_util;
    /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
    ///
    /// let event = EpollEvent::new(EventSet::IN, 2);
    /// assert_eq!(event.event_set(), EventSet::IN);
    /// ```
    pub fn event_set(&self) -> EventSet {
        // This unwrap is safe because `epoll_events` can only be user created or
        // initialized by the kernel. We trust the kernel to only send us valid
        // events. The user can only initialize `epoll_events` using valid events.
        EventSet::from_bits(self.events()).unwrap()
    }

    /// Returns the `data` from the `libc::epoll_event`.
    ///
    /// # Examples
    ///
    /// ```
    /// extern crate vmm_sys_util;
    /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
    ///
    /// let event = EpollEvent::new(EventSet::IN, 2);
    /// assert_eq!(event.data(), 2);
    /// ```
    pub fn data(&self) -> u64 {
        self.u64
    }

    /// Converts the `libc::epoll_event` data to a RawFd.
    ///
    /// This conversion is lossy when the data does not correspond to a RawFd
    /// (data does not fit in a i32).
    ///
    /// # Examples
    ///
    /// ```
    /// extern crate vmm_sys_util;
    /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
    ///
    /// let event = EpollEvent::new(EventSet::IN, 2);
    /// assert_eq!(event.fd(), 2);
    /// ```
    pub fn fd(&self) -> RawFd {
        self.u64 as i32
    }
}

/// Wrapper over epoll functionality.
///
/// Holds the file descriptor of an epoll instance. The descriptor is obtained in
/// `Epoll::new` and closed when the `Epoll` value is dropped.
#[derive(Debug)]
pub struct Epoll {
    // Raw epoll file descriptor, also exposed through `AsRawFd`.
    epoll_fd: RawFd,
}

impl Epoll {
    /// Creates a new epoll instance.
    ///
    /// The descriptor is created with `epoll_create1(EPOLL_CLOEXEC)`. A failing call is
    /// reported as an `io::Error`.
    pub fn new() -> io::Result<Self> {
        // SAFETY: Safe because the call takes no pointers and its return code is checked
        // by `into_result` below.
        let raw_fd = unsafe { epoll_create1(EPOLL_CLOEXEC) };

        SyscallReturnCode(raw_fd)
            .into_result()
            .map(|epoll_fd| Epoll { epoll_fd })
    }

    /// Wrapper for `libc::epoll_ctl`.
    ///
    /// Adds, modifies or removes a file descriptor in the interest list of this
    /// epoll instance.
    ///
    /// # Arguments
    ///
    /// * `operation` - the action to be performed on the file descriptor.
    /// * `fd` - the file descriptor on which `operation` is performed.
    /// * `event` - the `epoll_event` instance that is linked to `fd`.
    ///
    /// # Examples
    ///
    /// ```
    /// extern crate vmm_sys_util;
    ///
    /// use std::os::unix::io::AsRawFd;
    /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
    /// use vmm_sys_util::eventfd::EventFd;
    ///
    /// let epoll = Epoll::new().unwrap();
    /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    /// epoll
    ///     .ctl(
    ///         ControlOperation::Add,
    ///         event_fd.as_raw_fd() as i32,
    ///         EpollEvent::new(EventSet::OUT, event_fd.as_raw_fd() as u64),
    ///     )
    ///     .unwrap();
    /// epoll
    ///     .ctl(
    ///         ControlOperation::Modify,
    ///         event_fd.as_raw_fd() as i32,
    ///         EpollEvent::new(EventSet::IN, 4),
    ///     )
    ///     .unwrap();
    /// ```
    pub fn ctl(&self, operation: ControlOperation, fd: RawFd, event: EpollEvent) -> io::Result<()> {
        let op_code = operation as i32;
        // `EpollEvent` is a transparent wrapper, so a pointer to it is also a valid
        // pointer to the inner `epoll_event`.
        let event_ptr = &event as *const EpollEvent as *mut epoll_event;

        // SAFETY: Safe because we give a valid epoll file descriptor, a valid file descriptor
        // to watch, as well as a valid epoll_event structure. We also check the return value.
        let ret = unsafe { epoll_ctl(self.epoll_fd, op_code, fd, event_ptr) };

        SyscallReturnCode(ret).into_empty_result()
    }

    /// Wrapper for `libc::epoll_wait`.
    ///
    /// Returns the number of file descriptors in the interest list that became ready
    /// for I/O, or the `errno`-based error if the call failed.
    ///
    /// # Arguments
    ///
    /// * `timeout` - how long the `epoll_wait` system call will block
    ///               (measured in milliseconds).
    /// * `events` - the buffer in which the events returned by `epoll_wait()`
    ///              are stored.
    ///
    /// # Examples
    ///
    /// ```
    /// extern crate vmm_sys_util;
    ///
    /// use std::os::unix::io::AsRawFd;
    /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
    /// use vmm_sys_util::eventfd::EventFd;
    ///
    /// let epoll = Epoll::new().unwrap();
    /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    ///
    /// let mut ready_events = vec![EpollEvent::default(); 10];
    /// epoll
    ///     .ctl(
    ///         ControlOperation::Add,
    ///         event_fd.as_raw_fd() as i32,
    ///         EpollEvent::new(EventSet::OUT, 4),
    ///     )
    ///     .unwrap();
    /// let ev_count = epoll.wait(-1, &mut ready_events[..]).unwrap();
    /// assert_eq!(ev_count, 1);
    /// ```
    pub fn wait(&self, timeout: i32, events: &mut [EpollEvent]) -> io::Result<usize> {
        let capacity = events.len() as i32;
        // `EpollEvent` is a transparent wrapper, so the slice can be handed over as an
        // array of `epoll_event`.
        let buffer = events.as_mut_ptr() as *mut epoll_event;

        // SAFETY: Safe because we give a valid epoll file descriptor and an array of
        // epoll_event structures that will be modified by the kernel to indicate information
        // about the subset of file descriptors in the interest list.
        // We also check the return value.
        let ret = unsafe { epoll_wait(self.epoll_fd, buffer, capacity, timeout) };

        SyscallReturnCode(ret)
            .into_result()
            .map(|ready| ready as usize)
    }
}

impl AsRawFd for Epoll {
    fn as_raw_fd(&self) -> RawFd {
        self.epoll_fd
    }
}

impl Drop for Epoll {
    /// Closes the epoll file descriptor. The return value of `close` is ignored.
    fn drop(&mut self) {
        let fd = self.epoll_fd;
        // SAFETY: Safe because this fd was opened with `epoll_create1` and we trust
        // the kernel to have given us a valid fd.
        let _ = unsafe { libc::close(fd) };
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    use crate::eventfd::EventFd;

    /// Checks the accessors of `EpollEvent` on a default and on an explicitly built event.
    #[test]
    fn test_event_ops() {
        let mut event = EpollEvent::default();
        assert_eq!(event.events(), 0);
        assert_eq!(event.data(), 0);

        event = EpollEvent::new(EventSet::IN, 2);
        assert_eq!(event.events(), 1);
        assert_eq!(event.event_set(), EventSet::IN);

        assert_eq!(event.data(), 2);
        assert_eq!(event.fd(), 2);
    }

    /// Checks the exact text produced by the `Debug` implementation of `EpollEvent`.
    #[test]
    fn test_events_debug() {
        let events = EpollEvent::new(EventSet::IN, 42);
        assert_eq!(format!("{:?}", events), "{ events: 1, data: 42 }");
    }

    /// Exercises `Epoll::ctl` (add / modify / delete) and `Epoll::wait` on real eventfds.
    ///
    /// Calls that must succeed use `unwrap()` so that a failure reports the underlying
    /// `io::Error` instead of a bare "assertion failed".
    #[test]
    fn test_epoll() {
        const DEFAULT_TIMEOUT: i32 = 250;
        const EVENT_BUFFER_SIZE: usize = 128;

        let epoll = Epoll::new().unwrap();
        assert_eq!(epoll.epoll_fd, epoll.as_raw_fd());

        // Let's test different scenarios for `epoll_ctl()` and `epoll_wait()` functionality.

        let event_fd_1 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        // For EPOLLOUT to be available it is enough only to be possible to write a value of
        // at least 1 to the eventfd counter without blocking.
        // If we write a value greater than 0 to this counter, the fd will be available for
        // EPOLLIN events too.
        event_fd_1.write(1).unwrap();

        let mut event_1 =
            EpollEvent::new(EventSet::IN | EventSet::OUT, event_fd_1.as_raw_fd() as u64);

        // For EPOLL_CTL_ADD behavior we will try to add some fds with different event masks into
        // the interest list of epoll instance.
        epoll
            .ctl(ControlOperation::Add, event_fd_1.as_raw_fd(), event_1)
            .unwrap();

        // We can't add twice the same fd to epoll interest list.
        assert!(epoll
            .ctl(ControlOperation::Add, event_fd_1.as_raw_fd(), event_1)
            .is_err());

        let event_fd_2 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        event_fd_2.write(1).unwrap();
        epoll
            .ctl(
                ControlOperation::Add,
                event_fd_2.as_raw_fd(),
                // For this fd, we want an Event instance that has `data` field set to other
                // value than the value of the fd and `events` without EPOLLIN type set.
                EpollEvent::new(EventSet::OUT, 10),
            )
            .unwrap();

        // For the following eventfd we won't write anything to its counter, so we expect EPOLLIN
        // event to not be available for this fd, even if we say that we want to monitor this type
        // of event via EPOLL_CTL_ADD operation.
        let event_fd_3 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let event_3 = EpollEvent::new(EventSet::OUT | EventSet::IN, event_fd_3.as_raw_fd() as u64);
        epoll
            .ctl(ControlOperation::Add, event_fd_3.as_raw_fd(), event_3)
            .unwrap();

        // Let's check `epoll_wait()` behavior for our epoll instance.
        let mut ready_events = vec![EpollEvent::default(); EVENT_BUFFER_SIZE];
        let mut ev_count = epoll.wait(DEFAULT_TIMEOUT, &mut ready_events[..]).unwrap();

        // We expect to have 3 fds in the ready list of epoll instance.
        assert_eq!(ev_count, 3);

        // Let's check also the Event values that are now returned in the ready list.
        assert_eq!(ready_events[0].data(), event_fd_1.as_raw_fd() as u64);
        // For this fd, `data` field was populated with random data instead of the
        // corresponding fd value.
        assert_eq!(ready_events[1].data(), 10);
        assert_eq!(ready_events[2].data(), event_fd_3.as_raw_fd() as u64);

        // EPOLLIN and EPOLLOUT should be available for this fd.
        assert_eq!(
            ready_events[0].events(),
            (EventSet::IN | EventSet::OUT).bits()
        );
        // Only EPOLLOUT is expected because we didn't want to monitor EPOLLIN on this fd.
        assert_eq!(ready_events[1].events(), EventSet::OUT.bits());
        // Only EPOLLOUT too because eventfd counter value is 0 (we didn't write a value
        // greater than 0 to it).
        assert_eq!(ready_events[2].events(), EventSet::OUT.bits());

        // Now we're gonna modify the Event instance for a fd to test EPOLL_CTL_MOD
        // behavior.
        // We create here a new Event with some events, other than those previously set,
        // that we want to monitor this time on event_fd_1.
        event_1 = EpollEvent::new(EventSet::OUT, 20);
        epoll
            .ctl(ControlOperation::Modify, event_fd_1.as_raw_fd(), event_1)
            .unwrap();

        let event_fd_4 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        // Can't modify a fd that wasn't added to epoll interest list.
        assert!(epoll
            .ctl(
                ControlOperation::Modify,
                event_fd_4.as_raw_fd(),
                EpollEvent::default()
            )
            .is_err());

        let _ = epoll.wait(DEFAULT_TIMEOUT, &mut ready_events[..]).unwrap();

        // Let's check that Event fields were indeed changed for the `event_fd_1` fd.
        assert_eq!(ready_events[0].data(), 20);
        // EPOLLOUT is now available for this fd as we've intended with EPOLL_CTL_MOD operation.
        assert_eq!(ready_events[0].events(), EventSet::OUT.bits());

        // Now let's set for a fd to not have any events monitored.
        epoll
            .ctl(
                ControlOperation::Modify,
                event_fd_1.as_raw_fd(),
                EpollEvent::default(),
            )
            .unwrap();

        // In this particular case we expect to remain only with 2 fds in the ready list.
        ev_count = epoll.wait(DEFAULT_TIMEOUT, &mut ready_events[..]).unwrap();
        assert_eq!(ev_count, 2);

        // Let's also delete a fd from the interest list.
        epoll
            .ctl(
                ControlOperation::Delete,
                event_fd_2.as_raw_fd(),
                EpollEvent::default(),
            )
            .unwrap();

        // We expect to have only one fd remained in the ready list (event_fd_3).
        ev_count = epoll.wait(DEFAULT_TIMEOUT, &mut ready_events[..]).unwrap();

        assert_eq!(ev_count, 1);
        assert_eq!(ready_events[0].data(), event_fd_3.as_raw_fd() as u64);
        assert_eq!(ready_events[0].events(), EventSet::OUT.bits());

        // If we try to remove a fd from epoll interest list that wasn't added before it will fail.
        assert!(epoll
            .ctl(
                ControlOperation::Delete,
                event_fd_4.as_raw_fd(),
                EpollEvent::default()
            )
            .is_err());
    }
}