// virtiofsd 1.5.0
//
// A virtio-fs vhost-user device daemon
// SPDX-License-Identifier: BSD-3-Clause

use bitflags::bitflags;
use std::ffi::{CStr, CString};
use std::io::{Error, Result};
use std::os::unix::io::{AsRawFd, BorrowedFd, RawFd};

// A helper function that checks the return value of a C function call
// and wraps it in a `Result` type, returning the `errno` code as `Err`.
//
// A return value equal to `-1` is treated as failure and converted into an
// `Error` built from the calling thread's current `errno`. Any other value is
// handed back unchanged inside `Ok`.
fn check_retval<T: From<i8> + PartialEq>(t: T) -> Result<T> {
    if t == T::from(-1_i8) {
        // Read `errno` immediately: a later libc call could overwrite it.
        Err(Error::last_os_error())
    } else {
        Ok(t)
    }
}

/// Simple object to collect basic facts about the OS,
/// such as available syscalls.
pub struct OsFacts {
    /// Whether the `openat2(2)` system call is available.
    pub has_openat2: bool,
}

impl OsFacts {
    /// This object should only be constructed using new.
    pub fn new() -> Self {
        // Checking for `openat2()` since it first appeared in Linux 5.6.
        // SAFETY: all-zero byte-pattern is a valid `libc::open_how`
        let how: libc::open_how = unsafe { std::mem::zeroed() };
        let cwd = CString::new(".").unwrap();
        // SAFETY: `cwd.as_ptr()` points to a valid NUL-terminated string,
        // and the `how` pointer is a valid pointer to an `open_how` struct.
        let fd = unsafe {

        let has_openat2 = fd >= 0;
        if has_openat2 {
            // SAFETY: `fd` is an open file descriptor
            unsafe {
                libc::close(fd as libc::c_int);

        Self { has_openat2 }

/// Safe wrapper for `mount(2)`
/// # Errors
/// Will return `Err(errno)` if `mount(2)` fails.
/// Each filesystem type may have its own special errors and its own special behavior,
/// see `mount(2)` and the linux source kernel for details.
/// # Panics
/// This function panics if the strings `source`, `target` or `fstype` contain an internal 0 byte.
pub fn mount(source: Option<&str>, target: &str, fstype: Option<&str>, flags: u64) -> Result<()> {
    let source = CString::new(source.unwrap_or("")).unwrap();
    let source = source.as_ptr();

    let target = CString::new(target).unwrap();
    let target = target.as_ptr();

    let fstype = CString::new(fstype.unwrap_or("")).unwrap();
    let fstype = fstype.as_ptr();

    // Safety: `source`, `target` or `fstype` are a valid C string pointers
    check_retval(unsafe { libc::mount(source, target, fstype, flags, std::ptr::null()) })?;

/// Safe wrapper for `umount2(2)`
/// # Errors
/// Will return `Err(errno)` if `umount2(2)` fails.
/// Each filesystem type may have its own special errors and its own special behavior,
/// see `umount2(2)` and the linux source kernel for details.
/// # Panics
/// This function panics if the strings `target` contains an internal 0 byte.
pub fn umount2(target: &str, flags: i32) -> Result<()> {
    let target = CString::new(target).unwrap();
    let target = target.as_ptr();

    // Safety: `target` is a valid C string pointer
    check_retval(unsafe { libc::umount2(target, flags) })?;

/// Safe wrapper for `fchdir(2)`
/// # Errors
/// Will return `Err(errno)` if `fchdir(2)` fails.
/// Each filesystem type may have its own special errors, see `fchdir(2)` for details.
pub fn fchdir(fd: RawFd) -> Result<()> {
    check_retval(unsafe { libc::fchdir(fd) })?;

/// Safe wrapper for `umask(2)`
pub fn umask(mask: u32) -> u32 {
    // SAFETY: this call doesn't modify any memory and there is no need
    // to check the return value because this system call always succeeds.
    unsafe { libc::umask(mask) }

/// An RAII implementation of a scoped file mode creation mask (umask), it set the
/// new umask. When this structure is dropped (falls out of scope), it set the previous
/// value of the mask.
pub struct ScopedUmask {
    umask: libc::mode_t,

impl ScopedUmask {
    pub fn new(new_umask: u32) -> Self {
        Self {
            umask: umask(new_umask),

impl Drop for ScopedUmask {
    fn drop(&mut self) {

/// Safe wrapper around `openat(2)`.
/// # Errors
/// Will return `Err(errno)` if `openat(2)` fails,
/// see `openat(2)` for details.
pub fn openat(dir: &impl AsRawFd, pathname: &CStr, flags: i32, mode: Option<u32>) -> Result<RawFd> {
    let mode = u64::from(mode.unwrap_or(0));

    // SAFETY: `pathname` points to a valid NUL-terminated string.
    // However, the caller must ensure that `dir` can provide a valid file descriptor.
    check_retval(unsafe {
            flags as libc::c_int,

/// An utility function that uses `openat2(2)` to restrict the how the provided pathname
/// is resolved. It uses the following flags:
/// - `RESOLVE_IN_ROOT`: Treat the directory referred to by dirfd as the root directory while
/// resolving pathname. This has the effect as though virtiofsd had used chroot(2) to modify its
/// root directory to dirfd.
/// - `RESOLVE_NO_MAGICLINKS`: Disallow all magic-link (i.e., proc(2) link-like files) resolution
/// during path resolution.
/// Additionally, the flags `O_NOFOLLOW` and `O_CLOEXEC` are added.
/// # Error
/// Will return `Err(errno)` if `openat2(2)` fails, see the man page for details.
/// # Safety
/// The caller must ensure that dirfd is a valid file descriptor.
pub fn do_open_relative_to(
    dir: &impl AsRawFd,
    pathname: &CStr,
    flags: i32,
    mode: Option<u32>,
) -> Result<RawFd> {
    // `openat2(2)` returns an error if `how.mode` contains bits other than those in range 07777,
    // let's ignore the extra bits to be compatible with `openat(2)`.
    let mode = u64::from(mode.unwrap_or(0)) & 0o7777;

    // SAFETY: all-zero byte-pattern represents a valid `libc::open_how`
    let mut how: libc::open_how = unsafe { std::mem::zeroed() };
    how.resolve = libc::RESOLVE_IN_ROOT | libc::RESOLVE_NO_MAGICLINKS;
    how.flags = flags as u64;
    how.mode = mode;

    // SAFETY: `pathname` points to a valid NUL-terminated string, and the `how` pointer is a valid
    // pointer to an `open_how` struct. However, the caller must ensure that `dir` can provide a
    // valid file descriptor (this can be changed to BorrowedFd).
    check_retval(unsafe {
    } as RawFd)

mod writev {
    /// musl does not provide a wrapper for the `pwritev2(2)` system call,
    /// we need to call it using `syscall(2)`.

    #[cfg(target_env = "gnu")]
    pub use libc::pwritev2;

    #[cfg(target_env = "musl")]
    pub unsafe fn pwritev2(
        fd: libc::c_int,
        iov: *const libc::iovec,
        iovcnt: libc::c_int,
        offset: libc::off_t,
        flags: libc::c_int,
    ) -> libc::ssize_t {
        // The `pwritev2(2)` syscall expects to receive the 64-bit offset split in
        // its high and low parts (see `syscall(2)`). On 64-bit architectures we
        // set `lo_off=offset` and `hi_off=0` (glibc does it), since `hi_off` is cleared,
        // so we need to make sure of not clear the higher 32 bits of `lo_off`, otherwise
        // the offset will be 0 on 64-bit architectures.
        let lo_off = offset as libc::c_long; // warn: do not clear the higher 32 bits
        let hi_off = (offset as u64).checked_shr(libc::c_long::BITS).unwrap_or(0) as libc::c_long;
        unsafe {
            libc::syscall(libc::SYS_pwritev2, fd, iov, iovcnt, lo_off, hi_off, flags)
                as libc::ssize_t

// We cannot use libc::RWF_HIPRI, etc, because these constants are not defined in musl.
bitflags! {
    /// A bitwise OR of zero or more flags passed in as a parameter to the
    /// write vectored function `writev_at()`.
    pub struct WritevFlags: i32 {
        /// High priority write. Allows block-based filesystems to use polling of the device, which
        /// provides lower latency, but may use additional resources. (Currently, this feature is
        /// usable only on a file descriptor opened using the O_DIRECT flag.)
        const RWF_HIPRI = 0x00000001;

        /// Provide a per-write equivalent of the O_DSYNC open(2) flag. Its effect applies
        /// only to the data range written by the system call.
        const RWF_DSYNC = 0x00000002;

        /// Provide a per-write equivalent of the O_SYNC open(2) flag. Its effect applies only
        /// to the data range written by the system call.
        const RWF_SYNC = 0x00000004;

        /// Provide a per-write equivalent of the O_APPEND open(2) flag. Its effect applies only
        /// to the data range written by the system call. The offset argument does not affect the
        /// write operation; the data is always appended to the end of the file.
        /// However, if the offset argument is -1, the current file offset is updated.
        const RWF_APPEND = 0x00000010;

#[cfg(target_env = "gnu")]
mod writev_test {
    // Lets make sure (at compile time) that the WritevFlags don't go out of sync with the libc
    const _: () = assert!(
        super::WritevFlags::RWF_HIPRI.bits() == libc::RWF_HIPRI,
        "invalid RWF_HIPRI value"
    const _: () = assert!(
        super::WritevFlags::RWF_DSYNC.bits() == libc::RWF_DSYNC,
        "invalid RWF_DSYNC value"
    const _: () = assert!(
        super::WritevFlags::RWF_SYNC.bits() == libc::RWF_SYNC,
        "invalid RWF_SYNC value"
    const _: () = assert!(
        super::WritevFlags::RWF_APPEND.bits() == libc::RWF_APPEND,
        "invalid RWF_APPEND value"

/// Safe wrapper for `pwritev2(2)`
/// This system call is similar `pwritev(2)`, but add a new argument,
/// flags, which modifies the behavior on a per-call basis.
/// Unlike `pwritev(2)`, if the offset argument is -1, then the current file offset
/// is used and updated.
/// # Errors
/// Will return `Err(errno)` if `pwritev2(2)` fails, see `pwritev2(2)` for details.
/// # Safety
/// The caller must ensure that each iovec element is valid (i.e., it has a valid `iov_base`
/// pointer and `iov_len`).
pub fn writev_at(
    fd: BorrowedFd,
    iovecs: &[libc::iovec],
    offset: i64,
    flags: Option<WritevFlags>,
) -> Result<usize> {
    let flags = flags.unwrap_or(WritevFlags::empty());
    // SAFETY: `fd` is a valid filed descriptor, `iov` is a valid pointer
    // to the iovec slice `ìovecs` of `iovcnt` elements. However, the caller
    // must ensure that each iovec element has a valid `iov_base` pointer and `iov_len`.
    let bytes_written = check_retval(unsafe {
            iovecs.len() as libc::c_int,
    Ok(bytes_written as usize)