syd 3.54.1

rock-solid application kernel
Documentation
//
// Syd: rock-solid application kernel
// src/kernel/getdents.rs: getdents64(2) and getdents(2) handlers
//
// Copyright (c) 2023, 2024, 2025, 2026 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

use libseccomp::ScmpNotifResp;
use nix::{errno::Errno, NixPath};

use crate::{
    compat::{dirent_buf, dirent_shift_left, dirent_write_old, getdents64},
    config::DIRENT_BUF_SIZE,
    confine::scmp_arch_is_compat32,
    fd::to_fd,
    kernel::sandbox_path,
    lookup::CanonicalPath,
    req::UNotifyEventRequest,
    sandbox::Capability,
};

// Entry point for the getdents(2) system call.
//
// Determines whether the request's architecture is a 32-bit compat
// one and forwards to the common handler, which then converts the
// entries to the old dirent layout.
pub(crate) fn sys_getdents(request: UNotifyEventRequest) -> ScmpNotifResp {
    // Read the architecture before `request` is moved into the handler.
    let arch = request.scmpreq.data.arch;
    let compat32 = Some(scmp_arch_is_compat32(arch));

    syscall_getdents_handler(request, "getdents", compat32)
}

// Entry point for the getdents64(2) system call.
//
// Forwards to the common handler without an architecture hint, so
// entries keep their dirent64 layout.
pub(crate) fn sys_getdents64(request: UNotifyEventRequest) -> ScmpNotifResp {
    // `None` selects the getdents64(2) code path in the handler.
    let no_conversion: Option<bool> = None;

    syscall_getdents_handler(request, "getdents64", no_conversion)
}

// Common handler for getdents64(2) and getdents(2) system calls.
//
// `sysname` is the system call name passed on to the List access check.
// `is32` selects the output format: `None` keeps the dirent64 records
// as they are for getdents64(2); `Some(_)` converts each record with
// `dirent_write_old` for getdents(2), where the boolean is the result
// of `scmp_arch_is_compat32` for the request's architecture.
//
// Entries that fail the `check()` call or the List access check are
// left out of the buffer written back to the sandbox process. The
// number of bytes written is returned as the system call result; it
// is 0 when the loop ends on `ECANCELED` (EOF or empty directory).
#[expect(clippy::cognitive_complexity)]
fn syscall_getdents_handler(
    request: UNotifyEventRequest,
    sysname: &str,
    is32: Option<bool>,
) -> ScmpNotifResp {
    syscall_handler!(request, |request: UNotifyEventRequest| {
        let req = request.scmpreq;

        // Validate file descriptor.
        //
        // AT_FDCWD is an invalid file descriptor.
        let fd = to_fd(req.data.args[0])?;

        // Get remote fd, and
        // Readlink /proc/thread-self/fd/$fd.
        //
        // Readdir access check here has been moved to the _open_(2) handler
        // for simplicity and efficiency. The List check still takes place.
        let fd = request.get_fd(fd)?;

        // Lock sandbox for read to read flags and options.
        let sandbox = request.get_sandbox();
        let check_flags = (*sandbox.flags, *sandbox.options);
        drop(sandbox); // release read lock.
        let restrict_deleted = !check_flags.0.allow_unsafe_deleted();
        let check_flags = check_flags.into();

        // Resolve the descriptor to a canonical path and reject
        // anything that is not a directory with ENOTDIR.
        let mut path = CanonicalPath::new_fd(fd.into(), req.pid(), restrict_deleted)?;
        if !path.is_dir() {
            return Err(Errno::ENOTDIR);
        }

        // From here on `fd` is the directory handle that is read from,
        // and `dir` is the directory path that entry names are appended to.
        let fd = path.take_dir();
        let mut dir = path.take();

        // Linux kernel truncates upper bits.
        #[expect(clippy::cast_possible_truncation)]
        let count = req.data.args[2] as u32;

        // Count argument to the getdents64(2) call must not be fully
        // trusted, it can be overly large, and allocating a Vector of
        // that capacity may overflow.
        let count = (count as usize).min(DIRENT_BUF_SIZE);

        // There's no guarantee on the order of items returned by
        // getdents64(2), therefore we must potentially check each
        // element for dot or dotdot, until we actually see them.
        let mut seen_dot = false;
        let mut seen_dotdot = false;

        let pid = req.pid();
        // Length of the bare directory path; `dir` is truncated back to
        // this length after each entry name has been pushed and checked.
        let len = dir.len();
        // Number of bytes to report to the caller.
        let mut ret: usize = 0;
        // Keep reading batches until at least one byte of visible
        // entries has been produced, or the directory is exhausted.
        while ret == 0 {
            // Zero size returns EINVAL only if file is not at EOF,
            // otherwise it returns 0. We must ask the kernel to make
            // sure we return the correct value.
            let siz = count.checked_sub(ret).ok_or(Errno::EOVERFLOW)?;
            let mut entries = match getdents64(&fd, siz) {
                Ok(entries) => entries,
                Err(Errno::ECANCELED) => break, // EOF or empty directory
                Err(errno) => return Err(errno),
            };

            // Shift visible entries over hidden ones.
            //
            // pos:  bytes of output accumulated so far for this batch.
            // skip: accumulated size of entries counted as skipped; it is
            //       subtracted from the entry offset to find the
            //       destination in the getdents64(2) branch.
            // ptr:  start of the output region inside the buffer, set
            //       when the first visible entry is placed.
            let mut pos = 0usize;
            let mut skip = 0usize;
            let mut ptr: *mut u8 = std::ptr::null_mut();
            let buf_base = entries.as_mut_ptr();

            // Lock sandbox for read to perform Stat access check.
            let sandbox = request.get_sandbox();

            for entry in &mut entries {
                let entry_off = entry.buf_offset();

                // Allow the special dot entries `.` and `..`. `..` may
                // point to a denylisted directory, however at this
                // point there's not much we can do: even the root
                // directory, ie `/`, has a `..`. In this exceptional
                // case `..` points to `.`.
                if !seen_dot && entry.is_dot() {
                    seen_dot = true;
                } else if !seen_dotdot && entry.is_dotdot() {
                    seen_dotdot = true;
                } else {
                    // Append entry name to the directory.
                    dir.push(entry.name_bytes());

                    // Run XPath::check() with file type for global restrictions.
                    if dir
                        .check(
                            pid,
                            Some(&entry.file_type()),
                            Some(entry.as_xpath()),
                            check_flags,
                        )
                        .is_err()
                    {
                        // Skip entry.
                        dir.truncate(len);
                        skip = skip.checked_add(entry.size()).ok_or(Errno::EOVERFLOW)?;
                        continue;
                    }

                    // Check for sandbox access with List capability.
                    let hide = sandbox_path(
                        Some(&request),
                        &sandbox,
                        request.scmpreq.pid(), // Unused when request.is_some()
                        &dir,
                        Capability::CAP_LIST,
                        sysname,
                    )
                    .is_err();

                    // Restore directory entry.
                    dir.truncate(len);

                    if hide {
                        // Skip entry.
                        skip = skip.checked_add(entry.size()).ok_or(Errno::EOVERFLOW)?;
                        continue;
                    }
                }

                // Access granted:
                // 1. Entry will be written to sandbox process memory.
                // 2. Handle truncation as necessary.
                let reclen;
                let entry_size = entry.size();
                if let Some(is32) = is32 {
                    // getdents(2): convert dirent64 to linux_dirent.
                    let d_ino = entry.ino();
                    let d_off = entry.d_off();
                    let d_type = entry.d_type();
                    let name_ptr = entry.name_ptr();
                    let name_len = entry.name_len();

                    // The first visible entry is converted at its own
                    // position; later ones are packed right after the
                    // output produced so far.
                    let dst = if ptr.is_null() {
                        // SAFETY: buf_base plus entry_off is this
                        // entry's position in DirIter buffer.
                        unsafe { buf_base.add(entry_off) }
                    } else {
                        // SAFETY: ptr plus pos lies within DirIter buffer.
                        unsafe { ptr.add(pos) }
                    };
                    if ptr.is_null() {
                        ptr = dst;
                    }

                    // SAFETY:
                    // 1. dst points into DirIter buffer.
                    // 2. No references to buffer are live.
                    reclen = match unsafe {
                        dirent_write_old(dst, is32, d_ino, d_off, d_type, name_ptr, name_len)
                    } {
                        Ok(n) => n,
                        // Stop at the overflowing entry if earlier
                        // entries can still be returned.
                        Err(Errno::EOVERFLOW) if pos > 0 => break,
                        Err(errno) => return Err(errno),
                    };

                    // Stop once the converted record no longer fits in
                    // the space the caller asked for.
                    let rem = count.checked_sub(pos).ok_or(Errno::EOVERFLOW)?;
                    if reclen > rem {
                        break;
                    }

                    // NOTE(review): `skip` is only read by the
                    // getdents64(2) branch, which is never taken when
                    // `is32` is `Some`; confirm this update is needed.
                    skip = skip.checked_add(entry_size).ok_or(Errno::EOVERFLOW)?;
                } else {
                    // getdents64(2): shift dirent64 entries in-place.
                    reclen = entry_size;
                    let rem = count.checked_sub(pos).ok_or(Errno::EOVERFLOW)?;
                    if reclen > rem {
                        break;
                    }

                    // SAFETY:
                    // 1. buf_base plus offsets lie within DirIter buffer.
                    // 2. No references to buffer are live.
                    #[expect(clippy::arithmetic_side_effects)]
                    let dst = unsafe {
                        let src = buf_base.add(entry_off) as *const u8;
                        let dst = buf_base.add(entry_off - skip);
                        dirent_shift_left(src, reclen, dst)
                    };
                    if ptr.is_null() {
                        ptr = dst;
                    }
                }

                // Account for the record just placed and stop once the
                // requested size has been reached.
                pos = pos.checked_add(reclen).ok_or(Errno::EOVERFLOW)?;
                if pos >= count {
                    break;
                }
            }

            // Write buffer to sandbox process memory.
            if pos > 0 {
                // SAFETY:
                // 1. ptr points into thread-local DirIter buffer.
                // 2. pos <= count <= DIRENT_BUF_SIZE.
                // 3. No getdents64 call intervenes.
                let buf = unsafe { dirent_buf(ptr, pos) };
                request.write_mem_all(buf, req.data.args[1])?;
            }

            // A zero here means every entry in this batch was hidden;
            // the loop then reads the next batch.
            ret = pos;
        }

        #[expect(clippy::cast_possible_wrap)]
        Ok(request.return_syscall(ret as i64))
    })
}