// syd 3.52.0
//
// rock-solid application kernel
// Documentation
//
// Syd: rock-solid application kernel
// src/kernel/exec.rs: exec(3) handlers
//
// Copyright (c) 2023, 2024, 2025, 2026 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

use libseccomp::ScmpNotifResp;
use nix::{errno::Errno, fcntl::AtFlags};

use crate::{
    fd::{is_executable, AT_EXECVE_CHECK},
    kernel::{sandbox_path, to_atflags},
    lookup::{FileType, FsFlags},
    req::{SysArg, SysFlags, UNotifyEventRequest},
    sandbox::Capability,
};

// Entry point for the execve(2) seccomp notification.
//
// Describes the syscall's path argument (index 0, resolved with
// `FsFlags::MUST_PATH`) and hands the request to the shared exec
// handler, whose response is returned as-is.
pub(crate) fn sys_execve(request: UNotifyEventRequest) -> ScmpNotifResp {
    syscall_exec_handler(
        request,
        "execve",
        SysArg {
            path: Some(0),
            fsflags: FsFlags::MUST_PATH,
            ..Default::default()
        },
    )
}

// Entry point for the execveat(2) seccomp notification.
//
// Validates the flags argument (syscall argument 4), translates the
// accepted flags into the path-resolution settings of a `SysArg`
// (dirfd at index 0, path at index 1), and hands the request to the
// shared exec handler. Unknown flags fail the syscall with the errno
// reported by `to_atflags`.
pub(crate) fn sys_execveat(request: UNotifyEventRequest) -> ScmpNotifResp {
    // Only these flags are accepted; anything else is rejected.
    let valid_flags = AtFlags::AT_SYMLINK_NOFOLLOW | AtFlags::AT_EMPTY_PATH | AT_EXECVE_CHECK;
    let atflags = match to_atflags(request.scmpreq.data.args[4], valid_flags) {
        Ok(atflags) => atflags,
        Err(errno) => return request.fail_syscall(errno),
    };

    // AT_SYMLINK_NOFOLLOW: do not follow the last path component.
    let mut fsflags = FsFlags::MUST_PATH;
    if atflags.contains(AtFlags::AT_SYMLINK_NOFOLLOW) {
        fsflags.insert(FsFlags::NO_FOLLOW_LAST);
    }

    // AT_EMPTY_PATH: forwarded to the path reader as SysFlags::EMPTY_PATH.
    let sysflags = if atflags.contains(AtFlags::AT_EMPTY_PATH) {
        SysFlags::EMPTY_PATH
    } else {
        SysFlags::empty()
    };

    syscall_exec_handler(
        request,
        "execveat",
        SysArg {
            dirfd: Some(0),
            path: Some(1),
            flags: sysflags,
            fsflags,
        },
    )
}

// Shared implementation behind the execve(2) and execveat(2) handlers.
//
// `syscall_name` is passed on to the sandbox access check and `arg`
// describes where the path (and optional dirfd) live in the syscall
// arguments and how the path is to be resolved.
//
// This handler only runs with trace/allow_unsafe_ptrace:1, and it is
// vulnerable to TOCTOU. With ptrace on, this is mitigated using the
// TOCTOU-mitigator, see the wait() function for context.
// See: https://bugzilla.kernel.org/show_bug.cgi?id=218501
fn syscall_exec_handler(
    request: UNotifyEventRequest,
    syscall_name: &str,
    arg: SysArg,
) -> ScmpNotifResp {
    syscall_handler!(request, |request: UNotifyEventRequest| {
        // The exec checks run even when exec sandboxing is off.
        // In that case the path access check is skipped and only the
        // file executability check (aka AT_EXECVE_CHECK) is performed.
        let sandbox = request.get_sandbox();
        let restrict_memfd = !sandbox.flags.allow_unsafe_memfd();

        // Read the path from the sandbox process.
        let (path, _, _) = request.read_path(&sandbox, arg)?;

        // Consult the sandbox access checker only if Exec sandboxing is on.
        if sandbox.enabled(Capability::CAP_EXEC) {
            sandbox_path(
                Some(&request),
                &sandbox,
                request.scmpreq.pid(), // Unused when request.is_some()
                path.abs(),
                Capability::CAP_EXEC,
                syscall_name,
            )?;
        }
        // Done with the sandbox: release the read-lock.
        drop(sandbox);

        // AT_SYMLINK_NOFOLLOW: If the file identified by dirfd and
        // a non-NULL pathname is a symbolic link, then the call
        // fails with the error ELOOP.
        let no_follow = !arg.fsflags.follow_last();
        if no_follow
            && matches!(
                path.typ.as_ref(),
                Some(typ) if typ.is_symlink() || typ.is_magic_link()
            )
        {
            return Err(Errno::ELOOP);
        }

        // Only regular files, and memory fds when
        // trace/allow_unsafe_memfd permits them, may be executed.
        // Everything else fails with EACCES without more processing.
        //
        // Note, attempting to execute directories on Linux
        // returns EACCES, not EISDIR like the manual page
        // claims. GNU make has a test checking this errno.
        let type_allowed = match path.typ.as_ref() {
            Some(FileType::Reg) => true,
            Some(FileType::Mfd) => !restrict_memfd,
            _ => false,
        };
        if !type_allowed {
            return Err(Errno::EACCES);
        }

        // Return EACCES if the file is not executable.
        // This uses AT_EXECVE_CHECK on Linux>=6.14.
        if is_executable(path.dir()) {
            // SAFETY: This is vulnerable to TOCTOU,
            // See the comment at function header.
            Ok(unsafe { request.continue_syscall() })
        } else {
            Err(Errno::EACCES)
        }
    })
}