// syd 3.52.0
//
// rock-solid application kernel
// Documentation
//
// Syd: rock-solid application kernel
// src/kernel/signal.rs: Signal syscall handlers
//
// Copyright (c) 2023, 2024, 2025, 2026 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

use std::{env, os::unix::ffi::OsStrExt};

use bitflags::bitflags;
use btoi::btoi;
use libseccomp::ScmpNotifResp;
use nix::{
    errno::Errno,
    unistd::{getpgid, getpgrp, Pid},
};

use crate::{
    config::HAVE_PIDFD_THREAD,
    confine::scmp_arch_nsig,
    fd::{PIDFD_NONBLOCK, PIDFD_THREAD},
    fs::{tgkill, tkill},
    req::UNotifyEventRequest,
};

bitflags! {
    // Flag set classifying which signal-related system call is handled.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
    struct SignalSyscall: u8 {
        // Set if the signal is group directed.
        const SIGSYS_GROUP = 0b0001;
        // Set if the signal is thread directed.
        const SIGSYS_THREAD = 0b0010;
        // Set if the signal is queued.
        const SIGSYS_QUEUE = 0b0100;
        // Set if the system call is pidfd_open(2).
        const SIGSYS_PIDFD = 0b1000;
    }
}

impl SignalSyscall {
    // Reports whether the group-directed flag is set.
    fn is_group(self) -> bool {
        self.intersects(Self::SIGSYS_GROUP)
    }

    // Reports whether the thread-directed flag is set.
    fn is_thread(self) -> bool {
        self.intersects(Self::SIGSYS_THREAD)
    }

    // Reports whether both the group and the thread flags are set.
    fn is_thread_group(self) -> bool {
        self.is_group() && self.is_thread()
    }

    // Reports whether the queue flag is set.
    fn is_queue(self) -> bool {
        self.intersects(Self::SIGSYS_QUEUE)
    }

    // Reports whether the pidfd_open(2) flag is set.
    fn is_pidfd(self) -> bool {
        self.intersects(Self::SIGSYS_PIDFD)
    }
}

// Entry point for kill(2): forwards to the shared handler with no flag set.
pub(crate) fn sys_kill(request: UNotifyEventRequest) -> ScmpNotifResp {
    let syscall = SignalSyscall::empty();
    syscall_signal_handler(request, syscall)
}

// Entry point for tgkill(2): forwards to the shared handler with the
// thread and group flags set.
pub(crate) fn sys_tgkill(request: UNotifyEventRequest) -> ScmpNotifResp {
    let syscall = SignalSyscall::SIGSYS_THREAD | SignalSyscall::SIGSYS_GROUP;
    syscall_signal_handler(request, syscall)
}

// Entry point for tkill(2): forwards to the shared handler with the
// thread flag set.
pub(crate) fn sys_tkill(request: UNotifyEventRequest) -> ScmpNotifResp {
    let syscall = SignalSyscall::SIGSYS_THREAD;
    syscall_signal_handler(request, syscall)
}

// Entry point for rt_sigqueueinfo(2): forwards to the shared handler with
// the queue flag set.
pub(crate) fn sys_rt_sigqueueinfo(request: UNotifyEventRequest) -> ScmpNotifResp {
    let syscall = SignalSyscall::SIGSYS_QUEUE;
    syscall_signal_handler(request, syscall)
}

// Entry point for rt_tgsigqueueinfo(2): forwards to the shared handler with
// the queue, thread and group flags set.
pub(crate) fn sys_rt_tgsigqueueinfo(request: UNotifyEventRequest) -> ScmpNotifResp {
    let syscall =
        SignalSyscall::SIGSYS_THREAD | SignalSyscall::SIGSYS_GROUP | SignalSyscall::SIGSYS_QUEUE;
    syscall_signal_handler(request, syscall)
}

// Entry point for pidfd_open(2).
//
// Rejects unknown flag bits with EINVAL, then forwards to the shared
// handler. The thread flag is added to the classification only when
// PIDFD_THREAD is both available and requested.
pub(crate) fn sys_pidfd_open(request: UNotifyEventRequest) -> ScmpNotifResp {
    // The flags are the second system call argument.
    #[expect(clippy::cast_possible_truncation)]
    let flags = request.scmpreq.data.args[1] as u32;

    // PIDFD_THREAD is accepted only when HAVE_PIDFD_THREAD is true.
    let have_thread = *HAVE_PIDFD_THREAD;
    let allowed = if have_thread {
        PIDFD_NONBLOCK | PIDFD_THREAD
    } else {
        PIDFD_NONBLOCK
    };

    // Any bit outside the allowed set is an error.
    let unknown = flags & !allowed;
    if unknown != 0 {
        return request.fail_syscall(Errno::EINVAL);
    }

    let mut syscall = SignalSyscall::SIGSYS_PIDFD;
    if have_thread && flags & PIDFD_THREAD != 0 {
        syscall |= SignalSyscall::SIGSYS_THREAD;
    }

    syscall_signal_handler(request, syscall)
}

// Upper bound (wrap-around limit) for PIDs supported by the kernel.
//
// 32-bit platforms fix this at 32768, whereas 64-bit platforms allow
// up to 2^22 (4194304, approximately 4 million).
//
// Syd may be built as 32-bit and run on a 64-bit platform,
// so the 64-bit limit is used unconditionally.
const PID_MAX_LIMIT: libc::pid_t = 4 * 1024 * 1024;

// Shared handler for the signal-related system calls: kill(2), tkill(2),
// tgkill(2), rt_sigqueueinfo(2), rt_tgsigqueueinfo(2) and pidfd_open(2).
// It protects the Syd process and its threads from signals.
//
// `syscall` identifies which system call is being handled; the empty
// set means kill(2).
//
// The handler works in two phases:
// 1. Validate the arguments and return the errno(3) Linux returns for
//    invalid ones. The order of the checks determines which errno(3)
//    wins when several arguments are invalid.
// 2. Deny requests that target the Syd process, one of its threads, its
//    process group, or the syd-pty/syd-tor PIDs read from the
//    SYD_PID_PTY and SYD_PID_TOR environment variables. These are denied
//    with ESRCH, except kill(2) with PID 0 from within Syd's process
//    group which is denied with EPERM.
//
// Requests passing both phases are continued.
fn syscall_signal_handler(request: UNotifyEventRequest, syscall: SignalSyscall) -> ScmpNotifResp {
    syscall_handler!(request, |request: UNotifyEventRequest| {
        let req = request.scmpreq;

        // Read the first two arguments as PID and TID.
        //
        // The second argument is only used as a TID for group directed
        // calls, every use of `tid` below is guarded accordingly.
        #[expect(clippy::cast_possible_truncation)]
        let pid = req.data.args[0] as libc::pid_t;
        #[expect(clippy::cast_possible_truncation)]
        let tid = req.data.args[1] as libc::pid_t;

        // kill(2) returns ESRCH for i32::MIN.
        if syscall.is_empty() && pid == i32::MIN {
            return Err(Errno::ESRCH);
        }

        // kill(2) allows negative PIDs,
        // so the valid range is symmetric around zero.
        if syscall.is_empty() && !(-PID_MAX_LIMIT..=PID_MAX_LIMIT).contains(&pid) {
            return Err(Errno::ESRCH);
        }

        // rt_{tg,}sigqueueinfo return ESRCH/EINVAL on negative TGID.
        if syscall.is_queue() && !(1..=PID_MAX_LIMIT).contains(&pid) {
            return Err(if syscall.is_group() {
                if pid > 0 && tid > 0 {
                    Errno::ESRCH
                } else {
                    Errno::EINVAL
                }
            } else {
                Errno::ESRCH
            });
        }

        // pidfd_open(2) rejects negative/zero PIDs.
        if syscall.is_pidfd() && pid <= 0 {
            return Err(Errno::EINVAL);
        }

        // tkill(2), tgkill(2) and pidfd_open(2) return EINVAL on negative TGID.
        // A positive but out of range ID yields ESRCH instead.
        if syscall.is_thread() && !(1..=PID_MAX_LIMIT).contains(&pid) {
            return Err(if pid > 0 && (!syscall.is_thread_group() || tid > 0) {
                Errno::ESRCH
            } else {
                Errno::EINVAL
            });
        }

        // tgkill(2) returns EINVAL on negative TID.
        if syscall.is_group() && !(1..=PID_MAX_LIMIT).contains(&tid) {
            return Err(if pid > 0 && tid > 0 {
                Errno::ESRCH
            } else {
                Errno::EINVAL
            });
        }

        // Linux returns ESRCH for nonexistent pid before EINVAL for invalid sig.
        //
        // Probe the target with signal 0, and act on ESRCH only.
        // Skipped for pidfd_open(2) and for kill(2) with PID 0 or -1.
        if !syscall.is_pidfd() && !(syscall.is_empty() && (pid == 0 || pid == -1)) {
            let pid = Pid::from_raw(pid);
            let tid = Pid::from_raw(tid);
            let result = if syscall.is_thread_group() {
                tgkill(pid, tid, 0)
            } else if syscall.is_thread() {
                tkill(pid, 0)
            } else {
                tgkill(pid, pid, 0)
            };
            if result == Err(Errno::ESRCH) {
                return Err(Errno::ESRCH);
            }
        }

        // Linux rejects invalid signal numbers.
        if !syscall.is_pidfd() {
            // The signal is the third argument for group directed calls,
            // and the second argument otherwise.
            let sig_idx = if syscall.is_thread_group() { 2 } else { 1 };

            #[expect(clippy::cast_possible_truncation)]
            let sig = req.data.args[sig_idx] as libc::c_int;

            if !(0..=scmp_arch_nsig(req.data.arch)).contains(&sig) {
                return Err(Errno::EINVAL);
            }
        }

        // Guard syd tasks.
        //
        // pid <= 0 is only possible for kill(2) here,
        // all other calls were rejected above.
        if pid == -1 {
            // We do not allow mass signaling with -1.
            // ESRCH errno(3) is consistent with landlock(7) scoped signals.
            return Err(Errno::ESRCH);
        } else if pid == 0 {
            // This is a version of killpg(3):
            // We must prevent this signal if Syd is in the same process group
            // as the sandbox process, otherwise it is safe to continue.
            return match getpgid(Some(req.pid())) {
                Ok(pgrp) if pgrp == getpgrp() => Err(Errno::EPERM),
                Err(_) => Err(Errno::ESRCH),
                _ => {
                    // SAFETY: No pointer dereference during access check.
                    Ok(unsafe { request.continue_syscall() })
                }
            };
        }

        // kill and sigqueue support negative PIDs.
        // For thread directed calls the PID is used as is.
        let pid_abs = if syscall.is_thread() { pid } else { pid.abs() };

        // Check for Syd tasks.
        let syd = Pid::this();
        let pid = Pid::from_raw(pid);
        let tid = Pid::from_raw(tid);
        let pid_abs = Pid::from_raw(pid_abs);

        // ESRCH errno(3) is consistent with landlock(7) scoped signals.
        if !syscall.is_thread() && syd == pid_abs {
            return Err(Errno::ESRCH);
        }

        if syscall.is_thread() && syd == pid {
            return Err(Errno::ESRCH);
        }

        if syscall.is_thread_group() && syd == tid {
            return Err(Errno::ESRCH);
        }

        // Check for Syd threads with the absolute PID:
        // A successful probe with signal 0 within Syd's thread group
        // is treated as a hit.
        if !syscall.is_thread() && tgkill(syd, pid_abs, 0).is_ok() {
            return Err(Errno::ESRCH);
        }

        // Check for Syd threads with the PID.
        if syscall.is_thread() && tgkill(syd, pid, 0).is_ok() {
            return Err(Errno::ESRCH);
        }

        // Check for Syd threads with the TID.
        // The probe is skipped when TID equals PID which was probed above.
        if syscall.is_thread_group() && pid != tid && tgkill(syd, tid, 0).is_ok() {
            return Err(Errno::ESRCH);
        }

        // Check signals directed to Syd's process group.
        let syd_pgid = getpgrp();
        if !syscall.is_thread() && syd_pgid == pid_abs {
            return Err(Errno::ESRCH);
        }
        if syscall.is_thread() && syd_pgid == pid {
            return Err(Errno::ESRCH);
        }
        if syscall.is_thread_group() && syd_pgid == tid {
            return Err(Errno::ESRCH);
        }

        // Check signals directed to syd-pty.
        // The check is skipped if SYD_PID_PTY is unset or not a valid PID.
        if let Some(syd_pty) = env::var_os("SYD_PID_PTY")
            .and_then(|pid| btoi::<libc::pid_t>(pid.as_bytes()).ok())
            .map(Pid::from_raw)
        {
            // ESRCH errno(3) is consistent with landlock(7) scoped signals.
            if !syscall.is_thread() && syd_pty == pid_abs {
                return Err(Errno::ESRCH);
            }

            if syscall.is_thread() && syd_pty == pid {
                return Err(Errno::ESRCH);
            }

            if syscall.is_thread_group() && syd_pty == tid {
                return Err(Errno::ESRCH);
            }
        }

        // Check signals directed to syd-tor.
        // The check is skipped if SYD_PID_TOR is unset or not a valid PID.
        if let Some(syd_tor) = env::var_os("SYD_PID_TOR")
            .and_then(|pid| btoi::<libc::pid_t>(pid.as_bytes()).ok())
            .map(Pid::from_raw)
        {
            // ESRCH errno(3) is consistent with landlock(7) scoped signals.
            if !syscall.is_thread() && syd_tor == pid_abs {
                return Err(Errno::ESRCH);
            }

            if syscall.is_thread() && syd_tor == pid {
                return Err(Errno::ESRCH);
            }

            if syscall.is_thread_group() && syd_tor == tid {
                return Err(Errno::ESRCH);
            }
        }

        // The target is not a protected task, let the kernel run the call.
        //
        // SAFETY: No pointer dereference in access check.
        Ok(unsafe { request.continue_syscall() })
    })
}

// Unit tests for the SignalSyscall flag predicates.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_signal_syscall_is_group_1() {
        assert!(SignalSyscall::SIGSYS_GROUP.is_group());
    }

    #[test]
    fn test_signal_syscall_is_group_2() {
        assert!(!SignalSyscall::empty().is_group());
    }

    #[test]
    fn test_signal_syscall_is_thread_1() {
        assert!(SignalSyscall::SIGSYS_THREAD.is_thread());
    }

    #[test]
    fn test_signal_syscall_is_thread_2() {
        assert!(!SignalSyscall::empty().is_thread());
    }

    #[test]
    fn test_signal_syscall_is_thread_group_1() {
        let flags = SignalSyscall::SIGSYS_GROUP | SignalSyscall::SIGSYS_THREAD;
        assert!(flags.is_thread_group());
    }

    #[test]
    fn test_signal_syscall_is_thread_group_2() {
        assert!(!SignalSyscall::SIGSYS_GROUP.is_thread_group());
    }

    #[test]
    fn test_signal_syscall_is_thread_group_3() {
        assert!(!SignalSyscall::SIGSYS_THREAD.is_thread_group());
    }

    #[test]
    fn test_signal_syscall_is_queue_1() {
        assert!(SignalSyscall::SIGSYS_QUEUE.is_queue());
    }

    #[test]
    fn test_signal_syscall_is_queue_2() {
        assert!(!SignalSyscall::empty().is_queue());
    }

    #[test]
    fn test_signal_syscall_is_pidfd_1() {
        assert!(SignalSyscall::SIGSYS_PIDFD.is_pidfd());
    }

    #[test]
    fn test_signal_syscall_is_pidfd_2() {
        assert!(!SignalSyscall::empty().is_pidfd());
    }

    // pidfd_open(2) with PIDFD_THREAD is thread directed,
    // but must not be treated as a thread group call.
    #[test]
    fn test_signal_syscall_is_pidfd_3() {
        let flags = SignalSyscall::SIGSYS_PIDFD | SignalSyscall::SIGSYS_THREAD;
        assert!(flags.is_pidfd());
        assert!(flags.is_thread());
        assert!(!flags.is_group());
        assert!(!flags.is_thread_group());
    }
}