#![allow(clippy::undocumented_unsafe_blocks)]
use std::{
env, mem,
os::{
fd::{AsRawFd, FromRawFd, RawFd},
unix::ffi::OsStrExt,
},
ptr,
};
use btoi::btoi;
use memchr::arch::all::is_prefix;
use nix::{
errno::Errno,
libc,
sys::{
resource::{setrlimit, Resource},
signal::{kill, sigprocmask, SigSet, SigmaskHow, Signal},
},
unistd::{execvp, getpid, read, setsid, tcsetpgrp, write},
};
use crate::{
caps::Capability,
compat::{seccomp_load_listener, set_dumpable, set_name, set_pdeathsig},
config::*,
confine::{
confine_landlock_scope, confine_scmp_ioctl_cld, confine_scmp_kptr, confine_scmp_pwritev2,
safe_drop_cap,
},
fd::{close, close_static_files, SafeOwnedFd, ROOT_FILE},
retry::retry_on_eintr,
unshare::{config::Config, error::ErrorCode as Err, run::ChildInfo},
};
unsafe fn fail_errno(code: Err, errno: i32) -> ! {
let msg = match code {
Err::CapSet => c"syd: capset error".as_ptr(),
Err::Exec => c"syd: exec error".as_ptr(),
Err::ParentDeathSignal => c"syd: parent-death-signal error".as_ptr(),
Err::PreExec => c"syd: pre-exec error".as_ptr(),
Err::ProcessStop => c"syd: error stopping process".as_ptr(),
Err::ResetSignal => c"syd: error resetting signals".as_ptr(),
Err::SetResourceLimits => c"syd: error setting resource limits".as_ptr(),
Err::LandlockFilterScopedSignals => c"syd: error scoping signals with landlock".as_ptr(),
Err::Seccomp => c"syd: seccomp error".as_ptr(),
Err::SeccompFilterIoctl => c"syd: seccomp filter ioctl error".as_ptr(),
Err::SeccompFilterAppendOnly => c"syd: seccomp filter pwritev2 error".as_ptr(),
Err::SeccompFilterKptr => c"syd: seccomp filter kernel pointer error".as_ptr(),
Err::SeccompSendFd => c"syd: seccomp send notify-fd error".as_ptr(),
Err::SeccompWaitFd => c"syd: seccomp wait for notify-fd error".as_ptr(),
Err::SetDumpable => c"syd: error resetting process dumpable attribute".as_ptr(),
Err::SetSid => c"syd: setsid error".as_ptr(),
Err::SetPty => c"syd: error setting pty as controlling terminal".as_ptr(),
Err::DupPty => c"syd: error duplicating pty onto stdio fds".as_ptr(),
Err::SetPgrp => c"syd: error setting foreground process group".as_ptr(),
Err::SetTSC => c"syd: set-tsc error".as_ptr(),
};
Errno::set_raw(errno);
libc::perror(msg as *const libc::c_char);
libc::_exit(errno);
}
// Terminates the child through `fail_errno`, using whatever errno value is
// current at the point of expansion.
//
// The first argument is accepted at the call site but is never expanded.
// The errno value is captured before the error code expression is evaluated.
macro_rules! fail_safe {
    ($child:expr, $code:expr) => {{
        let last = Errno::last_raw();
        unsafe { fail_errno($code, last) }
    }};
}
// Terminates the child through `fail_errno` with an explicitly supplied raw
// errno value.
//
// The first argument is accepted at the call site but is never expanded.
macro_rules! fail_errno_safe {
    ($child:expr, $code:expr, $raw:expr) => {
        unsafe { fail_errno($code, $raw) }
    };
}
/// Entry point executed in the child after clone: prepares the process and
/// finally replaces it with the target program. Never returns.
///
/// Steps, in the order the code performs them:
///
/// 1. Rename the process to `syd_exec`, keep one fd from each seccomp pipe
///    pair and close the other fd of each pair.
/// 2. Optionally set the parent-death signal.
/// 3. Optionally start a new session, make the PTY the controlling terminal,
///    make this process the foreground process group and duplicate the PTY
///    onto stdin/stdout/stderr.
/// 4. Optionally reset the signal mask and call `crate::reset_signals`.
/// 5. Optionally apply the ioctl, pwritev2 and kernel-pointer confinement
///    helpers, then always call `confine_landlock_scope`.
/// 6. Close static files and the fds named by a fixed set of environment
///    variables, then scrub the environment.
/// 7. Run the optional pre-exec callback, optionally set `PR_SET_TSC`,
///    optionally apply resource limits early, mark the process dumpable and
///    optionally stop it with `SIGSTOP`.
/// 8. If a seccomp filter is configured: load it, send the resulting fd
///    number over the pipe and wait for a one-byte acknowledgement.
/// 9. Drop capabilities, apply resource limits late (when not done early),
///    then either exit with status 0 (when `ENV_DUMP_SCMP` is set) or
///    `execvp` the target program.
///
/// Every failure path ends the process through `fail_errno`, which prints a
/// message and exits with the errno value as the status.
#[expect(clippy::cognitive_complexity)]
pub fn child_after_clone(mut child: Box<ChildInfo>) -> ! {
// Best effort: a failure to set the process name is ignored.
let _ = set_name(c"syd_exec");
// Take ownership of the two pipe fds this function uses: `pipe_ro` is only
// read from below and `pipe_rw` is only written to below.
let (pipe_ro, pipe_rw) = unsafe {
(
SafeOwnedFd::from_raw_fd(child.seccomp_pipefd.0 .0),
SafeOwnedFd::from_raw_fd(child.seccomp_pipefd.1 .1),
)
};
// Close the other fd of each pair; close errors are ignored.
let _ = close(child.seccomp_pipefd.0 .1);
let _ = close(child.seccomp_pipefd.1 .0);
// Configure the parent-death signal when one is set in the config.
if let Some(&sig) = child.cfg.death_sig.as_ref() {
if let Err(errno) = set_pdeathsig(Some(sig)) {
fail_errno_safe!(child, Err::ParentDeathSignal, errno as i32);
}
}
// PTY setup: only performed when a PTY fd was handed to the child.
if let Some(pty_fd) = child.pty_fd.take() {
let pty_fd = unsafe { SafeOwnedFd::from_raw_fd(pty_fd) };
// Start a new session.
if let Err(errno) = setsid() {
fail_errno_safe!(child, Err::SetSid, errno as i32);
}
// Make the PTY the controlling terminal (TIOCSCTTY with argument 0).
if let Err(errno) =
Errno::result(unsafe { libc::ioctl(pty_fd.as_raw_fd(), libc::TIOCSCTTY, 0) })
{
fail_errno_safe!(child, Err::SetPty, errno as i32);
}
// Make this process the terminal's foreground process group.
if let Err(errno) = tcsetpgrp(&pty_fd, getpid()) {
fail_errno_safe!(child, Err::SetPgrp, errno as i32);
}
// Duplicate the PTY onto stdin, stdout and stderr.
for std_fd in [libc::STDIN_FILENO, libc::STDOUT_FILENO, libc::STDERR_FILENO] {
if let Err(errno) = Errno::result(unsafe { libc::dup2(pty_fd.as_raw_fd(), std_fd) }) {
fail_errno_safe!(child, Err::DupPty, errno as i32);
}
}
// The original PTY fd is released here; the three duplicates remain.
drop(pty_fd);
}
// Signal reset: only performed when requested by the config.
if child.cfg.restore_sigmask {
// First install an empty signal mask. The return value of
// pthread_sigmask is not checked.
unsafe {
let mut sigmask: libc::sigset_t = mem::zeroed();
libc::sigemptyset(&raw mut sigmask);
libc::pthread_sigmask(libc::SIG_SETMASK, &raw const sigmask, ptr::null_mut());
}
// Then explicitly unblock every signal; this call is checked.
let sigmask = SigSet::all();
if let Err(errno) = sigprocmask(SigmaskHow::SIG_UNBLOCK, Some(&sigmask), None) {
fail_errno_safe!(child, Err::ResetSignal, errno as i32);
}
// NOTE(review): presumably restores default signal dispositions; the
// helper is defined elsewhere in the crate - confirm.
if let Err(errno) = crate::reset_signals() {
fail_errno_safe!(child, Err::ResetSignal, errno as i32);
}
}
// Optional confinement helpers. When a helper's error carries no errno,
// ENOSYS is reported instead.
if let Some(denylist) = child.ioctl_denylist.take() {
if let Err(error) = confine_scmp_ioctl_cld(&denylist, child.cfg.ssb) {
let errno = error.errno().unwrap_or(Errno::ENOSYS);
fail_errno_safe!(child, Err::SeccompFilterIoctl, errno as i32);
}
}
if child.cfg.append_only {
if let Err(error) = confine_scmp_pwritev2(child.cfg.ssb) {
let errno = error.errno().unwrap_or(Errno::ENOSYS);
fail_errno_safe!(child, Err::SeccompFilterAppendOnly, errno as i32);
}
}
if child.cfg.restrict_kptr {
if let Err(error) = confine_scmp_kptr(child.cfg.ssb) {
let errno = error.errno().unwrap_or(Errno::ENOSYS);
fail_errno_safe!(child, Err::SeccompFilterKptr, errno as i32);
}
}
// Always invoked, with the landlock settings taken from the config.
if let Err(errno) = confine_landlock_scope(
Some(ROOT_FILE()),
child.cfg.landlock_access_fs,
child.cfg.landlock_access_net,
child.cfg.landlock_scoped_abs,
) {
fail_errno_safe!(child, Err::LandlockFilterScopedSignals, errno as i32);
}
close_static_files();
// Close the fds whose numbers are published in these environment
// variables. Unset variables, unparsable values and negative numbers are
// skipped; close errors are ignored.
const CLOSE_FD_ENVS: &[&str] = &[ENV_LOG_FD, ENV_IPC_POLL_FD, ENV_IPC_UNIX_FD];
for env in CLOSE_FD_ENVS {
let fd = if let Some(fd) = env::var_os(env) {
btoi::<RawFd>(fd.as_bytes()).ok()
} else {
None
};
if let Some(fd) = fd {
if fd >= 0 {
let _ = close(fd);
}
}
}
// Read the export flag before the environment is scrubbed below.
// NOTE(review): presumably ENV_DUMP_SCMP carries the SYD_ prefix and would
// be removed by the scrub - confirm against config.
let is_export = env::var_os(ENV_DUMP_SCMP).is_some();
// RUST_BACKTRACE for the child is taken from SYD_RUST_BACKTRACE; when that
// is unset, RUST_BACKTRACE is removed.
match env::var_os("SYD_RUST_BACKTRACE") {
Some(val) => env::set_var("RUST_BACKTRACE", val),
None => env::remove_var("RUST_BACKTRACE"),
}
// Remove every variable whose name starts with CARGO_BIN_EXE_syd, and every
// SYD_ variable except those starting with SYD_TEST_.
for (key, _) in env::vars_os() {
if is_prefix(key.as_bytes(), b"CARGO_BIN_EXE_syd")
|| (is_prefix(key.as_bytes(), b"SYD_") && !is_prefix(key.as_bytes(), b"SYD_TEST_"))
{
env::remove_var(key);
}
}
// Run the caller-supplied pre-exec hook, if any.
if let Some(callback) = &child.pre_exec {
if let Err(errno) = callback() {
fail_errno_safe!(child, Err::PreExec, errno as i32);
}
}
// Optionally set PR_SET_TSC to PR_TSC_SIGSEGV.
if child.cfg.deny_tsc {
if let Err(errno) =
Errno::result(unsafe { libc::prctl(libc::PR_SET_TSC, libc::PR_TSC_SIGSEGV) })
{
fail_errno_safe!(child, Err::SetTSC, errno as i32);
}
}
// With restrict_prlimit set, resource limits are applied here, before the
// seccomp filter is loaded; otherwise they are applied near the end.
if child.cfg.restrict_prlimit {
if let Err(errno) = set_resource_limits(&child.cfg) {
fail_errno_safe!(child, Err::SetResourceLimits, errno as i32);
}
}
if let Err(errno) = set_dumpable(true) {
fail_errno_safe!(child, Err::SetDumpable, errno as i32);
}
// Optionally stop this process by sending SIGSTOP to itself.
if child.cfg.stop {
if let Err(errno) = kill(getpid(), Signal::SIGSTOP) {
fail_errno_safe!(child, Err::ProcessStop, errno as i32);
}
}
if let Some(seccomp_filter) = child.seccomp_filter {
// Load the filter and obtain the resulting fd.
let seccomp_fd = match seccomp_load_listener(&seccomp_filter) {
Ok(fd) => fd,
Err(errno) => fail_errno_safe!(child, Err::Seccomp, errno as i32),
};
// Send the fd *number* as little-endian bytes over the pipe, looping
// until every byte is written. A zero-length write is reported as EIO.
let fd = seccomp_fd.as_raw_fd().to_le_bytes();
let mut nwrite = 0;
while nwrite < fd.len() {
#[expect(clippy::arithmetic_side_effects)]
match retry_on_eintr(|| write(&pipe_rw, &fd[nwrite..])) {
Ok(0) => {
fail_errno_safe!(child, Err::SeccompSendFd, Errno::EIO as i32);
}
Ok(n) => nwrite += n,
Err(errno) => fail_errno_safe!(child, Err::SeccompSendFd, errno as i32),
}
}
drop(pipe_rw);
// Wait for a one-byte acknowledgement on the other pipe. End-of-file
// is reported as EIO; the only accepted byte value is 42.
let mut buf = [0u8; 1];
match retry_on_eintr(|| read(&pipe_ro, &mut buf[..])) {
Ok(0) => {
fail_errno_safe!(child, Err::SeccompWaitFd, Errno::EIO as i32);
}
Ok(1) if buf[0] == 42 => {
// Acknowledged: continue with the remaining setup.
}
// Any other byte is treated as a programming error and panics.
Ok(_) => unreachable!("BUG: The meaning of life is not {:#x}!", buf[0]),
Err(errno) => fail_errno_safe!(child, Err::SeccompWaitFd, errno as i32),
}
// Release the seccomp fd, the filter and the remaining pipe end.
drop(seccomp_fd);
drop(seccomp_filter);
drop(pipe_ro);
} else {
// No filter configured: the pipes are not used, just release them.
drop(pipe_ro);
drop(pipe_rw);
}
// Capabilities that are dropped unconditionally.
const CAP_DROP: &[Capability] = &[
Capability::CAP_CHOWN,
Capability::CAP_MKNOD,
Capability::CAP_NET_BIND_SERVICE,
Capability::CAP_NET_RAW,
];
for cap in CAP_DROP {
// The helper's error value is discarded; fail_safe! reports the
// current errno instead.
if safe_drop_cap(*cap).is_err() {
fail_safe!(child, Err::CapSet);
}
}
// CAP_SYS_PTRACE is dropped as well unless the config asks to keep it.
if !child.cfg.keep && safe_drop_cap(Capability::CAP_SYS_PTRACE).is_err() {
fail_safe!(child, Err::CapSet);
}
// Late resource-limit application: the counterpart of the early branch
// above, taken when restrict_prlimit is not set.
if !child.cfg.restrict_prlimit {
if let Err(errno) = set_resource_limits(&child.cfg) {
fail_errno_safe!(child, Err::SetResourceLimits, errno as i32);
}
}
// When ENV_DUMP_SCMP was set, exit successfully without executing anything.
if is_export {
unsafe { libc::_exit(0) };
}
// execvp only returns on failure, so the result is bound as an error.
let Err(errno) = execvp(&child.exe_file, &child.exe_args);
fail_errno_safe!(child, Err::Exec, errno as i32);
}
/// Applies every resource limit configured in `cfg` to the current process.
///
/// Each `rlimit_*` field holding `Some(lim)` is handed to `setrlimit` with
/// `lim` used for both limit arguments; fields that are `None` are skipped.
/// Limits are applied in the fixed order of the table below.
///
/// # Errors
///
/// Returns the `Errno` of the first `setrlimit` call that fails; limits later
/// in the table are not attempted in that case.
fn set_resource_limits(cfg: &Config) -> Result<(), Errno> {
    // Ordered table of (resource, configured value).
    let table = [
        (Resource::RLIMIT_AS, cfg.rlimit_as),
        (Resource::RLIMIT_CORE, cfg.rlimit_core),
        (Resource::RLIMIT_CPU, cfg.rlimit_cpu),
        (Resource::RLIMIT_DATA, cfg.rlimit_data),
        (Resource::RLIMIT_FSIZE, cfg.rlimit_fsize),
        (Resource::RLIMIT_MEMLOCK, cfg.rlimit_memlock),
        (Resource::RLIMIT_MSGQUEUE, cfg.rlimit_msgqueue),
        (Resource::RLIMIT_NICE, cfg.rlimit_nice),
        (Resource::RLIMIT_NOFILE, cfg.rlimit_nofile),
        (Resource::RLIMIT_NPROC, cfg.rlimit_nproc),
        (Resource::RLIMIT_RTPRIO, cfg.rlimit_rtprio),
        (Resource::RLIMIT_RTTIME, cfg.rlimit_rttime),
        (Resource::RLIMIT_SIGPENDING, cfg.rlimit_sigpending),
        (Resource::RLIMIT_STACK, cfg.rlimit_stack),
    ];

    for (resource, configured) in table {
        if let Some(value) = configured {
            setrlimit(resource, value, value)?;
        }
    }

    Ok(())
}