//
// Syd: rock-solid application kernel
// src/kernel/exec.rs: exec(3) handlers
//
// Copyright (c) 2023, 2024, 2025, 2026 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0
use libseccomp::ScmpNotifResp;
use nix::{errno::Errno, fcntl::AtFlags};
use crate::{
fd::{is_executable, AT_EXECVE_CHECK},
kernel::{sandbox_path, to_atflags},
lookup::{FileType, FsFlags},
req::{SysArg, SysFlags, UNotifyEventRequest},
sandbox::Capability,
};
/// Entry point for `execve(2)` notifications.
///
/// Describes the path argument (index 0, looked up with
/// `FsFlags::MUST_PATH`) and delegates all checks to
/// `syscall_exec_handler`.
pub(crate) fn sys_execve(request: UNotifyEventRequest) -> ScmpNotifResp {
    syscall_exec_handler(
        request,
        "execve",
        SysArg {
            path: Some(0),
            fsflags: FsFlags::MUST_PATH,
            ..Default::default()
        },
    )
}
/// Entry point for `execveat(2)` notifications.
///
/// Validates the flags argument (syscall argument 4), translates the
/// accepted flags into path lookup options, and delegates all checks to
/// `syscall_exec_handler`.
pub(crate) fn sys_execveat(request: UNotifyEventRequest) -> ScmpNotifResp {
    // Only these flags are accepted; on anything else the syscall is
    // failed with the errno returned by to_atflags().
    let valid_flags = AtFlags::AT_SYMLINK_NOFOLLOW | AtFlags::AT_EMPTY_PATH | AT_EXECVE_CHECK;
    let flags = match to_atflags(request.scmpreq.data.args[4], valid_flags) {
        Err(errno) => return request.fail_syscall(errno),
        Ok(flags) => flags,
    };

    // AT_SYMLINK_NOFOLLOW adds NO_FOLLOW_LAST to the lookup flags.
    let mut fsflags = FsFlags::MUST_PATH;
    if flags.contains(AtFlags::AT_SYMLINK_NOFOLLOW) {
        fsflags.insert(FsFlags::NO_FOLLOW_LAST);
    }

    // AT_EMPTY_PATH is forwarded to the path reader as EMPTY_PATH.
    let sysflags = if flags.contains(AtFlags::AT_EMPTY_PATH) {
        SysFlags::EMPTY_PATH
    } else {
        SysFlags::empty()
    };

    syscall_exec_handler(
        request,
        "execveat",
        SysArg {
            dirfd: Some(0),
            path: Some(1),
            flags: sysflags,
            fsflags,
        },
    )
}
// Shared implementation behind sys_execve() and sys_execveat().
//
// This handler only runs with trace/allow_unsafe_ptrace:1, and it is
// vulnerable to TOCTOU. With ptrace on, this is mitigated using the
// TOCTOU-mitigator, see the wait() function for context.
// See: https://bugzilla.kernel.org/show_bug.cgi?id=218501
fn syscall_exec_handler(
    request: UNotifyEventRequest,
    syscall_name: &str,
    arg: SysArg,
) -> ScmpNotifResp {
    syscall_handler!(request, |request: UNotifyEventRequest| {
        // The exec checks are performed even if exec sandboxing is off.
        // In that case the path check is skipped and only the file
        // executability check (aka AT_EXECVE_CHECK) is performed.
        let sandbox = request.get_sandbox();
        let restrict_memfd = !sandbox.flags.allow_unsafe_memfd();

        // Read the remote path.
        let (path, _, _) = request.read_path(&sandbox, arg)?;

        // Call the sandbox access checker only if Exec sandboxing is on.
        if sandbox.enabled(Capability::CAP_EXEC) {
            sandbox_path(
                Some(&request),
                &sandbox,
                request.scmpreq.pid(), // Unused when request.is_some()
                path.abs(),
                Capability::CAP_EXEC,
                syscall_name,
            )?;
        }

        // Release the read-lock.
        drop(sandbox);

        // AT_SYMLINK_NOFOLLOW: If the file identified by dirfd and
        // a non-NULL pathname is a symbolic link, then the call
        // fails with the error ELOOP.
        if !arg.fsflags.follow_last() {
            let is_link = path
                .typ
                .as_ref()
                .is_some_and(|typ| typ.is_symlink() || typ.is_magic_link());
            if is_link {
                return Err(Errno::ELOOP);
            }
        }

        // Only regular files may be executed; memory fds are accepted
        // as well unless restricted, which depends on the
        // trace/allow_unsafe_memfd option. Everything else is refused
        // with EACCES without any more processing.
        //
        // Note, attempting to execute a directory on Linux returns
        // EACCES, not EISDIR like the manual page claims. GNU make
        // has a test checking this errno.
        let type_allowed = match path.typ.as_ref() {
            Some(FileType::Reg) => true,
            Some(FileType::Mfd) => !restrict_memfd,
            _ => false,
        };
        if !type_allowed {
            return Err(Errno::EACCES);
        }

        // Return EACCES without any more processing if the file is not
        // executable. This uses AT_EXECVE_CHECK on Linux>=6.14.
        if !is_executable(path.dir()) {
            return Err(Errno::EACCES);
        }

        // SAFETY: This is vulnerable to TOCTOU,
        // See the comment at function header.
        Ok(unsafe { request.continue_syscall() })
    })
}