1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
//
// Syd: rock-solid application kernel
// src/kernel/exec.rs: exec(3) handlers
//
// Copyright (c) 2023, 2024, 2025, 2026 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0
use libseccomp::ScmpNotifResp;
use nix::{errno::Errno, fcntl::AtFlags};
use crate::{
fd::{is_executable, AT_EXECVE_CHECK},
kernel::{sandbox_path, to_atflags},
lookup::{FileType, FsFlags},
req::{SysArg, SysFlags, UNotifyEventRequest},
sandbox::Capability,
};
/// Seccomp-notify handler for `execve`.
///
/// Describes the syscall's path argument and forwards the request to
/// `syscall_exec_handler`.
pub(crate) fn sys_execve(request: UNotifyEventRequest) -> ScmpNotifResp {
    syscall_exec_handler(
        request,
        "execve",
        SysArg {
            // The pathname is the first syscall argument.
            path: Some(0),
            fsflags: FsFlags::MUST_PATH,
            ..Default::default()
        },
    )
}
/// Seccomp-notify handler for `execveat`.
///
/// Validates the flags argument, translates it into the lookup flags
/// of a `SysArg`, and forwards the request to `syscall_exec_handler`.
/// The syscall is failed with the errno returned by `to_atflags` when
/// the flags are rejected.
pub(crate) fn sys_execveat(request: UNotifyEventRequest) -> ScmpNotifResp {
    // Only these flags are accepted; undefined/invalid flags are rejected.
    let accepted = AtFlags::AT_SYMLINK_NOFOLLOW | AtFlags::AT_EMPTY_PATH | AT_EXECVE_CHECK;

    // The flags are passed as the fifth syscall argument.
    let atflags = match to_atflags(request.scmpreq.data.args[4], accepted) {
        Err(errno) => return request.fail_syscall(errno),
        Ok(atflags) => atflags,
    };

    // AT_SYMLINK_NOFOLLOW: do not follow the last path component.
    let fsflags = if atflags.contains(AtFlags::AT_SYMLINK_NOFOLLOW) {
        FsFlags::MUST_PATH | FsFlags::NO_FOLLOW_LAST
    } else {
        FsFlags::MUST_PATH
    };

    // AT_EMPTY_PATH: pass the matching flags down to the path reader.
    let sysflags = if atflags.contains(AtFlags::AT_EMPTY_PATH) {
        SysFlags::EMPTY_PATH | SysFlags::PASS_DELETE
    } else {
        SysFlags::empty()
    };

    syscall_exec_handler(
        request,
        "execveat",
        SysArg {
            dirfd: Some(0),
            path: Some(1),
            flags: sysflags,
            fsflags,
        },
    )
}
// Common implementation behind the execve and execveat handlers.
//
// This handler only runs with trace/allow_unsafe_ptrace:1, and it's
// vulnerable to TOCTOU. With ptrace on, this is mitigated using the
// TOCTOU-mitigator, see the wait() function for context.
// See: https://bugzilla.kernel.org/show_bug.cgi?id=218501
//
// Errors returned from the closure:
// - whatever `read_path` or `sandbox_path` return,
// - ELOOP if the last component must not be followed and the path is
//   a symbolic link or magic link,
// - EACCES if the file type is not acceptable or the file is not
//   executable.
fn syscall_exec_handler(
    request: UNotifyEventRequest,
    syscall_name: &str,
    arg: SysArg,
) -> ScmpNotifResp {
    syscall_handler!(request, |request: UNotifyEventRequest| {
        // The exec checks are performed even if exec sandboxing is
        // off. In that case the path check is skipped and only the
        // file executability check (aka AT_EXECVE_CHECK) is done.
        let sandbox = request.get_sandbox();

        // Whether memory fds may be executed; depends on the
        // trace/allow_unsafe_memfd option.
        let memfd_allowed = sandbox.options.allow_unsafe_memfd();

        // Read remote path.
        let (path, _, _) = request.read_path(&sandbox, arg)?;

        // Call sandbox access checker if Exec sandboxing is on.
        if sandbox.enabled(Capability::CAP_EXEC) {
            sandbox_path(
                Some(&request),
                &sandbox,
                request.scmpreq.pid(), // Unused when request.is_some()
                path.abs(),
                Capability::CAP_EXEC,
                syscall_name,
            )?;
        }

        // Release the read lock.
        drop(sandbox);

        // AT_SYMLINK_NOFOLLOW: If the file identified by dirfd and
        // a non-NULL pathname is a symbolic link, then the call
        // fails with the error ELOOP.
        let nofollow = !arg.fsflags.follow_last();
        if nofollow
            && matches!(
                path.typ.as_ref(),
                Some(typ) if typ.is_symlink() || typ.is_magic_link()
            )
        {
            return Err(Errno::ELOOP);
        }

        // Only regular files are accepted, plus memory fds when the
        // trace/allow_unsafe_memfd option permits them. Anything else
        // gets EACCES without any more processing.
        //
        // Linux rejects attempts to execute directories with EACCES.
        let type_ok = match path.typ.as_ref() {
            Some(FileType::Reg) => true,
            Some(FileType::Mfd) => memfd_allowed,
            _ => false,
        };
        if !type_ok {
            return Err(Errno::EACCES);
        }

        // Return EACCES without any more processing if file is not
        // executable. This uses AT_EXECVE_CHECK on Linux>=6.14.
        if is_executable(path.dir()) {
            // SAFETY: This is vulnerable to TOCTOU,
            // See the comment at function header.
            Ok(unsafe { request.continue_syscall() })
        } else {
            Err(Errno::EACCES)
        }
    })
}