evalbox_sys/seccomp_notify.rs
1//! Seccomp user notification (`SECCOMP_RET_USER_NOTIF`) support.
2//!
3//! Seccomp user notification allows a supervisor process to intercept
4//! syscalls from a sandboxed child and make decisions on its behalf.
5//! This enables filesystem virtualization without user namespaces.
6//!
7//! ## Architecture
8//!
9//! 1. Child installs a seccomp filter with `SECCOMP_FILTER_FLAG_NEW_LISTENER`
10//! 2. This returns a "listener fd" which is passed to the parent via `SCM_RIGHTS`
11//! 3. Parent polls the listener fd; when readable, calls `SECCOMP_IOCTL_NOTIF_RECV`
12//! 4. Parent inspects the syscall and either:
13//! - Returns `SECCOMP_USER_NOTIF_FLAG_CONTINUE` to let it proceed
14//! - Returns an error code to deny it
15//! - Uses `SECCOMP_IOCTL_NOTIF_ADDFD` to inject a file descriptor
16//!
17//! ## TOCTOU Protection
18//!
//! Between receiving a notification and responding, the child's memory may change,
//! and the child may even exit so that its PID is reused by an unrelated process.
//! Always call `SECCOMP_IOCTL_NOTIF_ID_VALID` after reading child memory to verify
//! the notification is still valid.
22
23use std::os::fd::{FromRawFd, OwnedFd};
24
25use rustix::io::Errno;
26
27use crate::last_errno;
28use crate::seccomp::SockFprog;
29
// Seccomp constants for notify (values mirror the kernel's `linux/seccomp.h`).

/// `seccomp(2)` operation that installs a BPF filter.
const SECCOMP_SET_MODE_FILTER: u32 = 1;
/// Filter flag that makes `seccomp(2)` return a user-notification listener fd.
pub const SECCOMP_FILTER_FLAG_NEW_LISTENER: u32 = 1 << 3;

/// Let the syscall proceed as-is (supervisor approves).
pub const SECCOMP_USER_NOTIF_FLAG_CONTINUE: u32 = 1 << 0;

// NOTE: the kernel defines SETFD as bit 0 and SEND as bit 1; keep this order.

/// Replace an existing fd in the target process.
pub const SECCOMP_ADDFD_FLAG_SETFD: u32 = 1 << 0;
/// Atomically inject fd and respond to the notification.
pub const SECCOMP_ADDFD_FLAG_SEND: u32 = 1 << 1;
41
// ioctl request numbers for seccomp notify (from kernel headers).
// These are architecture-dependent; the encoding below is the x86_64 one:
// direction in the top two bits, argument size from bit 16, the magic byte
// from bit 8 and the command number in the low byte.
// SECCOMP_IOCTL_NOTIF_RECV = SECCOMP_IOWR(0, struct seccomp_notif)
// SECCOMP_IOCTL_NOTIF_SEND = SECCOMP_IOWR(1, struct seccomp_notif_resp)
// SECCOMP_IOCTL_NOTIF_ID_VALID = SECCOMP_IOW(2, __u64)
// SECCOMP_IOCTL_NOTIF_ADDFD = SECCOMP_IOW(3, struct seccomp_notif_addfd)

/// Magic ("type") byte shared by all seccomp ioctls.
const SECCOMP_IOC_MAGIC: u64 = b'!' as u64;
/// Direction bits for an ioctl whose argument is only written by userspace (`_IOW`).
const SECCOMP_IOC_DIR_W: u64 = 1;
/// Direction bits for an ioctl whose argument is written and read back (`_IOWR`).
const SECCOMP_IOC_DIR_WR: u64 = 3;

/// Packs direction, command number and argument size into an ioctl request number.
const fn seccomp_ioc(dir: u64, nr: u64, size: u64) -> u64 {
    (dir << 30) | (size << 16) | (SECCOMP_IOC_MAGIC << 8) | nr
}

/// ioctl to receive a notification from the seccomp listener fd.
pub const SECCOMP_IOCTL_NOTIF_RECV: u64 = seccomp_ioc(SECCOMP_IOC_DIR_WR, 0, 80); // 0xc0502100
/// ioctl to send a response to a seccomp notification.
pub const SECCOMP_IOCTL_NOTIF_SEND: u64 = seccomp_ioc(SECCOMP_IOC_DIR_WR, 1, 24); // 0xc0182101
/// ioctl to check if a notification ID is still valid (TOCTOU protection).
pub const SECCOMP_IOCTL_NOTIF_ID_VALID: u64 = seccomp_ioc(SECCOMP_IOC_DIR_W, 2, 8); // 0x40082102
/// ioctl to inject a file descriptor into the notifying process.
pub const SECCOMP_IOCTL_NOTIF_ADDFD: u64 = seccomp_ioc(SECCOMP_IOC_DIR_W, 3, 24); // 0x40182103
57
/// Snapshot of the intercepted syscall (mirrors kernel `struct seccomp_data`).
///
/// Field order and types are part of the kernel ABI and must not be changed.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default)]
pub struct SeccompData {
    /// Number of the syscall being made.
    pub nr: i32,
    /// Architecture identifier (`AUDIT_ARCH_*`).
    pub arch: u32,
    /// Instruction pointer when the syscall was issued.
    pub instruction_pointer: u64,
    /// Raw syscall arguments.
    pub args: [u64; 6],
}
71
72/// Seccomp notification received from the child (mirrors kernel `struct seccomp_notif`).
73#[repr(C)]
74#[derive(Debug, Clone, Copy)]
75pub struct SeccompNotif {
76 /// Unique notification ID.
77 pub id: u64,
78 /// PID of the notifying process (in supervisor's PID namespace).
79 pub pid: u32,
80 /// Flags (currently unused, must be 0).
81 pub flags: u32,
82 /// The syscall data.
83 pub data: SeccompData,
84}
85
86impl Default for SeccompNotif {
87 fn default() -> Self {
88 // SAFETY: SeccompNotif is a plain C struct with no invariants.
89 unsafe { std::mem::zeroed() }
90 }
91}
92
/// Supervisor's answer to a notification (mirrors kernel `struct seccomp_notif_resp`).
///
/// Field order and types are part of the kernel ABI and must not be changed.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default)]
pub struct SeccompNotifResp {
    /// ID of the notification being answered; must match the received one.
    pub id: u64,
    /// Value the syscall should return.
    pub val: i64,
    /// Error for the syscall, as a negative errno in kernel convention
    /// (e.g. `-EPERM`); 0 for no error.
    pub error: i32,
    /// Response flags (e.g., `SECCOMP_USER_NOTIF_FLAG_CONTINUE`).
    pub flags: u32,
}
106
/// Request to install a supervisor fd in the notifying process
/// (mirrors kernel `struct seccomp_notif_addfd`).
///
/// Field order and types are part of the kernel ABI and must not be changed.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default)]
pub struct SeccompNotifAddfd {
    /// ID of the notification this request belongs to; must match the received one.
    pub id: u64,
    /// Request flags (e.g., `SECCOMP_ADDFD_FLAG_SEND`).
    pub flags: u32,
    /// Descriptor in the supervisor that should be injected.
    pub srcfd: u32,
    /// Descriptor number to use in the target (0 = kernel picks).
    pub newfd: u32,
    /// Flags applied to the injected descriptor (e.g., `O_CLOEXEC`).
    pub newfd_flags: u32,
}
123
124/// Install a seccomp filter with `SECCOMP_FILTER_FLAG_NEW_LISTENER`.
125///
126/// Returns the listener fd which can be used to receive notifications.
127/// The caller must have already called `PR_SET_NO_NEW_PRIVS`.
128///
129/// # Safety
130///
131/// The filter must be a valid BPF program. This permanently restricts
132/// syscalls for this thread.
133///
134/// # Errors
135///
136/// Returns `Errno` if the filter cannot be installed.
137pub unsafe fn seccomp_set_mode_filter_listener(fprog: &SockFprog) -> Result<OwnedFd, Errno> {
138 unsafe {
139 let ret = libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
140 if ret != 0 {
141 return Err(last_errno());
142 }
143
144 let ret = libc::syscall(
145 libc::SYS_seccomp,
146 SECCOMP_SET_MODE_FILTER,
147 SECCOMP_FILTER_FLAG_NEW_LISTENER,
148 fprog as *const _,
149 );
150 if ret < 0 {
151 Err(last_errno())
152 } else {
153 // SAFETY: On success, ret is a valid listener file descriptor.
154 Ok(OwnedFd::from_raw_fd(ret as i32))
155 }
156 }
157}
158
159/// Receive a notification from the seccomp listener fd.
160///
161/// Blocks until a notification is available (or use poll/epoll first).
162///
163/// # Errors
164///
165/// Returns `Errno` on failure (e.g., `ENOENT` if the target died).
166pub fn notif_recv(listener_fd: i32, notif: &mut SeccompNotif) -> Result<(), Errno> {
167 let ret = unsafe {
168 libc::ioctl(
169 listener_fd,
170 SECCOMP_IOCTL_NOTIF_RECV,
171 notif as *mut SeccompNotif,
172 )
173 };
174 if ret < 0 { Err(last_errno()) } else { Ok(()) }
175}
176
177/// Send a response to a seccomp notification.
178///
179/// # Errors
180///
181/// Returns `Errno` on failure.
182pub fn notif_send(listener_fd: i32, resp: &SeccompNotifResp) -> Result<(), Errno> {
183 let ret = unsafe {
184 libc::ioctl(
185 listener_fd,
186 SECCOMP_IOCTL_NOTIF_SEND,
187 resp as *const SeccompNotifResp,
188 )
189 };
190 if ret < 0 { Err(last_errno()) } else { Ok(()) }
191}
192
193/// Check if a notification ID is still valid.
194///
195/// Must be called after reading from child's `/proc/pid/mem` to protect
196/// against TOCTOU attacks.
197///
198/// # Errors
199///
200/// Returns `Errno::NOENT` if the notification is no longer valid.
201pub fn notif_id_valid(listener_fd: i32, id: u64) -> Result<(), Errno> {
202 let ret = unsafe { libc::ioctl(listener_fd, SECCOMP_IOCTL_NOTIF_ID_VALID, &id as *const u64) };
203 if ret < 0 { Err(last_errno()) } else { Ok(()) }
204}
205
206/// Inject a file descriptor into the notifying process.
207///
208/// With `SECCOMP_ADDFD_FLAG_SEND`, this atomically injects the fd and
209/// responds to the notification (the return value becomes the new fd number
210/// in the target process).
211///
212/// # Errors
213///
214/// Returns `Errno` on failure.
215pub fn notif_addfd(listener_fd: i32, addfd: &SeccompNotifAddfd) -> Result<i32, Errno> {
216 let ret = unsafe {
217 libc::ioctl(
218 listener_fd,
219 SECCOMP_IOCTL_NOTIF_ADDFD,
220 addfd as *const SeccompNotifAddfd,
221 )
222 };
223 if ret < 0 { Err(last_errno()) } else { Ok(ret) }
224}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// The `#[repr(C)]` mirrors must have exactly the sizes the kernel expects.
    #[test]
    fn struct_sizes() {
        let data_size = std::mem::size_of::<SeccompData>();
        let notif_size = std::mem::size_of::<SeccompNotif>();
        let resp_size = std::mem::size_of::<SeccompNotifResp>();
        let addfd_size = std::mem::size_of::<SeccompNotifAddfd>();

        assert_eq!(data_size, 64);
        assert_eq!(notif_size, 80);
        assert_eq!(resp_size, 24);
        assert_eq!(addfd_size, 24);
    }

    /// A default notification buffer starts out zeroed.
    #[test]
    fn default_notif_is_zeroed() {
        let SeccompNotif { id, pid, data, .. } = SeccompNotif::default();
        assert_eq!(id, 0);
        assert_eq!(pid, 0);
        assert_eq!(data.nr, 0);
    }
}