Skip to main content

evalbox_sys/
seccomp_notify.rs

1//! Seccomp user notification (`SECCOMP_RET_USER_NOTIF`) support.
2//!
3//! Seccomp user notification allows a supervisor process to intercept
4//! syscalls from a sandboxed child and make decisions on its behalf.
5//! This enables filesystem virtualization without user namespaces.
6//!
7//! ## Architecture
8//!
9//! 1. Child installs a seccomp filter with `SECCOMP_FILTER_FLAG_NEW_LISTENER`
10//! 2. This returns a "listener fd" which is passed to the parent via `SCM_RIGHTS`
11//! 3. Parent polls the listener fd; when readable, calls `SECCOMP_IOCTL_NOTIF_RECV`
12//! 4. Parent inspects the syscall and either:
13//!    - Returns `SECCOMP_USER_NOTIF_FLAG_CONTINUE` to let it proceed
14//!    - Returns an error code to deny it
15//!    - Uses `SECCOMP_IOCTL_NOTIF_ADDFD` to inject a file descriptor
16//!
17//! ## TOCTOU Protection
18//!
19//! Between receiving a notification and responding, the child's memory may change.
20//! Always call `SECCOMP_IOCTL_NOTIF_ID_VALID` after reading child memory to verify
21//! the notification is still valid.
22
23use std::os::fd::{FromRawFd, OwnedFd};
24
25use rustix::io::Errno;
26
27use crate::last_errno;
28use crate::seccomp::SockFprog;
29
// Flag and operation values used by the seccomp user-notification API.

/// `seccomp(2)` operation code used when installing a filter.
const SECCOMP_SET_MODE_FILTER: u32 = 1;
/// `seccomp(2)` flag requesting a notification listener fd for the new filter.
pub const SECCOMP_FILTER_FLAG_NEW_LISTENER: u32 = 1 << 3;

/// Response flag: the supervisor approves the syscall, so it runs unmodified.
pub const SECCOMP_USER_NOTIF_FLAG_CONTINUE: u32 = 1 << 0;

/// Addfd flag: inject the fd and answer the notification in one atomic step.
pub const SECCOMP_ADDFD_FLAG_SEND: u32 = 1 << 0;
/// Addfd flag: install over an existing fd number in the target process.
pub const SECCOMP_ADDFD_FLAG_SETFD: u32 = 1 << 1;
41
// ioctl request numbers for the seccomp listener fd (taken from the kernel headers).
// They are architecture-dependent; the values below are for x86_64.
//
//   SECCOMP_IOCTL_NOTIF_RECV     = SECCOMP_IOWR(0, struct seccomp_notif)
//   SECCOMP_IOCTL_NOTIF_SEND     = SECCOMP_IOWR(1, struct seccomp_notif_resp)
//   SECCOMP_IOCTL_NOTIF_ID_VALID = SECCOMP_IOW(2, __u64)
//   SECCOMP_IOCTL_NOTIF_ADDFD    = SECCOMP_IOW(3, struct seccomp_notif_addfd)
//
// Each value packs, from high to low bits: direction, argument size, the
// seccomp magic byte 0x21 (`'!'`), and the command index.

/// Request number for fetching the next pending notification.
pub const SECCOMP_IOCTL_NOTIF_RECV: u64 = 0xc050_2100;
/// Request number for answering a previously received notification.
pub const SECCOMP_IOCTL_NOTIF_SEND: u64 = 0xc018_2101;
/// Request number for checking that a notification ID is still live (TOCTOU protection).
pub const SECCOMP_IOCTL_NOTIF_ID_VALID: u64 = 0x4008_2102;
/// Request number for installing a file descriptor in the notifying process.
pub const SECCOMP_IOCTL_NOTIF_ADDFD: u64 = 0x4018_2103;
57
/// Description of the intercepted syscall (same layout as the kernel's `struct seccomp_data`).
#[repr(C)]
#[derive(Clone, Copy, Debug, Default)]
pub struct SeccompData {
    /// Number of the syscall being made.
    pub nr: i32,
    /// Architecture identifier (an `AUDIT_ARCH_*` value).
    pub arch: u32,
    /// Instruction pointer when the syscall was made.
    pub instruction_pointer: u64,
    /// The six raw syscall arguments.
    pub args: [u64; 6],
}
71
72/// Seccomp notification received from the child (mirrors kernel `struct seccomp_notif`).
73#[repr(C)]
74#[derive(Debug, Clone, Copy)]
75pub struct SeccompNotif {
76    /// Unique notification ID.
77    pub id: u64,
78    /// PID of the notifying process (in supervisor's PID namespace).
79    pub pid: u32,
80    /// Flags (currently unused, must be 0).
81    pub flags: u32,
82    /// The syscall data.
83    pub data: SeccompData,
84}
85
86impl Default for SeccompNotif {
87    fn default() -> Self {
88        // SAFETY: SeccompNotif is a plain C struct with no invariants.
89        unsafe { std::mem::zeroed() }
90    }
91}
92
/// Response to a seccomp notification (mirrors kernel `struct seccomp_notif_resp`).
#[repr(C)]
#[derive(Debug, Clone, Copy, Default)]
pub struct SeccompNotifResp {
    /// Must match the notification ID.
    pub id: u64,
    /// Return value for the syscall (used when `error` is 0).
    pub val: i64,
    /// `0` for success, or a *negative* errno value (e.g. `-libc::EPERM`) to
    /// make the syscall fail. The kernel does not negate this field, so the
    /// supervisor must store the negative value itself.
    pub error: i32,
    /// Flags (e.g., `SECCOMP_USER_NOTIF_FLAG_CONTINUE`).
    pub flags: u32,
}
106
/// Request to install one of the supervisor's file descriptors in the notifying
/// process (same layout as the kernel's `struct seccomp_notif_addfd`).
#[repr(C)]
#[derive(Clone, Copy, Debug, Default)]
pub struct SeccompNotifAddfd {
    /// ID of the notification this request belongs to.
    pub id: u64,
    /// Behaviour flags, such as `SECCOMP_ADDFD_FLAG_SEND`.
    pub flags: u32,
    /// Descriptor in the supervisor process that should be injected.
    pub srcfd: u32,
    /// Descriptor number to use in the target; 0 lets the kernel choose.
    pub newfd: u32,
    /// Flags applied to the injected descriptor, such as `O_CLOEXEC`.
    pub newfd_flags: u32,
}
123
124/// Install a seccomp filter with `SECCOMP_FILTER_FLAG_NEW_LISTENER`.
125///
126/// Returns the listener fd which can be used to receive notifications.
127/// The caller must have already called `PR_SET_NO_NEW_PRIVS`.
128///
129/// # Safety
130///
131/// The filter must be a valid BPF program. This permanently restricts
132/// syscalls for this thread.
133///
134/// # Errors
135///
136/// Returns `Errno` if the filter cannot be installed.
137pub unsafe fn seccomp_set_mode_filter_listener(fprog: &SockFprog) -> Result<OwnedFd, Errno> {
138    unsafe {
139        let ret = libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
140        if ret != 0 {
141            return Err(last_errno());
142        }
143
144        let ret = libc::syscall(
145            libc::SYS_seccomp,
146            SECCOMP_SET_MODE_FILTER,
147            SECCOMP_FILTER_FLAG_NEW_LISTENER,
148            fprog as *const _,
149        );
150        if ret < 0 {
151            Err(last_errno())
152        } else {
153            // SAFETY: On success, ret is a valid listener file descriptor.
154            Ok(OwnedFd::from_raw_fd(ret as i32))
155        }
156    }
157}
158
159/// Receive a notification from the seccomp listener fd.
160///
161/// Blocks until a notification is available (or use poll/epoll first).
162///
163/// # Errors
164///
165/// Returns `Errno` on failure (e.g., `ENOENT` if the target died).
166pub fn notif_recv(listener_fd: i32, notif: &mut SeccompNotif) -> Result<(), Errno> {
167    let ret = unsafe {
168        libc::ioctl(
169            listener_fd,
170            SECCOMP_IOCTL_NOTIF_RECV,
171            notif as *mut SeccompNotif,
172        )
173    };
174    if ret < 0 { Err(last_errno()) } else { Ok(()) }
175}
176
177/// Send a response to a seccomp notification.
178///
179/// # Errors
180///
181/// Returns `Errno` on failure.
182pub fn notif_send(listener_fd: i32, resp: &SeccompNotifResp) -> Result<(), Errno> {
183    let ret = unsafe {
184        libc::ioctl(
185            listener_fd,
186            SECCOMP_IOCTL_NOTIF_SEND,
187            resp as *const SeccompNotifResp,
188        )
189    };
190    if ret < 0 { Err(last_errno()) } else { Ok(()) }
191}
192
193/// Check if a notification ID is still valid.
194///
195/// Must be called after reading from child's `/proc/pid/mem` to protect
196/// against TOCTOU attacks.
197///
198/// # Errors
199///
200/// Returns `Errno::NOENT` if the notification is no longer valid.
201pub fn notif_id_valid(listener_fd: i32, id: u64) -> Result<(), Errno> {
202    let ret = unsafe { libc::ioctl(listener_fd, SECCOMP_IOCTL_NOTIF_ID_VALID, &id as *const u64) };
203    if ret < 0 { Err(last_errno()) } else { Ok(()) }
204}
205
206/// Inject a file descriptor into the notifying process.
207///
208/// With `SECCOMP_ADDFD_FLAG_SEND`, this atomically injects the fd and
209/// responds to the notification (the return value becomes the new fd number
210/// in the target process).
211///
212/// # Errors
213///
214/// Returns `Errno` on failure.
215pub fn notif_addfd(listener_fd: i32, addfd: &SeccompNotifAddfd) -> Result<i32, Errno> {
216    let ret = unsafe {
217        libc::ioctl(
218            listener_fd,
219            SECCOMP_IOCTL_NOTIF_ADDFD,
220            addfd as *const SeccompNotifAddfd,
221        )
222    };
223    if ret < 0 { Err(last_errno()) } else { Ok(ret) }
224}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// The `#[repr(C)]` mirrors must have the sizes the kernel expects.
    #[test]
    fn struct_sizes() {
        assert_eq!(std::mem::size_of::<SeccompData>(), 64);
        assert_eq!(std::mem::size_of::<SeccompNotif>(), 80);
        assert_eq!(std::mem::size_of::<SeccompNotifResp>(), 24);
        assert_eq!(std::mem::size_of::<SeccompNotifAddfd>(), 24);
    }

    /// A default notification buffer starts out zeroed.
    #[test]
    fn default_notif_is_zeroed() {
        let SeccompNotif { id, pid, data, .. } = SeccompNotif::default();
        assert_eq!(id, 0);
        assert_eq!(pid, 0);
        assert_eq!(data.nr, 0);
    }
}