Skip to main content

evalbox_sandbox/notify/
supervisor.rs

//! Seccomp notification supervisor.
//!
//! Runs in the parent process, handling intercepted syscalls from the sandboxed child.
//! The supervisor receives notifications via the seccomp listener fd and decides
//! how to respond based on the configured [`NotifyMode`].
//!
//! ## Modes
//!
//! - **Monitor**: Log syscall and return `SECCOMP_USER_NOTIF_FLAG_CONTINUE`
//! - **Virtualize**: Translate filesystem paths via [`VirtualFs`], inject fds via `ADDFD`
11
12use std::fs::File;
13use std::io::{self, Read, Seek, SeekFrom};
14use std::os::fd::{AsRawFd, OwnedFd, RawFd};
15
16use evalbox_sys::seccomp_notify::{
17    SECCOMP_ADDFD_FLAG_SEND, SECCOMP_USER_NOTIF_FLAG_CONTINUE, SeccompNotif, SeccompNotifAddfd,
18    SeccompNotifResp, notif_addfd, notif_id_valid, notif_recv, notif_send,
19};
20
21use super::virtual_fs::VirtualFs;
22use crate::plan::NotifyMode;
23
/// Reports produced by the supervisor after it has dealt with a notification.
///
/// Intended as the hook for future user-facing notifications; each value
/// describes one intercepted syscall.
#[derive(Debug)]
pub enum NotifyEvent {
    /// The supervisor received a syscall notification and responded to it.
    SyscallHandled {
        /// Process ID taken from the seccomp notification.
        pid: u32,
        /// Number of the intercepted syscall.
        syscall_nr: i32,
        /// `true` when the syscall was permitted to go ahead.
        allowed: bool,
    },
}
37
38/// Seccomp notification supervisor.
39pub struct Supervisor {
40    listener_fd: OwnedFd,
41    mode: NotifyMode,
42    vfs: VirtualFs,
43}
44
45impl Supervisor {
46    /// Create a new supervisor.
47    pub fn new(listener_fd: OwnedFd, mode: NotifyMode, vfs: VirtualFs) -> Self {
48        Self {
49            listener_fd,
50            mode,
51            vfs,
52        }
53    }
54
55    /// Get the raw fd for registering with poll/mio.
56    pub fn fd(&self) -> RawFd {
57        self.listener_fd.as_raw_fd()
58    }
59
60    /// Handle a notification event. Call when the listener fd is readable.
61    ///
62    /// Returns `Some(NotifyEvent)` on success, `None` if the notification was
63    /// stale (child died or already handled).
64    pub fn handle_event(&self) -> io::Result<Option<NotifyEvent>> {
65        let mut notif = SeccompNotif::default();
66
67        if let Err(e) = notif_recv(self.listener_fd.as_raw_fd(), &mut notif) {
68            // ENOENT means the target process died before we could receive
69            if e == rustix::io::Errno::NOENT {
70                return Ok(None);
71            }
72            return Err(io::Error::from_raw_os_error(e.raw_os_error()));
73        }
74
75        match self.mode {
76            NotifyMode::Disabled => {
77                debug_assert!(
78                    false,
79                    "supervisor received notification with NotifyMode::Disabled"
80                );
81                self.respond_continue(&notif)?;
82                Ok(None)
83            }
84            NotifyMode::Monitor => self.handle_monitor(&notif),
85            NotifyMode::Virtualize => self.handle_virtualize(&notif),
86        }
87    }
88
89    fn handle_monitor(&self, notif: &SeccompNotif) -> io::Result<Option<NotifyEvent>> {
90        let syscall_name = syscall_name(notif.data.nr);
91        eprintln!(
92            "[notify] pid={} syscall={}({}) args=[{:#x}, {:#x}, {:#x}]",
93            notif.pid,
94            syscall_name,
95            notif.data.nr,
96            notif.data.args[0],
97            notif.data.args[1],
98            notif.data.args[2],
99        );
100
101        self.respond_continue(notif)?;
102
103        Ok(Some(NotifyEvent::SyscallHandled {
104            pid: notif.pid,
105            syscall_nr: notif.data.nr,
106            allowed: true,
107        }))
108    }
109
110    fn handle_virtualize(&self, notif: &SeccompNotif) -> io::Result<Option<NotifyEvent>> {
111        let syscall_nr = notif.data.nr;
112
113        // For openat-family syscalls, args[1] is the pathname pointer
114        // For open/creat, args[0] is the pathname pointer
115        let path_addr = if syscall_nr == libc::SYS_openat as i32
116            || syscall_nr == libc::SYS_newfstatat as i32
117            || syscall_nr == libc::SYS_faccessat as i32
118            || syscall_nr == libc::SYS_faccessat2 as i32
119            || syscall_nr == libc::SYS_readlinkat as i32
120        {
121            notif.data.args[1]
122        } else {
123            notif.data.args[0]
124        };
125
126        // Read path from child's memory
127        let path = match self.read_child_string(notif.pid, path_addr) {
128            Ok(p) => p,
129            Err(_) => {
130                // Can't read memory, let syscall proceed
131                self.respond_continue(notif)?;
132                return Ok(None);
133            }
134        };
135
136        // TOCTOU check: verify notification is still valid after reading memory
137        if notif_id_valid(self.listener_fd.as_raw_fd(), notif.id).is_err() {
138            return Ok(None); // Notification is stale
139        }
140
141        // Try to translate path
142        if let Some(real_path) = self.vfs.translate(&path) {
143            // For openat: open the file ourselves and inject the fd
144            if syscall_nr == libc::SYS_openat as i32
145                || syscall_nr == libc::SYS_open as i32
146                || syscall_nr == libc::SYS_creat as i32
147            {
148                let flags = if syscall_nr == libc::SYS_openat as i32 {
149                    notif.data.args[2] as i32
150                } else {
151                    notif.data.args[1] as i32
152                };
153
154                match self.open_and_inject(notif, &real_path, flags) {
155                    Ok(()) => {
156                        return Ok(Some(NotifyEvent::SyscallHandled {
157                            pid: notif.pid,
158                            syscall_nr,
159                            allowed: true,
160                        }));
161                    }
162                    Err(_) => {
163                        // Fall through to continue
164                    }
165                }
166            }
167        }
168
169        // No translation or non-open syscall: let it proceed as-is
170        self.respond_continue(notif)?;
171        Ok(Some(NotifyEvent::SyscallHandled {
172            pid: notif.pid,
173            syscall_nr,
174            allowed: true,
175        }))
176    }
177
178    fn respond_continue(&self, notif: &SeccompNotif) -> io::Result<()> {
179        let resp = SeccompNotifResp {
180            id: notif.id,
181            val: 0,
182            error: 0,
183            flags: SECCOMP_USER_NOTIF_FLAG_CONTINUE,
184        };
185        notif_send(self.listener_fd.as_raw_fd(), &resp)
186            .map_err(|e| io::Error::from_raw_os_error(e.raw_os_error()))
187    }
188
189    fn open_and_inject(
190        &self,
191        notif: &SeccompNotif,
192        real_path: &std::path::Path,
193        flags: i32,
194    ) -> io::Result<()> {
195        use std::ffi::CString;
196        use std::os::unix::ffi::OsStrExt;
197
198        let path_c = CString::new(real_path.as_os_str().as_bytes())
199            .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "invalid path"))?;
200
201        // Open the file at the translated path
202        let fd = unsafe { libc::open(path_c.as_ptr(), flags & !libc::O_CLOEXEC, 0o666) };
203        if fd < 0 {
204            return Err(io::Error::last_os_error());
205        }
206
207        // Inject the fd into the child and atomically respond
208        let addfd = SeccompNotifAddfd {
209            id: notif.id,
210            flags: SECCOMP_ADDFD_FLAG_SEND,
211            srcfd: fd as u32,
212            newfd: 0,
213            newfd_flags: 0,
214        };
215
216        let result = notif_addfd(self.listener_fd.as_raw_fd(), &addfd)
217            .map_err(|e| io::Error::from_raw_os_error(e.raw_os_error()));
218
219        // Close our copy of the fd
220        unsafe { libc::close(fd) };
221
222        result.map(|_| ())
223    }
224
225    /// Read a null-terminated string from the child's memory via `/proc/pid/mem`.
226    fn read_child_string(&self, pid: u32, addr: u64) -> io::Result<String> {
227        let mem_path = format!("/proc/{pid}/mem");
228        let mut file = File::open(&mem_path)?;
229        file.seek(SeekFrom::Start(addr))?;
230
231        let mut buf = vec![0u8; 4096];
232        let n = file.read(&mut buf)?;
233        buf.truncate(n);
234
235        // Find null terminator
236        if let Some(nul_pos) = buf.iter().position(|&b| b == 0) {
237            buf.truncate(nul_pos);
238        }
239
240        String::from_utf8(buf)
241            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "invalid UTF-8 in path"))
242    }
243}
244
245/// Map syscall number to name for logging.
246fn syscall_name(nr: i32) -> &'static str {
247    match nr as i64 {
248        libc::SYS_openat => "openat",
249        libc::SYS_open => "open",
250        libc::SYS_creat => "creat",
251        libc::SYS_access => "access",
252        libc::SYS_faccessat => "faccessat",
253        libc::SYS_faccessat2 => "faccessat2",
254        libc::SYS_stat => "stat",
255        libc::SYS_lstat => "lstat",
256        libc::SYS_newfstatat => "newfstatat",
257        libc::SYS_statx => "statx",
258        libc::SYS_readlink => "readlink",
259        libc::SYS_readlinkat => "readlinkat",
260        _ => "unknown",
261    }
262}
263
#[cfg(test)]
mod tests {
    use super::*;

    /// Known syscall numbers map to their names; anything else is "unknown".
    #[test]
    fn syscall_names() {
        let cases = [
            (libc::SYS_openat as i32, "openat"),
            (libc::SYS_stat as i32, "stat"),
            (9999, "unknown"),
        ];

        for (nr, expected) in cases {
            assert_eq!(syscall_name(nr), expected);
        }
    }
}