1use std::collections::HashSet;
6use std::io;
7use std::net::IpAddr;
8use std::os::unix::io::{AsRawFd, FromRawFd, OwnedFd, RawFd};
9use std::sync::Arc;
10
11use crate::error::NotifError;
12use crate::arch;
13use crate::sys::structs::{
14 SeccompNotif, SeccompNotifAddfd, SeccompNotifResp,
15 SECCOMP_ADDFD_FLAG_SEND, SECCOMP_IOCTL_NOTIF_ADDFD, SECCOMP_IOCTL_NOTIF_ID_VALID, SECCOMP_IOCTL_NOTIF_RECV,
16 SECCOMP_IOCTL_NOTIF_SEND, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
17 SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, SECCOMP_USER_NOTIF_FLAG_CONTINUE,
18 ENOMEM,
19};
20
/// Boxed one-shot callback taking a single `i32`.
///
/// `send_response` invokes it with the value returned by the ADDFD ioctl when
/// it handles `NotifAction::InjectFdSendTracked` and the injection succeeds.
pub struct OnInjectSuccess(pub Box<dyn FnOnce(i32) + Send + Sync>);

impl std::fmt::Debug for OnInjectSuccess {
    /// Prints a fixed placeholder, since the boxed closure itself is opaque.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(formatter, "OnInjectSuccess(<callback>)")
    }
}

impl OnInjectSuccess {
    /// Boxes `f` and wraps it in an [`OnInjectSuccess`].
    pub fn new<F>(f: F) -> Self
    where
        F: FnOnce(i32) + Send + Sync + 'static,
    {
        OnInjectSuccess(Box::new(f))
    }
}
44
/// What the supervisor does in answer to one seccomp notification.
///
/// The variants are interpreted by `send_response`; the per-variant notes
/// below describe what that function does with each of them.
#[derive(Debug)]
pub enum NotifAction {
    /// Reply with `SECCOMP_USER_NOTIF_FLAG_CONTINUE` set.
    Continue,
    /// Reply with this errno (stored negated in the response's `error` field).
    Errno(i32),
    /// Issue the ADDFD ioctl for `srcfd` with `targetfd` as `newfd`, then
    /// reply as for `Continue`.
    InjectFd { srcfd: RawFd, targetfd: i32 },
    /// Issue the ADDFD ioctl with `SECCOMP_ADDFD_FLAG_SEND`; if that ioctl
    /// fails, fall back to a `Continue` reply.
    InjectFdSend { srcfd: OwnedFd, newfd_flags: u32 },
    /// Same as `InjectFdSend`, but on success `on_success` is called with the
    /// value the ADDFD ioctl returned.
    InjectFdSendTracked {
        srcfd: OwnedFd,
        newfd_flags: u32,
        on_success: OnInjectSuccess,
    },
    /// Reply with this value in the response's `val` field and no error.
    ReturnValue(i64),
    /// Send no reply at all.
    Hold,
    /// Call `killpg(pgid, sig)`, then reply with errno `ENOMEM`.
    Kill { sig: i32, pgid: i32 },
}
76
/// Network policy selector.
///
/// NOTE(review): enforcement is not visible in this part of the file; the
/// variant descriptions are inferred from their names — confirm at the use
/// sites.
#[derive(Debug, Clone)]
pub enum NetworkPolicy {
    /// Presumably: no address restriction.
    Unrestricted,
    /// Carries a set of IP addresses; presumably the only permitted peers.
    AllowList(HashSet<IpAddr>),
}
89
90pub(crate) fn is_path_denied_for_notif(
101 policy_fn_state: &super::state::PolicyFnState,
102 notif: &SeccompNotif,
103 notif_fd: RawFd,
104) -> bool {
105 if let Some(path) = resolve_path_for_notif(notif, notif_fd) {
106 if is_denied_with_symlink_resolve(policy_fn_state, &path) {
107 return true;
108 }
109 }
110 if let Some(path) = resolve_second_path_for_notif(notif, notif_fd) {
112 if is_denied_with_symlink_resolve(policy_fn_state, &path) {
113 return true;
114 }
115 }
116 false
117}
118
119fn is_denied_with_symlink_resolve(
125 policy_fn_state: &super::state::PolicyFnState,
126 path: &str,
127) -> bool {
128 if policy_fn_state.is_path_denied(path) {
130 return true;
131 }
132 if let Ok(real) = std::fs::canonicalize(path) {
134 if policy_fn_state.is_path_denied(&real.to_string_lossy()) {
135 return true;
136 }
137 }
138 false
139}
140
141pub(crate) fn dup_fd_from_pid(pid: u32, target_fd: i32) -> Result<OwnedFd, io::Error> {
144 const SYS_PIDFD_OPEN: i64 = 434;
145 const SYS_PIDFD_GETFD: i64 = 438;
146 const PIDFD_THREAD: i64 = libc::O_EXCL as i64; let pidfd = unsafe { libc::syscall(SYS_PIDFD_OPEN, pid as i64, PIDFD_THREAD) };
148 if pidfd < 0 {
149 return Err(io::Error::last_os_error());
150 }
151 let pidfd_owned = unsafe { OwnedFd::from_raw_fd(pidfd as i32) };
152 let ret = unsafe {
153 libc::syscall(SYS_PIDFD_GETFD, pidfd_owned.as_raw_fd() as i64, target_fd as i64, 0i64)
154 };
155 if ret < 0 {
156 Err(io::Error::last_os_error())
157 } else {
158 Ok(unsafe { OwnedFd::from_raw_fd(ret as i32) })
159 }
160}
161
/// Policy flags and limits consulted by the notification supervisor.
///
/// Only the fields annotated below are read in this part of the file. The
/// remaining fields are presumably consumed by the dispatch handlers — their
/// exact meaning should be confirmed there.
pub struct NotifPolicy {
    pub max_memory_bytes: u64,
    pub max_processes: u32,
    pub has_memory_limit: bool,
    pub has_net_allowlist: bool,
    /// When true, `handle_notification` attempts vDSO patching and the flag
    /// is forwarded to `crate::vdso::patch`.
    pub has_random_seed: bool,
    /// When true, `handle_notification` attempts vDSO patching and
    /// `time_offset` is forwarded to `crate::vdso::patch`.
    pub has_time_start: bool,
    /// Passed to `crate::vdso::patch` only when `has_time_start` is true.
    /// NOTE(review): units are not visible here.
    pub time_offset: i64,
    pub num_cpus: Option<u32>,
    pub port_remap: bool,
    pub cow_enabled: bool,
    /// When `Some`, `handle_notification` skips its denied-path pre-check.
    pub chroot_root: Option<std::path::PathBuf>,
    pub chroot_readable: Vec<std::path::PathBuf>,
    pub chroot_writable: Vec<std::path::PathBuf>,
    pub chroot_denied: Vec<std::path::PathBuf>,
    pub chroot_mounts: Vec<(std::path::PathBuf, std::path::PathBuf)>,
    pub deterministic_dirs: bool,
    pub hostname: Option<String>,
    pub has_http_acl: bool,
    pub virtual_etc_hosts: Option<String>,
}
195
196fn recv_notif(fd: RawFd) -> io::Result<SeccompNotif> {
203 let mut notif: SeccompNotif = unsafe { std::mem::zeroed() };
204 let ret = unsafe {
205 libc::ioctl(fd, SECCOMP_IOCTL_NOTIF_RECV as libc::c_ulong, &mut notif as *mut _)
206 };
207 if ret < 0 {
208 Err(io::Error::last_os_error())
209 } else {
210 Ok(notif)
211 }
212}
213
214fn respond_continue(fd: RawFd, id: u64) -> io::Result<()> {
216 let resp = SeccompNotifResp {
217 id,
218 val: 0,
219 error: 0,
220 flags: SECCOMP_USER_NOTIF_FLAG_CONTINUE,
221 };
222 send_resp_raw(fd, &resp)
223}
224
225fn respond_errno(fd: RawFd, id: u64, errno: i32) -> io::Result<()> {
227 let resp = SeccompNotifResp {
228 id,
229 val: 0,
230 error: -errno,
231 flags: 0,
232 };
233 send_resp_raw(fd, &resp)
234}
235
236fn respond_value(fd: RawFd, id: u64, val: i64) -> io::Result<()> {
238 let resp = SeccompNotifResp {
239 id,
240 val,
241 error: 0,
242 flags: 0,
243 };
244 send_resp_raw(fd, &resp)
245}
246
247fn inject_fd_and_send(fd: RawFd, id: u64, srcfd: RawFd, newfd_flags: u32) -> io::Result<i32> {
253 let addfd = SeccompNotifAddfd {
254 id,
255 flags: SECCOMP_ADDFD_FLAG_SEND,
256 srcfd: srcfd as u32,
257 newfd: 0, newfd_flags,
259 };
260 let ret = unsafe {
261 libc::ioctl(fd, SECCOMP_IOCTL_NOTIF_ADDFD as libc::c_ulong, &addfd as *const _)
262 };
263 if ret < 0 {
264 Err(io::Error::last_os_error())
265 } else {
266 Ok(ret as i32)
267 }
268}
269
270fn inject_fd(fd: RawFd, id: u64, srcfd: RawFd, targetfd: i32) -> io::Result<()> {
273 let addfd = SeccompNotifAddfd {
274 id,
275 flags: 0,
276 srcfd: srcfd as u32,
277 newfd: targetfd as u32,
278 newfd_flags: 0,
279 };
280 let ret = unsafe {
281 libc::ioctl(fd, SECCOMP_IOCTL_NOTIF_ADDFD as libc::c_ulong, &addfd as *const _)
282 };
283 if ret < 0 {
284 Err(io::Error::last_os_error())
285 } else {
286 Ok(())
287 }
288}
289
290fn send_resp_raw(fd: RawFd, resp: &SeccompNotifResp) -> io::Result<()> {
292 let ret = unsafe {
293 libc::ioctl(fd, SECCOMP_IOCTL_NOTIF_SEND as libc::c_ulong, resp as *const _)
294 };
295 if ret < 0 {
296 Err(io::Error::last_os_error())
297 } else {
298 Ok(())
299 }
300}
301
302pub(crate) fn id_valid(fd: RawFd, id: u64) -> io::Result<()> {
305 let ret = unsafe {
306 libc::ioctl(fd, SECCOMP_IOCTL_NOTIF_ID_VALID as libc::c_ulong, &id as *const _)
307 };
308 if ret < 0 {
309 Err(io::Error::last_os_error())
310 } else {
311 Ok(())
312 }
313}
314
315fn try_set_sync_wakeup(fd: RawFd) {
317 let flags: u64 = SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP as u64;
318 unsafe {
319 libc::ioctl(fd, SECCOMP_IOCTL_NOTIF_SET_FLAGS as libc::c_ulong, &flags as *const _);
320 }
321}
322
323fn read_child_mem_vm(pid: u32, addr: u64, len: usize) -> Result<Vec<u8>, NotifError> {
329 let mut buf = vec![0u8; len];
330 let local_iov = libc::iovec {
331 iov_base: buf.as_mut_ptr() as *mut libc::c_void,
332 iov_len: len,
333 };
334 let remote_iov = libc::iovec {
335 iov_base: addr as *mut libc::c_void,
336 iov_len: len,
337 };
338 let ret = unsafe {
339 libc::process_vm_readv(pid as i32, &local_iov, 1, &remote_iov, 1, 0)
340 };
341 if ret < 0 {
342 Err(NotifError::ChildMemoryRead(io::Error::last_os_error()))
343 } else {
344 buf.truncate(ret as usize);
345 Ok(buf)
346 }
347}
348
349fn write_child_mem_vm(pid: u32, addr: u64, data: &[u8]) -> Result<(), NotifError> {
351 let local_iov = libc::iovec {
352 iov_base: data.as_ptr() as *mut libc::c_void,
353 iov_len: data.len(),
354 };
355 let remote_iov = libc::iovec {
356 iov_base: addr as *mut libc::c_void,
357 iov_len: data.len(),
358 };
359 let ret = unsafe {
360 libc::process_vm_writev(pid as i32, &local_iov, 1, &remote_iov, 1, 0)
361 };
362 if ret < 0 {
363 Err(NotifError::ChildMemoryRead(io::Error::last_os_error()))
364 } else if (ret as usize) < data.len() {
365 Err(NotifError::ChildMemoryRead(io::Error::new(
366 io::ErrorKind::WriteZero,
367 format!("short write: {} of {} bytes", ret, data.len()),
368 )))
369 } else {
370 Ok(())
371 }
372}
373
374pub(crate) fn read_child_mem(
379 notif_fd: RawFd,
380 id: u64,
381 pid: u32,
382 addr: u64,
383 len: usize,
384) -> Result<Vec<u8>, NotifError> {
385 id_valid(notif_fd, id).map_err(NotifError::Ioctl)?;
386 let result = read_child_mem_vm(pid, addr, len)?;
387 id_valid(notif_fd, id).map_err(NotifError::Ioctl)?;
388 Ok(result)
389}
390
391pub(crate) fn read_child_cstr(
394 notif_fd: RawFd,
395 id: u64,
396 pid: u32,
397 addr: u64,
398 max_len: usize,
399) -> Option<String> {
400 if addr == 0 || max_len == 0 {
401 return None;
402 }
403
404 const PAGE_SIZE: u64 = 4096;
405 let mut result = Vec::with_capacity(max_len.min(256));
406 let mut cur = addr;
407 while result.len() < max_len {
408 let page_remaining = PAGE_SIZE - (cur % PAGE_SIZE);
409 let remaining = max_len - result.len();
410 let to_read = page_remaining.min(remaining as u64) as usize;
411 let bytes = read_child_mem(notif_fd, id, pid, cur, to_read).ok()?;
412 if let Some(nul) = bytes.iter().position(|&b| b == 0) {
413 result.extend_from_slice(&bytes[..nul]);
414 return String::from_utf8(result).ok();
415 }
416 result.extend_from_slice(&bytes);
417 cur += to_read as u64;
418 }
419
420 String::from_utf8(result).ok()
421}
422
423pub(crate) fn write_child_mem(
428 notif_fd: RawFd,
429 id: u64,
430 pid: u32,
431 addr: u64,
432 data: &[u8],
433) -> Result<(), NotifError> {
434 id_valid(notif_fd, id).map_err(NotifError::Ioctl)?;
435 write_child_mem_vm(pid, addr, data)?;
436 id_valid(notif_fd, id).map_err(NotifError::Ioctl)?;
437 Ok(())
438}
439
440fn send_response(fd: RawFd, id: u64, action: NotifAction) -> io::Result<()> {
446 match action {
447 NotifAction::Continue => respond_continue(fd, id),
448 NotifAction::Errno(errno) => respond_errno(fd, id, errno),
449 NotifAction::InjectFd { srcfd, targetfd } => {
450 inject_fd(fd, id, srcfd, targetfd)?;
451 respond_continue(fd, id)
452 }
453 NotifAction::InjectFdSend { srcfd, newfd_flags } => {
454 match inject_fd_and_send(fd, id, srcfd.as_raw_fd(), newfd_flags) {
459 Ok(_new_fd) => Ok(()),
460 Err(_) => respond_continue(fd, id),
461 }
462 }
463 NotifAction::InjectFdSendTracked { srcfd, newfd_flags, on_success } => {
464 match inject_fd_and_send(fd, id, srcfd.as_raw_fd(), newfd_flags) {
465 Ok(new_fd) => {
466 (on_success.0)(new_fd);
467 Ok(())
468 }
469 Err(_) => respond_continue(fd, id),
470 }
471 }
472 NotifAction::ReturnValue(val) => respond_value(fd, id, val),
473 NotifAction::Hold => Ok(()), NotifAction::Kill { sig, pgid } => {
475 unsafe { libc::killpg(pgid, sig) };
478 respond_errno(fd, id, ENOMEM)
479 }
480 }
481}
482
483fn maybe_patch_vdso(pid: i32, procfs: &mut super::state::ProcfsState, policy: &NotifPolicy) {
489 let base = match crate::vdso::find_vdso_base(pid) {
490 Ok(addr) => addr,
491 Err(_) => return,
492 };
493 if base == procfs.vdso_patched_addr {
494 return; }
496 let time_offset = if policy.has_time_start { Some(policy.time_offset) } else { None };
497 if crate::vdso::patch(pid, time_offset, policy.has_random_seed).is_ok() {
498 procfs.vdso_patched_addr = base;
499 }
500}
501
502fn syscall_name(nr: i64) -> &'static str {
508 match nr {
509 n if n == libc::SYS_openat => "openat",
510 n if n == libc::SYS_connect => "connect",
511 n if n == libc::SYS_sendto => "sendto",
512 n if n == libc::SYS_sendmsg => "sendmsg",
513 n if n == libc::SYS_bind => "bind",
514 n if n == libc::SYS_clone => "clone",
515 n if n == libc::SYS_clone3 => "clone3",
516 n if Some(n) == arch::SYS_VFORK => "vfork",
517 n if n == libc::SYS_execve => "execve",
518 n if n == libc::SYS_execveat => "execveat",
519 n if n == libc::SYS_mmap => "mmap",
520 n if n == libc::SYS_munmap => "munmap",
521 n if n == libc::SYS_brk => "brk",
522 n if n == libc::SYS_getrandom => "getrandom",
523 n if n == libc::SYS_unlinkat => "unlinkat",
524 n if n == libc::SYS_mkdirat => "mkdirat",
525 _ => "unknown",
526 }
527}
528
529fn syscall_category(nr: i64) -> crate::policy_fn::SyscallCategory {
531 use crate::policy_fn::SyscallCategory;
532 match nr {
533 n if n == libc::SYS_openat || n == libc::SYS_unlinkat
534 || n == libc::SYS_mkdirat || n == libc::SYS_renameat2
535 || n == libc::SYS_symlinkat || n == libc::SYS_linkat
536 || n == libc::SYS_fchmodat || n == libc::SYS_fchownat
537 || n == libc::SYS_truncate || n == libc::SYS_readlinkat
538 || n == libc::SYS_newfstatat || n == libc::SYS_statx
539 || n == libc::SYS_faccessat || n == libc::SYS_getdents64
540 || Some(n) == arch::SYS_GETDENTS => SyscallCategory::File,
541 n if n == libc::SYS_connect || n == libc::SYS_sendto
542 || n == libc::SYS_sendmsg || n == libc::SYS_bind
543 || n == libc::SYS_getsockname => SyscallCategory::Network,
544 n if n == libc::SYS_clone || n == libc::SYS_clone3
545 || Some(n) == arch::SYS_VFORK || n == libc::SYS_execve
546 || n == libc::SYS_execveat => SyscallCategory::Process,
547 n if n == libc::SYS_mmap || n == libc::SYS_munmap
548 || n == libc::SYS_brk || n == libc::SYS_mremap
549 => SyscallCategory::Memory,
550 _ => SyscallCategory::File, }
552}
553
/// Reads the parent pid of `pid` from `/proc/<pid>/stat`.
///
/// The line is split at the last `)`, which ends the command-name field (a
/// field that may itself contain spaces and parentheses). The second
/// whitespace-separated token after it is parsed as the parent pid.
///
/// # Returns
/// `None` if the file cannot be read or the line does not have the expected
/// shape. A malformed or truncated line never panics.
fn read_ppid(pid: u32) -> Option<u32> {
    let stat = std::fs::read_to_string(format!("/proc/{}/stat", pid)).ok()?;
    let (_, after_comm) = stat.rsplit_once(')')?;
    // Tokens after the command name: state, ppid, ...
    after_comm.split_whitespace().nth(1)?.parse().ok()
}
565
566fn read_path_for_event(notif: &SeccompNotif, addr: u64, notif_fd: RawFd) -> Option<String> {
568 if addr == 0 { return None; }
569 let bytes = read_child_mem(notif_fd, notif.id, notif.pid, addr, 256).ok()?;
570 let nul = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
571 String::from_utf8(bytes[..nul].to_vec()).ok()
572}
573
/// Lexically normalizes `path` without touching the filesystem.
///
/// `.` components are dropped and each `..` removes the previously kept
/// component. A `..` with nothing left to remove is ignored, so an absolute
/// path never climbs above `/`. An empty result becomes `"/"` for absolute
/// input and `"."` otherwise. Symlinks are not resolved.
fn normalize_path(path: &std::path::Path) -> String {
    use std::path::{Component, PathBuf};

    let rooted = path.is_absolute();
    let mut out = if rooted { PathBuf::from("/") } else { PathBuf::new() };

    for piece in path.components() {
        if let Component::Normal(name) = piece {
            out.push(name);
        } else if matches!(piece, Component::ParentDir) {
            out.pop();
        }
        // Root, current-dir and prefix components add nothing.
    }

    if !out.as_os_str().is_empty() {
        return out.to_string_lossy().into_owned();
    }
    String::from(if rooted { "/" } else { "." })
}
600
601fn resolve_at_path_for_event(notif: &SeccompNotif, dirfd: i64, path: &str) -> Option<String> {
602 use std::path::Path;
603
604 if Path::new(path).is_absolute() {
605 return Some(normalize_path(Path::new(path)));
606 }
607
608 let dirfd32 = dirfd as i32;
609 let base = if dirfd32 == libc::AT_FDCWD {
610 std::fs::read_link(format!("/proc/{}/cwd", notif.pid)).ok()?
611 } else {
612 std::fs::read_link(format!("/proc/{}/fd/{}", notif.pid, dirfd32)).ok()?
613 };
614
615 Some(normalize_path(&base.join(path)))
616}
617
618fn resolve_path_for_notif(notif: &SeccompNotif, notif_fd: RawFd) -> Option<String> {
619 let nr = notif.data.nr as i64;
620 match nr {
621 n if n == libc::SYS_openat => {
622 let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?;
624 resolve_at_path_for_event(notif, notif.data.args[0] as i64, &path)
625 }
626 n if Some(n) == arch::SYS_OPEN || n == libc::SYS_execve => {
627 let path = read_path_for_event(notif, notif.data.args[0], notif_fd)?;
628 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path)
629 }
630 n if n == libc::SYS_execveat => {
631 let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?;
632 resolve_at_path_for_event(notif, notif.data.args[0] as i64, &path)
633 }
634 n if n == libc::SYS_linkat => {
637 let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?;
638 resolve_at_path_for_event(notif, notif.data.args[0] as i64, &path)
639 }
640 n if n == libc::SYS_renameat2 => {
643 let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?;
644 resolve_at_path_for_event(notif, notif.data.args[0] as i64, &path)
645 }
646 n if n == libc::SYS_symlinkat => {
649 let target = read_path_for_event(notif, notif.data.args[0], notif_fd)?;
650 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &target)
652 }
653 n if Some(n) == arch::SYS_LINK => {
655 let path = read_path_for_event(notif, notif.data.args[0], notif_fd)?;
656 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path)
657 }
658 n if Some(n) == arch::SYS_RENAME => {
660 let path = read_path_for_event(notif, notif.data.args[0], notif_fd)?;
661 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path)
662 }
663 n if Some(n) == arch::SYS_SYMLINK => {
665 let target = read_path_for_event(notif, notif.data.args[0], notif_fd)?;
666 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &target)
667 }
668 _ => None,
669 }
670}
671
672fn resolve_second_path_for_notif(notif: &SeccompNotif, notif_fd: RawFd) -> Option<String> {
676 let nr = notif.data.nr as i64;
677 match nr {
678 n if n == libc::SYS_renameat2 => {
680 let path = read_path_for_event(notif, notif.data.args[3], notif_fd)?;
681 resolve_at_path_for_event(notif, notif.data.args[2] as i64, &path)
682 }
683 n if n == libc::SYS_linkat => {
687 let path = read_path_for_event(notif, notif.data.args[3], notif_fd)?;
688 resolve_at_path_for_event(notif, notif.data.args[2] as i64, &path)
689 }
690 n if Some(n) == arch::SYS_RENAME => {
692 let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?;
693 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path)
694 }
695 n if Some(n) == arch::SYS_LINK => {
697 let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?;
698 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path)
699 }
700 _ => None,
701 }
702}
703
704fn read_sockaddr_for_event(notif: &SeccompNotif, addr: u64, len: usize, notif_fd: RawFd)
706 -> (Option<std::net::IpAddr>, Option<u16>)
707{
708 if addr == 0 || len < 4 { return (None, None); }
709 let bytes = match read_child_mem(notif_fd, notif.id, notif.pid, addr, len.min(128)) {
710 Ok(b) => b,
711 Err(_) => return (None, None),
712 };
713 if bytes.len() < 4 { return (None, None); }
714 let family = u16::from_ne_bytes([bytes[0], bytes[1]]);
715 let port = u16::from_be_bytes([bytes[2], bytes[3]]);
716 let ip = match family as u32 {
717 f if f == crate::sys::structs::AF_INET && bytes.len() >= 8 => {
718 Some(std::net::IpAddr::V4(std::net::Ipv4Addr::new(
719 bytes[4], bytes[5], bytes[6], bytes[7],
720 )))
721 }
722 f if f == crate::sys::structs::AF_INET6 && bytes.len() >= 24 => {
723 let mut addr = [0u8; 16];
724 addr.copy_from_slice(&bytes[8..24]);
725 Some(std::net::IpAddr::V6(std::net::Ipv6Addr::from(addr)))
726 }
727 _ => None,
728 };
729 (ip, if port > 0 { Some(port) } else { None })
730}
731
732fn read_argv_for_event(notif: &SeccompNotif, argv_ptr: u64, notif_fd: RawFd) -> Option<Vec<String>> {
735 if argv_ptr == 0 { return None; }
736 let mut args = Vec::new();
737 let ptr_size = std::mem::size_of::<u64>();
738
739 for i in 0..64u64 {
740 let ptr_addr = argv_ptr + i * ptr_size as u64;
741 let ptr_bytes = read_child_mem(notif_fd, notif.id, notif.pid, ptr_addr, ptr_size).ok()?;
742 let str_ptr = u64::from_ne_bytes(ptr_bytes[..8].try_into().ok()?);
743 if str_ptr == 0 { break; } if let Some(s) = read_path_for_event(notif, str_ptr, notif_fd) {
746 args.push(s);
747 } else {
748 break;
749 }
750 }
751
752 if args.is_empty() { None } else { Some(args) }
753}
754
755async fn emit_policy_event(
758 notif: &SeccompNotif,
759 action: &NotifAction,
760 policy_fn_state: &Arc<tokio::sync::Mutex<super::state::PolicyFnState>>,
761 notif_fd: RawFd,
762) -> Option<crate::policy_fn::Verdict> {
763 let pfs = policy_fn_state.lock().await;
764 let tx = match pfs.event_tx.as_ref() {
765 Some(tx) => tx.clone(),
766 None => return None,
767 };
768 drop(pfs);
769
770 let nr = notif.data.nr as i64;
771 let denied = matches!(action, NotifAction::Errno(_));
772 let name = syscall_name(nr);
773 let category = syscall_category(nr);
774 let parent_pid = read_ppid(notif.pid);
775
776 let mut host = None;
791 let mut port = None;
792 let mut size = None;
793 let mut argv = None;
794
795 if nr == libc::SYS_execve || nr == libc::SYS_execveat {
796 let argv_ptr = if nr == libc::SYS_execveat {
799 notif.data.args[2]
800 } else {
801 notif.data.args[1]
802 };
803 argv = read_argv_for_event(notif, argv_ptr, notif_fd);
804 }
805
806 if nr == libc::SYS_connect || nr == libc::SYS_sendto || nr == libc::SYS_bind {
807 let addr_ptr = notif.data.args[1];
809 let addr_len = notif.data.args[2] as usize;
810 let (h, p) = read_sockaddr_for_event(notif, addr_ptr, addr_len, notif_fd);
811 host = h;
812 port = p;
813 }
814
815 if nr == libc::SYS_mmap {
816 size = Some(notif.data.args[1]);
818 }
819
820 let event = crate::policy_fn::SyscallEvent {
821 syscall: name.to_string(),
822 category,
823 pid: notif.pid,
824 parent_pid,
825 host,
826 port,
827 size,
828 argv,
829 denied,
830 };
831
832 let is_held = nr == libc::SYS_execve || nr == libc::SYS_execveat
835 || nr == libc::SYS_connect || nr == libc::SYS_sendto
836 || nr == libc::SYS_bind || nr == libc::SYS_openat;
837
838 if is_held {
839 let (gate_tx, gate_rx) = tokio::sync::oneshot::channel();
840 let _ = tx.send(crate::policy_fn::PolicyEvent {
841 event,
842 gate: Some(gate_tx),
843 });
844 match tokio::time::timeout(std::time::Duration::from_secs(5), gate_rx).await {
845 Ok(Ok(verdict)) => Some(verdict),
846 _ => None, }
848 } else {
849 let _ = tx.send(crate::policy_fn::PolicyEvent {
850 event,
851 gate: None,
852 });
853 None
854 }
855}
856
857async fn handle_notification(
864 notif: SeccompNotif,
865 ctx: &Arc<super::ctx::SupervisorCtx>,
866 dispatch_table: &super::dispatch::DispatchTable,
867 fd: RawFd,
868) {
869 let policy = &ctx.policy;
870
871 crate::resource::register_child_if_new(ctx, notif.pid as i32).await;
876
877 if policy.has_time_start || policy.has_random_seed {
879 let mut pfs = ctx.procfs.lock().await;
880 maybe_patch_vdso(notif.pid as i32, &mut pfs, policy);
881 }
882
883 let mut action = {
885 let nr = notif.data.nr as i64;
886 let mut path_check_nrs = vec![
887 libc::SYS_openat, libc::SYS_execve, libc::SYS_execveat,
888 libc::SYS_linkat, libc::SYS_renameat2, libc::SYS_symlinkat,
889 ];
890 path_check_nrs.extend([
891 arch::SYS_OPEN, arch::SYS_LINK, arch::SYS_RENAME, arch::SYS_SYMLINK,
892 ].into_iter().flatten());
893 let should_precheck_denied = policy.chroot_root.is_none()
894 && path_check_nrs.contains(&nr);
895 if should_precheck_denied {
896 let pfs = ctx.policy_fn.lock().await;
897 if is_path_denied_for_notif(&pfs, ¬if, fd) {
898 NotifAction::Errno(libc::EACCES)
899 } else {
900 drop(pfs);
901 dispatch_table.dispatch(notif, ctx, fd).await
902 }
903 } else {
904 dispatch_table.dispatch(notif, ctx, fd).await
905 }
906 };
907
908 if let Some(verdict) = emit_policy_event(¬if, &action, &ctx.policy_fn, fd).await {
910 use crate::policy_fn::Verdict;
911 match verdict {
912 Verdict::Deny => { action = NotifAction::Errno(libc::EPERM); }
913 Verdict::DenyWith(errno) => { action = NotifAction::Errno(errno); }
914 Verdict::Audit => { }
915 Verdict::Allow => {}
916 }
917 }
918
919 let nr = notif.data.nr as i64;
933 if matches!(action, NotifAction::Continue)
934 && crate::sibling_freeze::requires_freeze_on_continue(nr)
935 {
936 if let Err(e) = crate::sibling_freeze::freeze_siblings_for_execve(notif.pid as i32) {
937 eprintln!(
938 "sandlock: argv-safety freeze failed for pid {}: {} \
939 — denying execve to preserve TOCTOU invariant",
940 notif.pid, e
941 );
942 action = NotifAction::Errno(libc::EPERM);
943 }
944 }
945
946 let _ = send_response(fd, notif.id, action);
948}
949
950pub async fn supervisor(
958 notif_fd: OwnedFd,
959 ctx: Arc<super::ctx::SupervisorCtx>,
960) {
961 let fd = notif_fd.as_raw_fd();
962
963 let dispatch_table = Arc::new(super::dispatch::build_dispatch_table(&ctx.policy, &ctx.resource));
965
966 try_set_sync_wakeup(fd);
968
969 let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<SeccompNotif>();
978
979 std::thread::spawn(move || {
980 loop {
981 match recv_notif(fd) {
982 Ok(notif) => {
983 if tx.send(notif).is_err() {
984 break; }
986 }
987 Err(_) => break, }
989 }
990 });
991
992 let gc = tokio::spawn(process_index_gc(Arc::clone(&ctx.processes)));
998
999 while let Some(notif) = rx.recv().await {
1000 handle_notification(notif, &ctx, &dispatch_table, fd).await;
1001 }
1002
1003 gc.abort();
1004}
1005
1006async fn process_index_gc(processes: Arc<super::state::ProcessIndex>) {
1010 let interval = std::time::Duration::from_secs(300);
1011 loop {
1012 tokio::time::sleep(interval).await;
1013 if processes.len() == 0 {
1014 continue;
1015 }
1016 processes.prune_dead();
1017 }
1018}
1019
1020pub(crate) fn spawn_pid_watcher(
1030 ctx: Arc<super::ctx::SupervisorCtx>,
1031 key: super::state::PidKey,
1032 pidfd: std::os::unix::io::OwnedFd,
1033) {
1034 tokio::spawn(async move {
1035 let async_fd = match tokio::io::unix::AsyncFd::with_interest(
1036 pidfd,
1037 tokio::io::Interest::READABLE,
1038 ) {
1039 Ok(f) => f,
1040 Err(_) => {
1041 cleanup_pid(&ctx, key).await;
1047 return;
1048 }
1049 };
1050 let _ = async_fd.readable().await;
1053 cleanup_pid(&ctx, key).await;
1054 });
1056}
1057
/// Removes `key` from the supervisor's process index by calling
/// `ctx.processes.unregister(key)`.
pub(crate) async fn cleanup_pid(ctx: &super::ctx::SupervisorCtx, key: super::state::PidKey) {
    ctx.processes.unregister(key);
}
1066
// Unit tests for the pieces of this module that run without a sandboxed
// child: Debug formatting, initial values of the state structs, and the
// process_vm read/write helpers applied to the test process itself.
#[cfg(test)]
mod tests {
    use super::*;

    /// Formatting each constructed `NotifAction` variant with `{:?}` must
    /// not panic.
    #[test]
    fn test_notif_action_debug() {
        let _ = format!("{:?}", NotifAction::Continue);
        let _ = format!("{:?}", NotifAction::Errno(1));
        let _ = format!("{:?}", NotifAction::InjectFd { srcfd: 3, targetfd: 4 });
        // A duplicate of stderr provides a real descriptor for the OwnedFd.
        let test_fd = unsafe { OwnedFd::from_raw_fd(libc::dup(2)) };
        let _ = format!("{:?}", NotifAction::InjectFdSend { srcfd: test_fd, newfd_flags: 0 });
        let _ = format!("{:?}", NotifAction::ReturnValue(42));
        let _ = format!("{:?}", NotifAction::Hold);
        let _ = format!("{:?}", NotifAction::Kill { sig: 9, pgid: 1 });
    }

    /// A fresh `NetworkState` is unrestricted and has no bound ports.
    #[test]
    fn test_network_state_new() {
        let ns = super::super::state::NetworkState::new();
        assert!(matches!(ns.network_policy, NetworkPolicy::Unrestricted));
        assert!(ns.port_map.bound_ports.is_empty());
    }

    /// `TimeRandomState::new(None, None)` leaves both optional fields unset.
    #[test]
    fn test_time_random_state_new() {
        let tr = super::super::state::TimeRandomState::new(None, None);
        assert!(tr.time_offset.is_none());
        assert!(tr.random_state.is_none());
    }

    /// A fresh `ResourceState` stores the given limits and starts empty.
    #[test]
    fn test_resource_state_new() {
        let rs = super::super::state::ResourceState::new(1024 * 1024, 10);
        assert_eq!(rs.mem_used, 0);
        assert_eq!(rs.max_memory_bytes, 1024 * 1024);
        assert_eq!(rs.max_processes, 10);
        assert!(!rs.hold_forks);
        assert!(rs.held_notif_ids.is_empty());
    }

    /// `read_child_mem_vm` aimed at this process returns a local's bytes.
    #[test]
    fn test_process_vm_readv_self() {
        let data: u64 = 0xDEADBEEF_CAFEBABE;
        let addr = &data as *const u64 as u64;
        let pid = std::process::id();
        let result = read_child_mem_vm(pid, addr, 8);
        assert!(result.is_ok());
        let bytes = result.unwrap();
        let read_val = u64::from_ne_bytes(bytes[..8].try_into().unwrap());
        assert_eq!(read_val, 0xDEADBEEF_CAFEBABE);
    }

    /// `write_child_mem_vm` aimed at this process overwrites a local.
    #[test]
    fn test_process_vm_writev_self() {
        let mut data: u64 = 0;
        let addr = &mut data as *mut u64 as u64;
        let pid = std::process::id();
        let payload = 0x1234567890ABCDEFu64.to_ne_bytes();
        let result = write_child_mem_vm(pid, addr, &payload);
        assert!(result.is_ok());
        assert_eq!(data, 0x1234567890ABCDEF);
    }
}