1use std::collections::{HashMap, HashSet};
6use std::future::Future;
7use std::io;
8use std::net::IpAddr;
9use std::os::unix::io::{AsRawFd, OwnedFd, RawFd};
10use std::pin::Pin;
11use std::sync::Arc;
12
13use crate::error::NotifError;
14use crate::arch;
15use crate::sys::structs::{
16 SeccompNotif, SeccompNotifAddfd, SeccompNotifResp,
17 SECCOMP_ADDFD_FLAG_SEND, SECCOMP_IOCTL_NOTIF_ADDFD, SECCOMP_IOCTL_NOTIF_ID_VALID, SECCOMP_IOCTL_NOTIF_RECV,
18 SECCOMP_IOCTL_NOTIF_SEND, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
19 SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, SECCOMP_USER_NOTIF_FLAG_CONTINUE,
20 ENOMEM,
21};
22
/// Boxed one-shot callback carried by `NotifAction::InjectFdSendTracked`.
///
/// `send_response` invokes it exactly once, with the value returned by the
/// successful `SECCOMP_IOCTL_NOTIF_ADDFD` ioctl; it is dropped unused when
/// the injection fails.
pub struct OnInjectSuccess(pub Box<dyn FnOnce(i32) + Send + Sync>);

impl OnInjectSuccess {
    /// Boxes `f` so it can travel inside a `NotifAction`.
    pub fn new<F>(f: F) -> Self
    where
        F: FnOnce(i32) + Send + Sync + 'static,
    {
        OnInjectSuccess(Box::new(f))
    }
}

// Closures have no useful `Debug` output, so print a fixed placeholder; this
// keeps `#[derive(Debug)]` usable on the enum that embeds the callback.
impl std::fmt::Debug for OnInjectSuccess {
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(out, "OnInjectSuccess(<callback>)")
    }
}
46
47pub struct Deferred(Pin<Box<dyn Future<Output = NotifAction> + Send + 'static>>);
64
65unsafe impl Sync for Deferred {}
75
76impl std::fmt::Debug for Deferred {
77 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78 f.write_str("Deferred(<future>)")
79 }
80}
81
82impl Deferred {
83 pub fn new<F: Future<Output = NotifAction> + Send + 'static>(f: F) -> Self {
84 Self(Box::pin(f))
85 }
86
87 pub async fn run(self) -> NotifAction {
90 self.0.await
91 }
92}
93
/// Decision a handler returns for one seccomp notification.
///
/// The per-variant notes describe what `send_response` in this file does
/// with each value.
#[derive(Debug)]
pub enum NotifAction {
    /// Reply with `SECCOMP_USER_NOTIF_FLAG_CONTINUE` set.
    Continue,
    /// Reply with `error = -errno`; the payload is the positive errno value.
    Errno(i32),
    /// Issue an ADDFD ioctl for `srcfd` with `newfd = targetfd`, then reply
    /// with the continue flag.
    InjectFd { srcfd: RawFd, targetfd: i32 },
    /// Issue an ADDFD ioctl with `SECCOMP_ADDFD_FLAG_SEND`. If the ioctl
    /// fails, an `EACCES` response is sent instead.
    InjectFdSend { srcfd: OwnedFd, newfd_flags: u32 },
    /// Same as `InjectFdSend`, and additionally calls `on_success` with the
    /// ioctl's return value when the injection succeeds.
    InjectFdSendTracked {
        srcfd: OwnedFd,
        newfd_flags: u32,
        on_success: OnInjectSuccess,
    },
    /// Reply with `val` as the syscall return value and no error.
    ReturnValue(i64),
    /// Send no response at all.
    Hold,
    /// Call `killpg(pgid, sig)` and then reply with `ENOMEM`.
    Kill { sig: i32, pgid: i32 },
    /// Resolve the action asynchronously. `handle_notification` spawns the
    /// future; if a `Defer` ever reaches `send_response` directly it is
    /// answered with `EIO`.
    Defer(Deferred),
}
130
131impl NotifAction {
132 pub fn defer<F: Future<Output = NotifAction> + Send + 'static>(fut: F) -> Self {
135 NotifAction::Defer(Deferred::new(fut))
136 }
137
138 pub fn inject_bytes(content: &[u8]) -> NotifAction {
153 match content_memfd(content, true) {
154 Ok(fd) => NotifAction::InjectFdSend {
155 srcfd: fd,
156 newfd_flags: libc::O_CLOEXEC as u32,
157 },
158 Err(_) => NotifAction::Errno(libc::EIO),
159 }
160 }
161}
162
163pub fn content_memfd(content: &[u8], seal: bool) -> io::Result<OwnedFd> {
177 use std::io::{Seek, SeekFrom, Write};
178 use std::os::unix::io::FromRawFd;
179
180 let flags = if seal {
181 (libc::MFD_CLOEXEC | libc::MFD_ALLOW_SEALING) as u32
182 } else {
183 libc::MFD_CLOEXEC as u32
184 };
185 let memfd = crate::sys::syscall::memfd_create("sandlock-content", flags)?;
186
187 {
190 let raw = memfd.as_raw_fd();
191 let mut file = unsafe { std::fs::File::from_raw_fd(raw) };
192 let res = file
193 .write_all(content)
194 .and_then(|()| file.seek(SeekFrom::Start(0)).map(|_| ()));
195 std::mem::forget(file); res?;
197 }
198
199 if seal {
200 let seals =
202 libc::F_SEAL_SEAL | libc::F_SEAL_WRITE | libc::F_SEAL_GROW | libc::F_SEAL_SHRINK;
203 unsafe { libc::fcntl(memfd.as_raw_fd(), libc::F_ADD_SEALS, seals) };
204 }
205
206 Ok(memfd)
207}
208
209fn finalize_deferred(action: NotifAction) -> NotifAction {
214 match action {
215 NotifAction::Defer(_) => NotifAction::Errno(libc::EIO),
216 other => other,
217 }
218}
219
/// Ports permitted for one specific IP address.
#[derive(Debug, Clone)]
pub enum PortAllow {
    /// Every port on that address is allowed.
    Any,
    /// Only the listed ports are allowed.
    Specific(HashSet<u16>),
}

/// Destination allow-list consulted via [`NetworkPolicy::allows`].
#[derive(Debug, Clone)]
pub enum NetworkPolicy {
    /// Every `(ip, port)` pair is allowed.
    Unrestricted,
    /// Only destinations matched by one of the two tables are allowed.
    AllowList {
        /// Per-address port rules.
        per_ip: HashMap<IpAddr, PortAllow>,
        /// Ports allowed regardless of the destination address.
        any_ip_ports: HashSet<u16>,
    },
}

impl NetworkPolicy {
    /// Returns `true` when a connection to `ip:port` is permitted.
    ///
    /// For an allow-list, the port-only table is checked first; otherwise
    /// the address must have an entry that covers `port`.
    pub fn allows(&self, ip: IpAddr, port: u16) -> bool {
        let (per_ip, any_ip_ports) = match self {
            NetworkPolicy::Unrestricted => return true,
            NetworkPolicy::AllowList { per_ip, any_ip_ports } => (per_ip, any_ip_ports),
        };

        any_ip_ports.contains(&port)
            || per_ip.get(&ip).map_or(false, |rule| match rule {
                PortAllow::Any => true,
                PortAllow::Specific(ports) => ports.contains(&port),
            })
    }
}
271
272pub(crate) fn is_path_denied_for_notif(
283 policy_fn_state: &super::state::PolicyFnState,
284 notif: &SeccompNotif,
285 notif_fd: RawFd,
286) -> bool {
287 if let Some(path) = resolve_path_for_notif(notif, notif_fd) {
288 if is_denied_with_symlink_resolve(policy_fn_state, &path) {
289 return true;
290 }
291 }
292 if let Some(path) = resolve_second_path_for_notif(notif, notif_fd) {
294 if is_denied_with_symlink_resolve(policy_fn_state, &path) {
295 return true;
296 }
297 }
298 false
299}
300
301fn is_denied_with_symlink_resolve(
307 policy_fn_state: &super::state::PolicyFnState,
308 path: &str,
309) -> bool {
310 if policy_fn_state.is_path_denied(path) {
312 return true;
313 }
314 if let Ok(real) = std::fs::canonicalize(path) {
316 if policy_fn_state.is_path_denied(&real.to_string_lossy()) {
317 return true;
318 }
319 }
320 false
321}
322
/// Looks up the thread-group id of `tid` from the `Tgid:` line of
/// `/proc/<tid>/status`.
///
/// Returns `None` when the file cannot be read or no parsable `Tgid:` line
/// is present.
fn tgid_of(tid: u32) -> Option<u32> {
    let status_path = format!("/proc/{}/status", tid);
    let status = std::fs::read_to_string(status_path).ok()?;

    for line in status.lines() {
        if let Some(value) = line.strip_prefix("Tgid:") {
            if let Ok(tgid) = value.trim().parse() {
                return Some(tgid);
            }
        }
    }
    None
}
330
331pub(crate) fn dup_fd_from_pid(pid: u32, target_fd: i32) -> io::Result<OwnedFd> {
340 use crate::sys::syscall::{pidfd_getfd, pidfd_open};
341 let pidfd = pidfd_open(pid, 0).or_else(|e| match tgid_of(pid) {
342 Some(tgid) if tgid != pid => pidfd_open(tgid, 0),
343 _ => Err(e),
344 })?;
345 pidfd_getfd(&pidfd, target_fd, 0)
346}
347
/// Policy knobs consulted by the notification supervisor.
///
/// Notes marked "presumably" are inferred from the field name only and
/// should be confirmed against the code that consumes them.
pub struct NotifPolicy {
    // Presumably the memory ceiling in bytes, meaningful when
    // `has_memory_limit` is set — TODO confirm.
    pub max_memory_bytes: u64,
    // Presumably the process-count ceiling — TODO confirm.
    pub max_processes: u32,
    pub has_memory_limit: bool,
    pub has_net_allowlist: bool,
    // Either this or `has_time_start` makes `handle_notification` attempt a
    // vDSO patch; the flag is forwarded to `crate::vdso::patch`.
    pub has_random_seed: bool,
    // When set, `time_offset` is forwarded to `crate::vdso::patch`.
    pub has_time_start: bool,
    // When set, a `Continue` on a syscall for which
    // `crate::freeze::requires_freeze_on_continue` is true first freezes the
    // sandbox; a failed freeze denies the syscall with `EPERM`.
    pub argv_safety_required: bool,
    // Only passed to the vDSO patcher when `has_time_start` is true.
    pub time_offset: i64,
    pub num_cpus: Option<u32>,
    pub port_remap: bool,
    pub cow_enabled: bool,
    // When `Some`, `handle_notification` skips its denied-path precheck.
    pub chroot_root: Option<std::path::PathBuf>,
    pub chroot_readable: Vec<std::path::PathBuf>,
    pub chroot_writable: Vec<std::path::PathBuf>,
    pub chroot_denied: Vec<std::path::PathBuf>,
    // Presumably pairs of paths describing mounts inside the chroot; which
    // element is source and which is target is not visible here — TODO confirm.
    pub chroot_mounts: Vec<(std::path::PathBuf, std::path::PathBuf)>,
    pub deterministic_dirs: bool,
    pub virtual_hostname: Option<String>,
    pub has_http_acl: bool,
    pub virtual_etc_hosts: String,
}
389
390fn recv_notif(fd: RawFd) -> io::Result<SeccompNotif> {
397 let mut notif: SeccompNotif = unsafe { std::mem::zeroed() };
398 let ret = unsafe {
399 libc::ioctl(fd, SECCOMP_IOCTL_NOTIF_RECV as libc::c_ulong, &mut notif as *mut _)
400 };
401 if ret < 0 {
402 Err(io::Error::last_os_error())
403 } else {
404 Ok(notif)
405 }
406}
407
/// Result of a zero-timeout poll on the notification fd (see
/// `probe_notif_fd`).
enum NotifFdState {
    /// `POLLIN` is set: a notification can be received now.
    Pending,
    /// Nothing readable and no error condition.
    Empty,
    /// `poll` failed, or `POLLHUP` / `POLLERR` / `POLLNVAL` was reported
    /// without `POLLIN`; the supervisor loop exits on this state.
    Terminal,
}
422
423fn probe_notif_fd(fd: RawFd) -> NotifFdState {
434 let mut pfd = libc::pollfd {
435 fd,
436 events: libc::POLLIN,
437 revents: 0,
438 };
439 let r = unsafe { libc::poll(&mut pfd, 1, 0) };
440 if r > 0 && (pfd.revents & libc::POLLIN) != 0 {
441 return NotifFdState::Pending;
442 }
443 if r < 0 || (pfd.revents & (libc::POLLHUP | libc::POLLERR | libc::POLLNVAL)) != 0 {
444 return NotifFdState::Terminal;
445 }
446 NotifFdState::Empty
447}
448
449fn respond_continue(fd: RawFd, id: u64) -> io::Result<()> {
451 let resp = SeccompNotifResp {
452 id,
453 val: 0,
454 error: 0,
455 flags: SECCOMP_USER_NOTIF_FLAG_CONTINUE,
456 };
457 send_resp_raw(fd, &resp)
458}
459
460fn respond_errno(fd: RawFd, id: u64, errno: i32) -> io::Result<()> {
462 let resp = SeccompNotifResp {
463 id,
464 val: 0,
465 error: -errno,
466 flags: 0,
467 };
468 send_resp_raw(fd, &resp)
469}
470
471fn respond_value(fd: RawFd, id: u64, val: i64) -> io::Result<()> {
473 let resp = SeccompNotifResp {
474 id,
475 val,
476 error: 0,
477 flags: 0,
478 };
479 send_resp_raw(fd, &resp)
480}
481
482fn inject_failure_resp(id: u64) -> SeccompNotifResp {
490 SeccompNotifResp {
491 id,
492 val: 0,
493 error: -libc::EACCES,
494 flags: 0,
495 }
496}
497
498fn inject_fd_and_send(fd: RawFd, id: u64, srcfd: RawFd, newfd_flags: u32) -> io::Result<i32> {
504 let addfd = SeccompNotifAddfd {
505 id,
506 flags: SECCOMP_ADDFD_FLAG_SEND,
507 srcfd: srcfd as u32,
508 newfd: 0, newfd_flags,
510 };
511 let ret = unsafe {
512 libc::ioctl(fd, SECCOMP_IOCTL_NOTIF_ADDFD as libc::c_ulong, &addfd as *const _)
513 };
514 if ret < 0 {
515 Err(io::Error::last_os_error())
516 } else {
517 Ok(ret as i32)
518 }
519}
520
521fn inject_fd(fd: RawFd, id: u64, srcfd: RawFd, targetfd: i32) -> io::Result<()> {
524 let addfd = SeccompNotifAddfd {
525 id,
526 flags: 0,
527 srcfd: srcfd as u32,
528 newfd: targetfd as u32,
529 newfd_flags: 0,
530 };
531 let ret = unsafe {
532 libc::ioctl(fd, SECCOMP_IOCTL_NOTIF_ADDFD as libc::c_ulong, &addfd as *const _)
533 };
534 if ret < 0 {
535 Err(io::Error::last_os_error())
536 } else {
537 Ok(())
538 }
539}
540
541fn send_resp_raw(fd: RawFd, resp: &SeccompNotifResp) -> io::Result<()> {
543 let ret = unsafe {
544 libc::ioctl(fd, SECCOMP_IOCTL_NOTIF_SEND as libc::c_ulong, resp as *const _)
545 };
546 if ret < 0 {
547 Err(io::Error::last_os_error())
548 } else {
549 Ok(())
550 }
551}
552
553pub(crate) fn id_valid(fd: RawFd, id: u64) -> io::Result<()> {
556 let ret = unsafe {
557 libc::ioctl(fd, SECCOMP_IOCTL_NOTIF_ID_VALID as libc::c_ulong, &id as *const _)
558 };
559 if ret < 0 {
560 Err(io::Error::last_os_error())
561 } else {
562 Ok(())
563 }
564}
565
566fn try_set_sync_wakeup(fd: RawFd) {
568 let flags: u64 = SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP as u64;
569 unsafe {
570 libc::ioctl(fd, SECCOMP_IOCTL_NOTIF_SET_FLAGS as libc::c_ulong, &flags as *const _);
571 }
572}
573
574fn read_child_mem_vm(pid: u32, addr: u64, len: usize) -> Result<Vec<u8>, NotifError> {
580 let mut buf = vec![0u8; len];
581 let local_iov = libc::iovec {
582 iov_base: buf.as_mut_ptr() as *mut libc::c_void,
583 iov_len: len,
584 };
585 let remote_iov = libc::iovec {
586 iov_base: addr as *mut libc::c_void,
587 iov_len: len,
588 };
589 let ret = unsafe {
590 libc::process_vm_readv(pid as i32, &local_iov, 1, &remote_iov, 1, 0)
591 };
592 if ret < 0 {
593 Err(NotifError::ChildMemoryRead(io::Error::last_os_error()))
594 } else {
595 buf.truncate(ret as usize);
596 Ok(buf)
597 }
598}
599
600fn write_child_mem_vm(pid: u32, addr: u64, data: &[u8]) -> Result<(), NotifError> {
602 let local_iov = libc::iovec {
603 iov_base: data.as_ptr() as *mut libc::c_void,
604 iov_len: data.len(),
605 };
606 let remote_iov = libc::iovec {
607 iov_base: addr as *mut libc::c_void,
608 iov_len: data.len(),
609 };
610 let ret = unsafe {
611 libc::process_vm_writev(pid as i32, &local_iov, 1, &remote_iov, 1, 0)
612 };
613 if ret < 0 {
614 Err(NotifError::ChildMemoryRead(io::Error::last_os_error()))
615 } else if (ret as usize) < data.len() {
616 Err(NotifError::ChildMemoryRead(io::Error::new(
617 io::ErrorKind::WriteZero,
618 format!("short write: {} of {} bytes", ret, data.len()),
619 )))
620 } else {
621 Ok(())
622 }
623}
624
625pub fn read_child_mem(
635 notif_fd: RawFd,
636 id: u64,
637 pid: u32,
638 addr: u64,
639 len: usize,
640) -> Result<Vec<u8>, NotifError> {
641 id_valid(notif_fd, id).map_err(NotifError::Ioctl)?;
642 let result = read_child_mem_vm(pid, addr, len)?;
643 id_valid(notif_fd, id).map_err(NotifError::Ioctl)?;
644 Ok(result)
645}
646
647pub fn read_child_cstr(
662 notif_fd: RawFd,
663 id: u64,
664 pid: u32,
665 addr: u64,
666 max_len: usize,
667) -> Option<String> {
668 if addr == 0 || max_len == 0 {
669 return None;
670 }
671
672 const PAGE_SIZE: u64 = 4096;
673 let mut result = Vec::with_capacity(max_len.min(256));
674 let mut cur = addr;
675 while result.len() < max_len {
676 let page_remaining = PAGE_SIZE - (cur % PAGE_SIZE);
677 let remaining = max_len - result.len();
678 let to_read = page_remaining.min(remaining as u64) as usize;
679 let bytes = read_child_mem(notif_fd, id, pid, cur, to_read).ok()?;
680 if let Some(nul) = bytes.iter().position(|&b| b == 0) {
681 result.extend_from_slice(&bytes[..nul]);
682 return String::from_utf8(result).ok();
683 }
684 result.extend_from_slice(&bytes);
685 cur += to_read as u64;
686 }
687
688 String::from_utf8(result).ok()
689}
690
691pub fn write_child_mem(
698 notif_fd: RawFd,
699 id: u64,
700 pid: u32,
701 addr: u64,
702 data: &[u8],
703) -> Result<(), NotifError> {
704 id_valid(notif_fd, id).map_err(NotifError::Ioctl)?;
705 write_child_mem_vm(pid, addr, data)?;
706 id_valid(notif_fd, id).map_err(NotifError::Ioctl)?;
707 Ok(())
708}
709
710fn send_response(fd: RawFd, id: u64, action: NotifAction) -> io::Result<()> {
716 match action {
717 NotifAction::Continue => respond_continue(fd, id),
718 NotifAction::Errno(errno) => respond_errno(fd, id, errno),
719 NotifAction::InjectFd { srcfd, targetfd } => {
720 inject_fd(fd, id, srcfd, targetfd)?;
721 respond_continue(fd, id)
722 }
723 NotifAction::InjectFdSend { srcfd, newfd_flags } => {
724 match inject_fd_and_send(fd, id, srcfd.as_raw_fd(), newfd_flags) {
730 Ok(_new_fd) => Ok(()),
731 Err(_) => send_resp_raw(fd, &inject_failure_resp(id)),
732 }
733 }
734 NotifAction::InjectFdSendTracked { srcfd, newfd_flags, on_success } => {
735 match inject_fd_and_send(fd, id, srcfd.as_raw_fd(), newfd_flags) {
736 Ok(new_fd) => {
737 (on_success.0)(new_fd);
738 Ok(())
739 }
740 Err(_) => send_resp_raw(fd, &inject_failure_resp(id)),
741 }
742 }
743 NotifAction::ReturnValue(val) => respond_value(fd, id, val),
744 NotifAction::Hold => Ok(()), NotifAction::Defer(_) => {
746 debug_assert!(false, "Defer reached send_response; should be intercepted earlier");
750 respond_errno(fd, id, libc::EIO)
751 }
752 NotifAction::Kill { sig, pgid } => {
753 unsafe { libc::killpg(pgid, sig) };
756 respond_errno(fd, id, ENOMEM)
757 }
758 }
759}
760
761fn maybe_patch_vdso(pid: i32, procfs: &mut super::state::ProcfsState, policy: &NotifPolicy) {
767 let base = match crate::vdso::find_vdso_base(pid) {
768 Ok(addr) => addr,
769 Err(_) => return,
770 };
771 if base == procfs.vdso_patched_addr {
772 return; }
774 let time_offset = if policy.has_time_start { Some(policy.time_offset) } else { None };
775 if crate::vdso::patch(pid, time_offset, policy.has_random_seed).is_ok() {
776 procfs.vdso_patched_addr = base;
777 }
778}
779
780fn syscall_name(nr: i64) -> &'static str {
786 match nr {
787 n if n == libc::SYS_openat => "openat",
788 n if n == libc::SYS_connect => "connect",
789 n if n == libc::SYS_sendto => "sendto",
790 n if n == libc::SYS_sendmsg => "sendmsg",
791 n if n == libc::SYS_sendmmsg => "sendmmsg",
792 n if n == libc::SYS_bind => "bind",
793 n if n == libc::SYS_clone => "clone",
794 n if n == libc::SYS_clone3 => "clone3",
795 n if Some(n) == arch::SYS_VFORK => "vfork",
796 n if Some(n) == arch::SYS_FORK => "fork",
797 n if n == libc::SYS_execve => "execve",
798 n if n == libc::SYS_execveat => "execveat",
799 n if n == libc::SYS_mmap => "mmap",
800 n if n == libc::SYS_munmap => "munmap",
801 n if n == libc::SYS_brk => "brk",
802 n if n == libc::SYS_getrandom => "getrandom",
803 n if n == libc::SYS_unlinkat => "unlinkat",
804 n if n == libc::SYS_mkdirat => "mkdirat",
805 _ => "unknown",
806 }
807}
808
809fn syscall_category(nr: i64) -> crate::policy_fn::SyscallCategory {
811 use crate::policy_fn::SyscallCategory;
812 match nr {
813 n if n == libc::SYS_openat || n == libc::SYS_unlinkat
814 || n == libc::SYS_mkdirat || n == libc::SYS_renameat2
815 || n == libc::SYS_symlinkat || n == libc::SYS_linkat
816 || n == libc::SYS_fchmodat || n == libc::SYS_fchownat
817 || n == libc::SYS_truncate || n == libc::SYS_readlinkat
818 || n == libc::SYS_newfstatat || n == libc::SYS_statx
819 || n == libc::SYS_faccessat || n == libc::SYS_getdents64
820 || Some(n) == arch::SYS_GETDENTS => SyscallCategory::File,
821 n if n == libc::SYS_connect || n == libc::SYS_sendto
822 || n == libc::SYS_sendmsg || n == libc::SYS_sendmmsg
823 || n == libc::SYS_bind
824 || n == libc::SYS_getsockname => SyscallCategory::Network,
825 n if n == libc::SYS_clone || n == libc::SYS_clone3
826 || Some(n) == arch::SYS_VFORK || Some(n) == arch::SYS_FORK
827 || n == libc::SYS_execve || n == libc::SYS_execveat => SyscallCategory::Process,
828 n if n == libc::SYS_mmap || n == libc::SYS_munmap
829 || n == libc::SYS_brk || n == libc::SYS_mremap
830 => SyscallCategory::Memory,
831 _ => SyscallCategory::File, }
833}
834
/// Reads the parent pid of `pid` from `/proc/<pid>/stat`.
///
/// The process name field is parenthesised and may itself contain spaces
/// or `)`, so parsing starts after the *last* `)`. The fields that follow
/// are the state and then the parent pid.
///
/// Returns `None` if the file cannot be read or does not have the expected
/// shape; a truncated or malformed line never panics.
fn read_ppid(pid: u32) -> Option<u32> {
    let stat = std::fs::read_to_string(format!("/proc/{}/stat", pid)).ok()?;
    let close_paren = stat.rfind(')')?;
    // Checked slice: nothing after the ')' simply yields no fields.
    let rest = stat.get(close_paren + 1..)?;
    // Field 0 is the state, field 1 the parent pid.
    rest.split_whitespace().nth(1)?.parse().ok()
}
846
847fn read_path_for_event(notif: &SeccompNotif, addr: u64, notif_fd: RawFd) -> Option<String> {
849 if addr == 0 { return None; }
850 let bytes = read_child_mem(notif_fd, notif.id, notif.pid, addr, 256).ok()?;
851 let nul = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
852 String::from_utf8(bytes[..nul].to_vec()).ok()
853}
854
/// Lexically normalizes `path`: drops `.` components and resolves `..`
/// against the preceding component, without touching the filesystem.
///
/// `..` at the root of an absolute path, or at the start of a relative one,
/// is discarded. An empty result becomes `"/"` for absolute input and `"."`
/// for relative input.
fn normalize_path(path: &std::path::Path) -> String {
    use std::path::{Component, PathBuf};

    let rooted = path.is_absolute();
    let mut out = if rooted { PathBuf::from("/") } else { PathBuf::new() };

    for part in path.components() {
        match part {
            Component::Normal(name) => out.push(name),
            Component::ParentDir => {
                // `pop` is a no-op when there is nothing left to remove.
                out.pop();
            }
            Component::RootDir | Component::CurDir | Component::Prefix(_) => {}
        }
    }

    if !out.as_os_str().is_empty() {
        return out.to_string_lossy().into_owned();
    }
    if rooted {
        "/".into()
    } else {
        ".".into()
    }
}
881
882fn resolve_at_path_for_event(notif: &SeccompNotif, dirfd: i64, path: &str) -> Option<String> {
883 use std::path::Path;
884
885 if Path::new(path).is_absolute() {
886 return Some(normalize_path(Path::new(path)));
887 }
888
889 let dirfd32 = dirfd as i32;
890 let base = if dirfd32 == libc::AT_FDCWD {
891 std::fs::read_link(format!("/proc/{}/cwd", notif.pid)).ok()?
892 } else {
893 std::fs::read_link(format!("/proc/{}/fd/{}", notif.pid, dirfd32)).ok()?
894 };
895
896 Some(normalize_path(&base.join(path)))
897}
898
899fn resolve_path_for_notif(notif: &SeccompNotif, notif_fd: RawFd) -> Option<String> {
900 let nr = notif.data.nr as i64;
901 match nr {
902 n if n == libc::SYS_openat => {
903 let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?;
905 resolve_at_path_for_event(notif, notif.data.args[0] as i64, &path)
906 }
907 n if Some(n) == arch::SYS_OPEN || n == libc::SYS_execve => {
908 let path = read_path_for_event(notif, notif.data.args[0], notif_fd)?;
909 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path)
910 }
911 n if n == libc::SYS_execveat => {
912 let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?;
913 resolve_at_path_for_event(notif, notif.data.args[0] as i64, &path)
914 }
915 n if n == libc::SYS_linkat => {
918 let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?;
919 resolve_at_path_for_event(notif, notif.data.args[0] as i64, &path)
920 }
921 n if n == libc::SYS_renameat2 => {
924 let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?;
925 resolve_at_path_for_event(notif, notif.data.args[0] as i64, &path)
926 }
927 n if n == libc::SYS_symlinkat => {
930 let target = read_path_for_event(notif, notif.data.args[0], notif_fd)?;
931 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &target)
933 }
934 n if Some(n) == arch::SYS_LINK => {
936 let path = read_path_for_event(notif, notif.data.args[0], notif_fd)?;
937 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path)
938 }
939 n if Some(n) == arch::SYS_RENAME => {
941 let path = read_path_for_event(notif, notif.data.args[0], notif_fd)?;
942 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path)
943 }
944 n if Some(n) == arch::SYS_SYMLINK => {
946 let target = read_path_for_event(notif, notif.data.args[0], notif_fd)?;
947 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &target)
948 }
949 _ => None,
950 }
951}
952
953fn resolve_second_path_for_notif(notif: &SeccompNotif, notif_fd: RawFd) -> Option<String> {
957 let nr = notif.data.nr as i64;
958 match nr {
959 n if n == libc::SYS_renameat2 => {
961 let path = read_path_for_event(notif, notif.data.args[3], notif_fd)?;
962 resolve_at_path_for_event(notif, notif.data.args[2] as i64, &path)
963 }
964 n if n == libc::SYS_linkat => {
968 let path = read_path_for_event(notif, notif.data.args[3], notif_fd)?;
969 resolve_at_path_for_event(notif, notif.data.args[2] as i64, &path)
970 }
971 n if Some(n) == arch::SYS_RENAME => {
973 let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?;
974 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path)
975 }
976 n if Some(n) == arch::SYS_LINK => {
978 let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?;
979 resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path)
980 }
981 _ => None,
982 }
983}
984
985fn read_sockaddr_for_event(notif: &SeccompNotif, addr: u64, len: usize, notif_fd: RawFd)
987 -> (Option<std::net::IpAddr>, Option<u16>)
988{
989 if addr == 0 || len < 4 { return (None, None); }
990 let bytes = match read_child_mem(notif_fd, notif.id, notif.pid, addr, len.min(128)) {
991 Ok(b) => b,
992 Err(_) => return (None, None),
993 };
994 if bytes.len() < 4 { return (None, None); }
995 let family = u16::from_ne_bytes([bytes[0], bytes[1]]);
996 let port = u16::from_be_bytes([bytes[2], bytes[3]]);
997 let ip = match family as u32 {
998 f if f == crate::sys::structs::AF_INET && bytes.len() >= 8 => {
999 Some(std::net::IpAddr::V4(std::net::Ipv4Addr::new(
1000 bytes[4], bytes[5], bytes[6], bytes[7],
1001 )))
1002 }
1003 f if f == crate::sys::structs::AF_INET6 && bytes.len() >= 24 => {
1004 let mut addr = [0u8; 16];
1005 addr.copy_from_slice(&bytes[8..24]);
1006 Some(std::net::IpAddr::V6(std::net::Ipv6Addr::from(addr)))
1007 }
1008 _ => None,
1009 };
1010 (ip, if port > 0 { Some(port) } else { None })
1011}
1012
1013fn read_argv_for_event(notif: &SeccompNotif, argv_ptr: u64, notif_fd: RawFd) -> Option<Vec<String>> {
1016 if argv_ptr == 0 { return None; }
1017 let mut args = Vec::new();
1018 let ptr_size = std::mem::size_of::<u64>();
1019
1020 for i in 0..64u64 {
1021 let ptr_addr = argv_ptr + i * ptr_size as u64;
1022 let ptr_bytes = read_child_mem(notif_fd, notif.id, notif.pid, ptr_addr, ptr_size).ok()?;
1023 let str_ptr = u64::from_ne_bytes(ptr_bytes[..8].try_into().ok()?);
1024 if str_ptr == 0 { break; } if let Some(s) = read_path_for_event(notif, str_ptr, notif_fd) {
1027 args.push(s);
1028 } else {
1029 break;
1030 }
1031 }
1032
1033 if args.is_empty() { None } else { Some(args) }
1034}
1035
1036fn resolve_held_gate(
1043 received: Option<crate::policy_fn::Verdict>,
1044) -> Option<crate::policy_fn::Verdict> {
1045 match received {
1046 Some(v) => Some(v),
1047 None => Some(crate::policy_fn::Verdict::Deny),
1048 }
1049}
1050
1051async fn emit_policy_event(
1054 notif: &SeccompNotif,
1055 action: &NotifAction,
1056 policy_fn_state: &Arc<tokio::sync::Mutex<super::state::PolicyFnState>>,
1057 notif_fd: RawFd,
1058) -> Option<crate::policy_fn::Verdict> {
1059 let pfs = policy_fn_state.lock().await;
1060 let tx = match pfs.event_tx.as_ref() {
1061 Some(tx) => tx.clone(),
1062 None => return None,
1063 };
1064 drop(pfs);
1065
1066 let nr = notif.data.nr as i64;
1067 let denied = matches!(action, NotifAction::Errno(_));
1068 let name = syscall_name(nr);
1069 let category = syscall_category(nr);
1070 let parent_pid = read_ppid(notif.pid);
1071
1072 let mut host = None;
1089 let mut port = None;
1090 let mut size = None;
1091 let mut argv = None;
1092
1093 if !denied && (nr == libc::SYS_execve || nr == libc::SYS_execveat) {
1094 let argv_ptr = if nr == libc::SYS_execveat {
1097 notif.data.args[2]
1098 } else {
1099 notif.data.args[1]
1100 };
1101 argv = read_argv_for_event(notif, argv_ptr, notif_fd);
1102 }
1103
1104 if nr == libc::SYS_connect || nr == libc::SYS_sendto || nr == libc::SYS_bind {
1105 let addr_ptr = notif.data.args[1];
1107 let addr_len = notif.data.args[2] as usize;
1108 let (h, p) = read_sockaddr_for_event(notif, addr_ptr, addr_len, notif_fd);
1109 host = h;
1110 port = p;
1111 }
1112
1113 if nr == libc::SYS_mmap {
1114 size = Some(notif.data.args[1]);
1116 }
1117
1118 let event = crate::policy_fn::SyscallEvent {
1119 syscall: name.to_string(),
1120 category,
1121 pid: notif.pid,
1122 parent_pid,
1123 host,
1124 port,
1125 size,
1126 argv,
1127 denied,
1128 };
1129
1130 let is_held = nr == libc::SYS_execve || nr == libc::SYS_execveat
1133 || nr == libc::SYS_connect || nr == libc::SYS_sendto
1134 || nr == libc::SYS_bind || nr == libc::SYS_openat;
1135
1136 if is_held {
1137 let (gate_tx, gate_rx) = tokio::sync::oneshot::channel();
1138 let _ = tx.send(crate::policy_fn::PolicyEvent {
1139 event,
1140 gate: Some(gate_tx),
1141 });
1142 let received = match tokio::time::timeout(std::time::Duration::from_secs(5), gate_rx).await {
1143 Ok(Ok(verdict)) => Some(verdict),
1144 _ => None, };
1146 resolve_held_gate(received)
1147 } else {
1148 let _ = tx.send(crate::policy_fn::PolicyEvent {
1149 event,
1150 gate: None,
1151 });
1152 None
1153 }
1154}
1155
/// Number of permits in the semaphore `supervisor` creates for deferred
/// handlers. When no permit is free, `handle_notification` fails the syscall
/// with `EAGAIN` instead of spawning another task.
const DEFER_MAX_INFLIGHT: usize = 64;

/// Time limit `spawn_deferred` gives a deferred handler; on expiry
/// `run_deferred_within` fails the syscall with `EIO`.
const DEFER_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);
1172
1173async fn run_deferred_within(deferred: Deferred, limit: std::time::Duration) -> NotifAction {
1179 match tokio::time::timeout(limit, deferred.run()).await {
1180 Ok(action) => finalize_deferred(action),
1181 Err(_) => {
1182 eprintln!(
1183 "sandlock: deferred handler exceeded {:?}; failing syscall with EIO",
1184 limit
1185 );
1186 NotifAction::Errno(libc::EIO)
1187 }
1188 }
1189}
1190
1191fn spawn_deferred(
1197 fd: RawFd,
1198 id: u64,
1199 deferred: Deferred,
1200 permit: tokio::sync::OwnedSemaphorePermit,
1201) {
1202 tokio::spawn(async move {
1203 let _permit = permit; let action = run_deferred_within(deferred, DEFER_TIMEOUT).await;
1205 let _ = send_response(fd, id, action);
1206 });
1207}
1208
/// Processes one seccomp notification end to end: decides an action, runs
/// the policy callback, applies the argv-safety and process-tracking hooks,
/// and sends the response.
///
/// The steps are order-dependent:
/// 1. Register the notifying pid and, if time/random virtualization is on,
///    try to patch its vDSO.
/// 2. Compute the initial action: a denied-path precheck (only when no
///    chroot root is configured) or the dispatch table.
/// 3. For a `Continue` on a freeze-requiring syscall with argv safety on,
///    freeze the sandbox; failure turns the action into `EPERM`.
/// 4. Emit the policy event; a `Deny`/`DenyWith` verdict overrides the action.
/// 5. Roll back the fork count if a counted fork is no longer continued.
/// 6. Prepare process-creation tracking where required.
/// 7. `Defer` actions are handed to a spawned task, or rejected.
/// 8. Send the response, then complete or abort tracking and detach the
///    frozen tasks.
async fn handle_notification(
    notif: SeccompNotif,
    ctx: &Arc<super::ctx::SupervisorCtx>,
    dispatch_table: &super::dispatch::DispatchTable,
    fd: RawFd,
    defer_sem: &Arc<tokio::sync::Semaphore>,
) {
    let policy = &ctx.policy;

    crate::resource::register_child_if_new(ctx, notif.pid as i32).await;

    // The vDSO only needs patching when time or randomness is virtualized.
    if policy.has_time_start || policy.has_random_seed {
        let mut pfs = ctx.procfs.lock().await;
        maybe_patch_vdso(notif.pid as i32, &mut pfs, policy);
    }

    let mut action = {
        let nr = notif.data.nr as i64;
        // Syscalls whose path arguments are checked against the deny list
        // before dispatch; arch-specific numbers are added when present.
        let mut path_check_nrs = vec![
            libc::SYS_openat, libc::SYS_execve, libc::SYS_execveat,
            libc::SYS_linkat, libc::SYS_renameat2, libc::SYS_symlinkat,
        ];
        path_check_nrs.extend([
            arch::SYS_OPEN, arch::SYS_LINK, arch::SYS_RENAME, arch::SYS_SYMLINK,
        ].into_iter().flatten());
        // The precheck is skipped entirely when a chroot root is configured.
        let should_precheck_denied = policy.chroot_root.is_none()
            && path_check_nrs.contains(&nr);
        if should_precheck_denied {
            let pfs = ctx.policy_fn.lock().await;
            if is_path_denied_for_notif(&pfs, &notif, fd) {
                NotifAction::Errno(libc::EACCES)
            } else {
                // Release the policy lock before running the handlers.
                drop(pfs);
                dispatch_table.dispatch(notif, fd).await
            }
        } else {
            dispatch_table.dispatch(notif, fd).await
        }
    };

    let nr = notif.data.nr as i64;
    // Remember whether this notification was counted as a fork so the count
    // can be rolled back if the syscall ends up not being continued.
    let fork_counted = matches!(action, NotifAction::Continue)
        && crate::resource::fork_counted_on_continue(&notif, fd);

    let mut exec_freeze = None;
    if matches!(action, NotifAction::Continue)
        && policy.argv_safety_required
        && crate::freeze::requires_freeze_on_continue(nr)
    {
        match crate::freeze::freeze_sandbox_for_execve(
            &ctx.processes,
            notif.pid as i32,
        ) {
            Ok(outcome) => {
                exec_freeze = Some(outcome);
            }
            Err(e) => {
                eprintln!(
                    "sandlock: argv-safety freeze failed for pid {}: {} \
                     — denying execve to preserve TOCTOU invariant",
                    notif.pid, e
                );
                action = NotifAction::Errno(libc::EPERM);
            }
        }
    }

    // The policy callback can only tighten the decision: Deny/DenyWith
    // replace the action, Audit/Allow leave it untouched.
    if let Some(verdict) = emit_policy_event(&notif, &action, &ctx.policy_fn, fd).await {
        use crate::policy_fn::Verdict;
        match verdict {
            Verdict::Deny => { action = NotifAction::Errno(libc::EPERM); }
            Verdict::DenyWith(errno) => { action = NotifAction::Errno(errno); }
            Verdict::Audit => { }
            Verdict::Allow => {}
        }
    }

    if fork_counted && !matches!(action, NotifAction::Continue) {
        crate::resource::rollback_fork_count(&ctx.resource).await;
    }

    let mut creation_trace = None;
    if matches!(action, NotifAction::Continue)
        && crate::resource::requires_process_creation_tracking(&notif, fd, policy)
    {
        match crate::resource::prepare_process_creation_tracking(notif.pid as i32).await {
            Ok(trace) => {
                creation_trace = Some(trace);
            }
            Err(e) => {
                eprintln!(
                    "sandlock: process-creation tracking failed for pid {}: {} \
                     — denying fork-like syscall to preserve argv TOCTOU invariant",
                    notif.pid, e
                );
                // The action was still `Continue` here, so the earlier
                // rollback did not run for this notification.
                if fork_counted {
                    crate::resource::rollback_fork_count(&ctx.resource).await;
                }
                action = NotifAction::Errno(libc::EPERM);
            }
        }
    }

    if let NotifAction::Defer(deferred) = action {
        // Deferral is refused (EPERM) for syscalls that need the freeze or
        // process-creation hooks, which this path does not run.
        if crate::freeze::requires_freeze_on_continue(nr)
            || crate::resource::requires_process_creation_tracking(&notif, fd, policy)
        {
            let _ = send_response(fd, notif.id, NotifAction::Errno(libc::EPERM));
            return;
        }
        match Arc::clone(defer_sem).try_acquire_owned() {
            Ok(permit) => spawn_deferred(fd, notif.id, deferred, permit),
            Err(_) => {
                // Too many deferred handlers in flight.
                let _ = send_response(fd, notif.id, NotifAction::Errno(libc::EAGAIN));
            }
        }
        return;
    }

    // Capture this before `action` is moved into `send_response`.
    let exec_continued = exec_freeze.is_some() && matches!(action, NotifAction::Continue);
    let send_result = send_response(fd, notif.id, action);

    if let Some(trace) = creation_trace {
        if send_result.is_ok() {
            match crate::resource::finish_process_creation_tracking(ctx, trace).await {
                Ok(true) => {}
                Ok(false) => {
                    crate::resource::rollback_fork_count(&ctx.resource).await;
                }
                Err(e) => {
                    crate::resource::rollback_fork_count(&ctx.resource).await;
                    eprintln!(
                        "sandlock: process-creation tracking completion failed for pid {}: {}",
                        notif.pid, e
                    );
                }
            }
        } else {
            crate::resource::rollback_fork_count(&ctx.resource).await;
            crate::resource::abort_process_creation_tracking(trace).await;
        }
    }

    if let Some(freeze) = exec_freeze {
        // If the exec was continued and the response went out, only the
        // peers are detached; in every other case everything is detached.
        if exec_continued && send_result.is_ok() {
            crate::freeze::detach_peers(&freeze.peer_tids);
        } else {
            crate::freeze::detach_all(&freeze);
        }
    }
}
1400
1401pub async fn supervisor(
1413 notif_fd: OwnedFd,
1414 ctx: Arc<super::ctx::SupervisorCtx>,
1415 pending_handlers: Vec<(i64, std::sync::Arc<dyn super::dispatch::Handler>)>,
1416 startup: tokio::sync::oneshot::Sender<io::Result<()>>,
1417) {
1418 let async_fd = match tokio::io::unix::AsyncFd::with_interest(
1421 notif_fd,
1422 tokio::io::Interest::READABLE,
1423 ) {
1424 Ok(fd) => fd,
1425 Err(err) => {
1426 let _ = startup.send(Err(err));
1427 return;
1428 }
1429 };
1430 let fd = async_fd.get_ref().as_raw_fd();
1431
1432 let dispatch_table = Arc::new(super::dispatch::build_dispatch_table(
1434 &ctx.policy,
1435 &ctx.resource,
1436 &ctx,
1437 pending_handlers,
1438 ));
1439
1440 try_set_sync_wakeup(fd);
1442
1443 let _ = startup.send(Ok(()));
1447
1448 let gc = tokio::spawn(process_index_gc(Arc::clone(&ctx.processes)));
1454
1455 let defer_sem = Arc::new(tokio::sync::Semaphore::new(DEFER_MAX_INFLIGHT));
1459
1460 'outer: loop {
1470 let mut ready = match async_fd.readable().await {
1471 Ok(r) => r,
1472 Err(_) => break 'outer,
1473 };
1474 ready.clear_ready();
1475 drop(ready);
1476
1477 loop {
1478 match probe_notif_fd(fd) {
1479 NotifFdState::Pending => {
1480 let notif = match recv_notif(fd) {
1481 Ok(n) => n,
1482 Err(e) if e.raw_os_error() == Some(libc::EINTR) => continue,
1483 Err(_) => break 'outer,
1484 };
1485 handle_notification(notif, &ctx, &dispatch_table, fd, &defer_sem).await;
1486 }
1487 NotifFdState::Empty => break,
1488 NotifFdState::Terminal => break 'outer,
1489 }
1490 }
1491 }
1492
1493 gc.abort();
1494}
1495
1496async fn process_index_gc(processes: Arc<super::state::ProcessIndex>) {
1500 let interval = std::time::Duration::from_secs(300);
1501 loop {
1502 tokio::time::sleep(interval).await;
1503 if processes.len() == 0 {
1504 continue;
1505 }
1506 processes.prune_dead();
1507 }
1508}
1509
1510pub(crate) fn spawn_pid_watcher(
1520 ctx: Arc<super::ctx::SupervisorCtx>,
1521 key: super::state::PidKey,
1522 pidfd: std::os::unix::io::OwnedFd,
1523) {
1524 tokio::spawn(async move {
1525 let async_fd = match tokio::io::unix::AsyncFd::with_interest(
1526 pidfd,
1527 tokio::io::Interest::READABLE,
1528 ) {
1529 Ok(f) => f,
1530 Err(_) => {
1531 cleanup_pid(&ctx, key).await;
1537 return;
1538 }
1539 };
1540 let _ = async_fd.readable().await;
1543 cleanup_pid(&ctx, key).await;
1544 });
1546}
1547
1548pub(crate) async fn cleanup_pid(ctx: &super::ctx::SupervisorCtx, key: super::state::PidKey) {
1554 ctx.processes.unregister(key);
1555}
1556
#[cfg(test)]
mod tests {
    use super::*;
    use std::os::unix::io::FromRawFd;

    /// Kernel thread id of the calling thread, obtained via the raw syscall.
    fn gettid() -> u32 {
        // SAFETY: `SYS_gettid` takes no arguments and touches no caller memory.
        (unsafe { libc::syscall(libc::SYS_gettid) }) as u32
    }

    /// A failed fd injection must answer with a denial (`-EACCES`), echo the
    /// notification id, and never carry the CONTINUE flag.
    #[test]
    fn inject_failure_response_denies_not_continues() {
        let resp = inject_failure_resp(123);
        assert_eq!(resp.id, 123);
        assert_eq!(
            resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE,
            0,
            "fd-injection failure must not respond with CONTINUE"
        );
        assert_ne!(resp.error, 0, "fd-injection failure must be a denial");
        assert_eq!(resp.error, -libc::EACCES);
    }

    /// With no decision supplied, the held gate resolves to `Deny`.
    #[test]
    fn held_gate_no_decision_denies() {
        use crate::policy_fn::Verdict;
        assert!(matches!(resolve_held_gate(None), Some(Verdict::Deny)));
    }

    /// A supplied verdict is returned unchanged by the held gate.
    #[test]
    fn held_gate_passes_through_callback_verdict() {
        use crate::policy_fn::Verdict;
        assert!(matches!(
            resolve_held_gate(Some(Verdict::Allow)),
            Some(Verdict::Allow)
        ));
        assert!(matches!(
            resolve_held_gate(Some(Verdict::Deny)),
            Some(Verdict::Deny)
        ));
        assert!(matches!(
            resolve_held_gate(Some(Verdict::DenyWith(13))),
            Some(Verdict::DenyWith(13))
        ));
    }

    /// Resolving the calling thread's tid yields this process's pid.
    #[test]
    fn tgid_of_main_thread_is_own_pid() {
        assert_eq!(tgid_of(gettid()), Some(std::process::id()));
    }

    /// A worker thread's tid (distinct from the pid) still resolves to the pid.
    #[test]
    fn tgid_of_worker_thread_resolves_to_process() {
        let (tid_tx, tid_rx) = std::sync::mpsc::channel();
        let (done_tx, done_rx) = std::sync::mpsc::channel::<()>();
        let h = std::thread::spawn(move || {
            tid_tx.send(gettid()).unwrap();
            // Keep the thread alive until the main thread has done its lookup.
            done_rx.recv().ok();
        });
        let worker_tid = tid_rx.recv().unwrap();
        let pid = std::process::id();
        assert_ne!(worker_tid, pid, "worker tid must differ from pid");
        assert_eq!(tgid_of(worker_tid), Some(pid));
        done_tx.send(()).ok();
        h.join().unwrap();
    }

    /// `dup_fd_from_pid` succeeds when addressed by a worker thread's tid.
    #[test]
    fn dup_fd_from_pid_handles_worker_thread_fd() {
        use std::os::unix::io::AsRawFd;
        let (info_tx, info_rx) = std::sync::mpsc::channel();
        let (done_tx, done_rx) = std::sync::mpsc::channel::<()>();
        let h = std::thread::spawn(move || {
            let f = std::fs::File::open("/dev/null").unwrap();
            info_tx.send((gettid(), f.as_raw_fd())).unwrap();
            // Hold the file open until the main thread has duplicated it.
            done_rx.recv().ok();
            drop(f);
        });
        let (worker_tid, fd) = info_rx.recv().unwrap();
        let dup = dup_fd_from_pid(worker_tid, fd);
        done_tx.send(()).ok();
        h.join().unwrap();
        assert!(dup.is_ok(), "dup_fd_from_pid for a worker-thread fd failed: {:?}", dup.err());
    }

    /// A null address or a zero maximum length both yield `None`.
    #[test]
    fn read_child_cstr_returns_none_for_null_addr_or_zero_max_len() {
        assert!(read_child_cstr(-1, 0, 0, 0, 4096).is_none());
        assert!(read_child_cstr(-1, 0, 0, 0xdeadbeef, 0).is_none());
    }

    /// Smoke test: `Debug` formatting works for the listed action variants.
    #[test]
    fn test_notif_action_debug() {
        let _ = format!("{:?}", NotifAction::Continue);
        let _ = format!("{:?}", NotifAction::Errno(1));
        let _ = format!("{:?}", NotifAction::InjectFd { srcfd: 3, targetfd: 4 });
        // SAFETY: `dup` has no memory-safety preconditions; it reports failure as -1.
        let raw = unsafe { libc::dup(2) };
        // An `OwnedFd` must never wrap -1, so check before taking ownership.
        assert!(raw >= 0, "dup(2) failed: {}", std::io::Error::last_os_error());
        // SAFETY: `raw` is a freshly duplicated, valid descriptor owned by nothing else.
        let test_fd = unsafe { OwnedFd::from_raw_fd(raw) };
        let _ = format!("{:?}", NotifAction::InjectFdSend { srcfd: test_fd, newfd_flags: 0 });
        let _ = format!("{:?}", NotifAction::ReturnValue(42));
        let _ = format!("{:?}", NotifAction::Hold);
        let _ = format!("{:?}", NotifAction::Kill { sig: 9, pgid: 1 });
        let _ = format!("{:?}", NotifAction::defer(async { NotifAction::Continue }));
    }

    /// A deferred future may hold a `Cell` (a non-`Sync` value) across an await.
    #[tokio::test]
    async fn deferred_future_need_not_be_sync() {
        use std::cell::Cell;
        let action = NotifAction::defer(async move {
            let counter = Cell::new(0);
            counter.set(counter.get() + 41);
            tokio::task::yield_now().await;
            NotifAction::ReturnValue(counter.get() + 1)
        });
        let NotifAction::Defer(d) = action else { panic!("expected Defer") };
        assert!(matches!(d.run().await, NotifAction::ReturnValue(42)));
    }

    /// `defer` wraps the future in `Defer`, and running it yields its action.
    #[tokio::test]
    async fn deferred_runs_to_its_terminal_action() {
        let action = NotifAction::defer(async { NotifAction::ReturnValue(7) });
        let NotifAction::Defer(deferred) = action else {
            panic!("defer() must construct a NotifAction::Defer");
        };
        assert!(matches!(deferred.run().await, NotifAction::ReturnValue(7)));
    }

    /// A deferred future that outlives its time limit resolves to `Errno(EIO)`.
    #[tokio::test(start_paused = true)]
    async fn deferred_times_out_to_eio() {
        let slow = Deferred::new(async {
            tokio::time::sleep(std::time::Duration::from_secs(60)).await;
            NotifAction::ReturnValue(7)
        });
        let action = run_deferred_within(slow, std::time::Duration::from_secs(1)).await;
        assert!(matches!(action, NotifAction::Errno(e) if e == libc::EIO));
    }

    /// A deferred future that finishes within the limit passes its action through.
    #[tokio::test(start_paused = true)]
    async fn deferred_within_limit_passes_through() {
        let fast = Deferred::new(async { NotifAction::ReturnValue(7) });
        let action = run_deferred_within(fast, std::time::Duration::from_secs(1)).await;
        assert!(matches!(action, NotifAction::ReturnValue(7)));
    }

    /// A nested `Defer` collapses to `Errno(EIO)`; other actions are untouched.
    #[test]
    fn finalize_deferred_collapses_nested_defer_to_eio() {
        let nested = NotifAction::defer(async { NotifAction::Continue });
        assert!(matches!(finalize_deferred(nested), NotifAction::Errno(e) if e == libc::EIO));
        assert!(matches!(finalize_deferred(NotifAction::Continue), NotifAction::Continue));
        assert!(matches!(
            finalize_deferred(NotifAction::ReturnValue(3)),
            NotifAction::ReturnValue(3)
        ));
    }

    /// Bytes written into the memfd can be read back verbatim.
    #[test]
    fn content_memfd_roundtrips_content() {
        use std::io::Read;
        let fd = content_memfd(b"hello world", true).expect("content_memfd");
        let mut f = std::fs::File::from(fd);
        let mut buf = String::new();
        f.read_to_string(&mut buf).unwrap();
        assert_eq!(buf, "hello world");
    }

    /// Requesting a sealed memfd results in `F_SEAL_WRITE` being set.
    #[test]
    fn content_memfd_sealed_applies_write_seal() {
        let fd = content_memfd(b"data", true).expect("content_memfd");
        // SAFETY: `F_GET_SEALS` only queries the descriptor, which `fd` keeps open.
        let seals = unsafe { libc::fcntl(fd.as_raw_fd(), libc::F_GET_SEALS) };
        assert!(seals >= 0, "F_GET_SEALS failed");
        assert!(
            seals & libc::F_SEAL_WRITE != 0,
            "expected F_SEAL_WRITE on a sealed memfd, got {seals:#x}"
        );
    }

    /// Requesting an unsealed memfd leaves `F_SEAL_WRITE` clear.
    #[test]
    fn content_memfd_unsealed_has_no_write_seal() {
        let fd = content_memfd(b"data", false).expect("content_memfd");
        // SAFETY: `F_GET_SEALS` only queries the descriptor, which `fd` keeps open.
        let seals = unsafe { libc::fcntl(fd.as_raw_fd(), libc::F_GET_SEALS) };
        assert!(seals >= 0, "F_GET_SEALS failed");
        assert_eq!(
            seals & libc::F_SEAL_WRITE,
            0,
            "unsealed memfd must not carry a write seal, got {seals:#x}"
        );
    }

    /// `inject_bytes` yields `InjectFdSend` with `O_CLOEXEC`, a write seal, and
    /// the payload readable from the fd.
    #[test]
    fn inject_bytes_produces_sealed_cloexec_injectfdsend() {
        use std::io::Read;
        match NotifAction::inject_bytes(b"payload") {
            NotifAction::InjectFdSend { srcfd, newfd_flags } => {
                assert_eq!(newfd_flags, libc::O_CLOEXEC as u32);
                // SAFETY: `F_GET_SEALS` only queries the descriptor, which `srcfd` keeps open.
                let seals = unsafe { libc::fcntl(srcfd.as_raw_fd(), libc::F_GET_SEALS) };
                // Without this guard a failing fcntl (-1, all bits set) would
                // satisfy the seal check below and hide the failure.
                assert!(seals >= 0, "F_GET_SEALS failed");
                assert!(seals & libc::F_SEAL_WRITE != 0, "inject_bytes must seal");
                let mut f = std::fs::File::from(srcfd);
                let mut buf = String::new();
                f.read_to_string(&mut buf).unwrap();
                assert_eq!(buf, "payload");
            }
            other => panic!("expected InjectFdSend, got {other:?}"),
        }
    }

    /// A fresh `NetworkState` is unrestricted on all protocols with no bound ports.
    #[test]
    fn test_network_state_new() {
        let ns = super::super::state::NetworkState::new();
        assert!(matches!(ns.tcp_policy, NetworkPolicy::Unrestricted));
        assert!(matches!(ns.udp_policy, NetworkPolicy::Unrestricted));
        assert!(matches!(ns.icmp_policy, NetworkPolicy::Unrestricted));
        assert!(ns.port_map.bound_ports.is_empty());
    }

    /// Constructing `TimeRandomState` with `None, None` leaves both fields unset.
    #[test]
    fn test_time_random_state_new() {
        let tr = super::super::state::TimeRandomState::new(None, None);
        assert!(tr.time_offset.is_none());
        assert!(tr.random_state.is_none());
    }

    /// A fresh `ResourceState` records the given limits and starts with no
    /// memory used, fork-holding off, and no held notification ids.
    #[test]
    fn test_resource_state_new() {
        let rs = super::super::state::ResourceState::new(1024 * 1024, 10);
        assert_eq!(rs.mem_used, 0);
        assert_eq!(rs.max_memory_bytes, 1024 * 1024);
        assert_eq!(rs.max_processes, 10);
        assert!(!rs.hold_forks);
        assert!(rs.held_notif_ids.is_empty());
    }

    /// Reading eight bytes from this process's own memory returns the stored value.
    #[test]
    fn test_process_vm_readv_self() {
        let data: u64 = 0xDEADBEEF_CAFEBABE;
        let addr = &data as *const u64 as u64;
        let pid = std::process::id();
        let bytes = read_child_mem_vm(pid, addr, 8).expect("read_child_mem_vm on own memory");
        let read_val = u64::from_ne_bytes(bytes[..8].try_into().unwrap());
        assert_eq!(read_val, 0xDEADBEEF_CAFEBABE);
    }

    /// Writing eight bytes into this process's own memory updates the target.
    #[test]
    fn test_process_vm_writev_self() {
        let mut data: u64 = 0;
        let addr = &mut data as *mut u64 as u64;
        let pid = std::process::id();
        let payload = 0x1234567890ABCDEFu64.to_ne_bytes();
        let result = write_child_mem_vm(pid, addr, &payload);
        assert!(result.is_ok());
        assert_eq!(data, 0x1234567890ABCDEF);
    }
}