1use crate::error::{NucleusError, Result};
2use crate::security::policy::sha256_hex;
3use seccompiler::{BpfProgram, SeccompAction, SeccompCondition, SeccompFilter, SeccompRule};
4use std::collections::BTreeMap;
5use std::path::Path;
6use tracing::{debug, info, warn};
7
/// Tracks whether a seccomp filter has already been installed through this
/// manager, so that later apply calls can short-circuit.
#[derive(Debug)]
pub struct SeccompManager {
    /// `true` once one of the `apply_*` methods has installed a filter.
    applied: bool,
}
15
16const DENIED_CLONE_NAMESPACE_FLAGS: u64 = (libc::CLONE_NEWUSER
17 | libc::CLONE_NEWNS
18 | libc::CLONE_NEWNET
19 | libc::CLONE_NEWIPC
20 | libc::CLONE_NEWUTS
21 | libc::CLONE_NEWPID
22 | libc::CLONE_NEWCGROUP
23 | libc::CLONE_NEWTIME) as u64;
24
25impl SeccompManager {
26 pub fn new() -> Self {
27 Self { applied: false }
28 }
29
30 fn base_allowed_syscalls() -> Vec<i64> {
31 vec![
32 libc::SYS_read,
34 libc::SYS_write,
35 libc::SYS_open,
36 libc::SYS_openat,
37 libc::SYS_close,
38 libc::SYS_stat,
39 libc::SYS_fstat,
40 libc::SYS_lstat,
41 libc::SYS_lseek,
42 libc::SYS_access,
43 libc::SYS_fcntl,
44 libc::SYS_readv,
45 libc::SYS_writev,
46 libc::SYS_pread64,
47 libc::SYS_pwrite64,
48 libc::SYS_readlink,
49 libc::SYS_readlinkat,
50 libc::SYS_newfstatat,
51 libc::SYS_statx,
52 libc::SYS_faccessat,
53 libc::SYS_faccessat2,
54 libc::SYS_dup,
55 libc::SYS_dup2,
56 libc::SYS_dup3,
57 libc::SYS_pipe,
58 libc::SYS_pipe2,
59 libc::SYS_unlink,
60 libc::SYS_unlinkat,
61 libc::SYS_rename,
62 libc::SYS_renameat,
63 libc::SYS_renameat2,
64 libc::SYS_link,
65 libc::SYS_linkat,
66 libc::SYS_symlink,
67 libc::SYS_symlinkat,
68 libc::SYS_chmod,
69 libc::SYS_fchmod,
70 libc::SYS_fchmodat,
71 libc::SYS_truncate,
72 libc::SYS_ftruncate,
73 libc::SYS_fallocate,
74 libc::SYS_fadvise64,
75 libc::SYS_fsync,
76 libc::SYS_fdatasync,
77 libc::SYS_flock,
78 libc::SYS_sendfile,
79 libc::SYS_copy_file_range,
80 libc::SYS_splice,
81 libc::SYS_tee,
82 libc::SYS_mmap,
84 libc::SYS_munmap,
85 libc::SYS_brk,
86 libc::SYS_mremap,
87 libc::SYS_madvise,
88 libc::SYS_msync,
89 libc::SYS_execve,
95 libc::SYS_wait4,
97 libc::SYS_waitid,
98 libc::SYS_exit,
99 libc::SYS_exit_group,
100 libc::SYS_getpid,
101 libc::SYS_gettid,
102 libc::SYS_getuid,
103 libc::SYS_getgid,
104 libc::SYS_geteuid,
105 libc::SYS_getegid,
106 libc::SYS_getppid,
107 libc::SYS_getpgrp,
108 libc::SYS_setsid,
109 libc::SYS_getgroups,
110 libc::SYS_rt_sigaction,
112 libc::SYS_rt_sigprocmask,
113 libc::SYS_rt_sigreturn,
114 libc::SYS_rt_sigsuspend,
115 libc::SYS_sigaltstack,
116 libc::SYS_kill,
117 libc::SYS_tgkill,
118 libc::SYS_clock_gettime,
120 libc::SYS_clock_getres,
121 libc::SYS_clock_nanosleep,
122 libc::SYS_gettimeofday,
123 libc::SYS_nanosleep,
124 libc::SYS_getcwd,
126 libc::SYS_chdir,
127 libc::SYS_fchdir,
128 libc::SYS_mkdir,
129 libc::SYS_mkdirat,
130 libc::SYS_rmdir,
131 libc::SYS_getdents,
132 libc::SYS_getdents64,
133 libc::SYS_uname,
135 libc::SYS_getrandom,
136 libc::SYS_futex,
137 libc::SYS_set_tid_address,
138 libc::SYS_set_robust_list,
139 libc::SYS_get_robust_list,
140 libc::SYS_arch_prctl,
141 libc::SYS_sysinfo,
142 libc::SYS_umask,
143 libc::SYS_getrlimit,
144 libc::SYS_prlimit64,
145 libc::SYS_getrusage,
146 libc::SYS_times,
147 libc::SYS_sched_yield,
148 libc::SYS_sched_getaffinity,
149 libc::SYS_getcpu,
150 libc::SYS_rseq,
151 libc::SYS_close_range,
152 libc::SYS_landlock_create_ruleset,
156 libc::SYS_landlock_add_rule,
157 libc::SYS_landlock_restrict_self,
158 libc::SYS_getsockname,
160 libc::SYS_getpeername,
161 libc::SYS_socketpair,
162 libc::SYS_getsockopt,
163 libc::SYS_poll,
165 libc::SYS_ppoll,
166 libc::SYS_select,
167 libc::SYS_pselect6,
168 libc::SYS_epoll_create,
169 libc::SYS_epoll_create1,
170 libc::SYS_epoll_ctl,
171 libc::SYS_epoll_wait,
172 libc::SYS_epoll_pwait,
173 libc::SYS_eventfd,
174 libc::SYS_eventfd2,
175 libc::SYS_signalfd,
176 libc::SYS_signalfd4,
177 libc::SYS_timerfd_create,
178 libc::SYS_timerfd_settime,
179 libc::SYS_timerfd_gettime,
180 ]
181 }
182
183 fn allowed_socket_domains(allow_network: bool) -> Vec<i32> {
184 if allow_network {
185 vec![libc::AF_UNIX, libc::AF_INET, libc::AF_INET6]
186 } else {
187 vec![libc::AF_UNIX]
188 }
189 }
190
191 fn network_mode_syscalls(allow_network: bool) -> Vec<i64> {
192 if allow_network {
193 vec![
194 libc::SYS_connect,
195 libc::SYS_sendto,
196 libc::SYS_recvfrom,
197 libc::SYS_sendmsg,
198 libc::SYS_recvmsg,
199 libc::SYS_shutdown,
200 libc::SYS_bind,
201 libc::SYS_listen,
202 libc::SYS_accept,
203 libc::SYS_accept4,
204 libc::SYS_setsockopt,
205 ]
206 } else {
207 Vec::new()
208 }
209 }
210
211 fn minimal_filter(allow_network: bool) -> Result<BTreeMap<i64, Vec<SeccompRule>>> {
221 let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
222
223 let allowed_syscalls = Self::base_allowed_syscalls();
225
226 for syscall in allowed_syscalls {
228 rules.insert(syscall, Vec::new());
229 }
230
231 for syscall in Self::network_mode_syscalls(allow_network) {
233 rules.insert(syscall, Vec::new());
234 }
235
236 let mut socket_rules = Vec::new();
239 for domain in Self::allowed_socket_domains(allow_network) {
240 let condition = SeccompCondition::new(
241 0, seccompiler::SeccompCmpArgLen::Dword,
243 seccompiler::SeccompCmpOp::Eq,
244 domain as u64,
245 )
246 .map_err(|e| {
247 NucleusError::SeccompError(format!(
248 "Failed to create socket domain condition: {}",
249 e
250 ))
251 })?;
252 let rule = SeccompRule::new(vec![condition]).map_err(|e| {
253 NucleusError::SeccompError(format!("Failed to create socket rule: {}", e))
254 })?;
255 socket_rules.push(rule);
256 }
257 rules.insert(libc::SYS_socket, socket_rules);
258
259 let ioctl_allowed: &[u64] = &[
261 0x5401, 0x5402, 0x5403, 0x5404, 0x540B, 0x540F, 0x5410, 0x5413, 0x5429, 0x541B, 0x5451, 0x5450, ];
276 let mut ioctl_rules = Vec::new();
277 for &request in ioctl_allowed {
278 let condition = SeccompCondition::new(
279 1, seccompiler::SeccompCmpArgLen::Dword,
281 seccompiler::SeccompCmpOp::Eq,
282 request,
283 )
284 .map_err(|e| {
285 NucleusError::SeccompError(format!("Failed to create ioctl condition: {}", e))
286 })?;
287 let rule = SeccompRule::new(vec![condition]).map_err(|e| {
288 NucleusError::SeccompError(format!("Failed to create ioctl rule: {}", e))
289 })?;
290 ioctl_rules.push(rule);
291 }
292 rules.insert(libc::SYS_ioctl, ioctl_rules);
293
294 let prctl_allowed: &[u64] = &[
300 1, 2, 15, 16, 38, 39, ];
307 let mut prctl_rules = Vec::new();
308 for &option in prctl_allowed {
309 let condition = SeccompCondition::new(
310 0, seccompiler::SeccompCmpArgLen::Dword,
312 seccompiler::SeccompCmpOp::Eq,
313 option,
314 )
315 .map_err(|e| {
316 NucleusError::SeccompError(format!("Failed to create prctl condition: {}", e))
317 })?;
318 let rule = SeccompRule::new(vec![condition]).map_err(|e| {
319 NucleusError::SeccompError(format!("Failed to create prctl rule: {}", e))
320 })?;
321 prctl_rules.push(rule);
322 }
323 rules.insert(libc::SYS_prctl, prctl_rules);
324
325 let mut mprotect_rules = Vec::new();
327 for allowed in [0, libc::PROT_WRITE as u64, libc::PROT_EXEC as u64] {
328 let condition = SeccompCondition::new(
329 2, seccompiler::SeccompCmpArgLen::Dword,
331 seccompiler::SeccompCmpOp::MaskedEq((libc::PROT_WRITE | libc::PROT_EXEC) as u64),
332 allowed,
333 )
334 .map_err(|e| {
335 NucleusError::SeccompError(format!("Failed to create mprotect condition: {}", e))
336 })?;
337 let rule = SeccompRule::new(vec![condition]).map_err(|e| {
338 NucleusError::SeccompError(format!("Failed to create mprotect rule: {}", e))
339 })?;
340 mprotect_rules.push(rule);
341 }
342 rules.insert(libc::SYS_mprotect, mprotect_rules);
343
344 rules.insert(libc::SYS_clone3, Vec::new());
356
357 let clone_condition = SeccompCondition::new(
359 0, seccompiler::SeccompCmpArgLen::Qword,
361 seccompiler::SeccompCmpOp::MaskedEq(DENIED_CLONE_NAMESPACE_FLAGS),
362 0, )
364 .map_err(|e| {
365 NucleusError::SeccompError(format!("Failed to create clone condition: {}", e))
366 })?;
367 let clone_rule = SeccompRule::new(vec![clone_condition]).map_err(|e| {
368 NucleusError::SeccompError(format!("Failed to create clone rule: {}", e))
369 })?;
370 rules.insert(libc::SYS_clone, vec![clone_rule]);
371
372 let execveat_condition = SeccompCondition::new(
379 4, seccompiler::SeccompCmpArgLen::Dword,
381 seccompiler::SeccompCmpOp::MaskedEq(libc::AT_EMPTY_PATH as u64),
382 0, )
384 .map_err(|e| {
385 NucleusError::SeccompError(format!("Failed to create execveat condition: {}", e))
386 })?;
387 let execveat_rule = SeccompRule::new(vec![execveat_condition]).map_err(|e| {
388 NucleusError::SeccompError(format!("Failed to create execveat rule: {}", e))
389 })?;
390 rules.insert(libc::SYS_execveat, vec![execveat_rule]);
391
392 Ok(rules)
393 }
394
395 pub fn compile_minimal_filter() -> Result<BpfProgram> {
400 let rules = Self::minimal_filter(true)?;
401 let filter = SeccompFilter::new(
402 rules,
403 SeccompAction::Errno(libc::EPERM as u32),
404 SeccompAction::Allow,
405 std::env::consts::ARCH.try_into().map_err(|e| {
406 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
407 })?,
408 )
409 .map_err(|e| {
410 NucleusError::SeccompError(format!("Failed to create seccomp filter: {}", e))
411 })?;
412
413 let bpf_prog: BpfProgram = filter.try_into().map_err(|e| {
414 NucleusError::SeccompError(format!("Failed to compile BPF program: {}", e))
415 })?;
416
417 Ok(bpf_prog)
418 }
419
420 pub fn apply_minimal_filter(&mut self) -> Result<bool> {
428 self.apply_minimal_filter_with_mode(false, false)
429 }
430
431 pub fn apply_minimal_filter_with_mode(
436 &mut self,
437 best_effort: bool,
438 log_denied: bool,
439 ) -> Result<bool> {
440 self.apply_filter_for_network_mode(true, best_effort, log_denied)
441 }
442
443 pub fn apply_filter_for_network_mode(
452 &mut self,
453 allow_network: bool,
454 best_effort: bool,
455 log_denied: bool,
456 ) -> Result<bool> {
457 if self.applied {
458 debug!("Seccomp filter already applied, skipping");
459 return Ok(true);
460 }
461
462 info!(allow_network, "Applying seccomp filter");
463
464 let rules = match Self::minimal_filter(allow_network) {
465 Ok(r) => r,
466 Err(e) => {
467 if best_effort {
468 warn!(
469 "Failed to create seccomp rules: {} (continuing without seccomp)",
470 e
471 );
472 return Ok(false);
473 }
474 return Err(e);
475 }
476 };
477
478 let filter = match SeccompFilter::new(
479 rules,
480 SeccompAction::Errno(libc::EPERM as u32), SeccompAction::Allow, std::env::consts::ARCH.try_into().map_err(|e| {
483 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
484 })?,
485 ) {
486 Ok(f) => f,
487 Err(e) => {
488 if best_effort {
489 warn!(
490 "Failed to create seccomp filter: {} (continuing without seccomp)",
491 e
492 );
493 return Ok(false);
494 }
495 return Err(NucleusError::SeccompError(format!(
496 "Failed to create seccomp filter: {}",
497 e
498 )));
499 }
500 };
501
502 let bpf_prog: BpfProgram = match filter.try_into() {
503 Ok(p) => p,
504 Err(e) => {
505 if best_effort {
506 warn!(
507 "Failed to compile BPF program: {} (continuing without seccomp)",
508 e
509 );
510 return Ok(false);
511 }
512 return Err(NucleusError::SeccompError(format!(
513 "Failed to compile BPF program: {}",
514 e
515 )));
516 }
517 };
518
519 match Self::apply_bpf_program(&bpf_prog, log_denied) {
521 Ok(_) => {
522 self.applied = true;
523 info!("Successfully applied seccomp filter");
524 Ok(true)
525 }
526 Err(e) => {
527 if best_effort {
528 warn!(
529 "Failed to apply seccomp filter: {} (continuing without seccomp)",
530 e
531 );
532 Ok(false)
533 } else {
534 Err(NucleusError::SeccompError(format!(
535 "Failed to apply seccomp filter: {}",
536 e
537 )))
538 }
539 }
540 }
541 }
542
543 pub fn apply_profile_from_file(
562 &mut self,
563 profile_path: &Path,
564 expected_sha256: Option<&str>,
565 audit_mode: bool,
566 ) -> Result<bool> {
567 if self.applied {
568 debug!("Seccomp filter already applied, skipping");
569 return Ok(true);
570 }
571
572 info!("Loading seccomp profile from {:?}", profile_path);
573
574 let content = std::fs::read(profile_path).map_err(|e| {
576 NucleusError::SeccompError(format!(
577 "Failed to read seccomp profile {:?}: {}",
578 profile_path, e
579 ))
580 })?;
581
582 if let Some(expected) = expected_sha256 {
584 let actual = sha256_hex(&content);
585 if actual != expected {
586 return Err(NucleusError::SeccompError(format!(
587 "Seccomp profile hash mismatch: expected {}, got {}",
588 expected, actual
589 )));
590 }
591 info!("Seccomp profile hash verified: {}", actual);
592 }
593
594 let profile: SeccompProfile = serde_json::from_slice(&content).map_err(|e| {
596 NucleusError::SeccompError(format!("Failed to parse seccomp profile: {}", e))
597 })?;
598
599 Self::warn_missing_arg_filters(&profile);
604
605 let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
607
608 for syscall_group in &profile.syscalls {
609 if syscall_group.action == "SCMP_ACT_ALLOW" {
610 for name in &syscall_group.names {
611 if let Some(nr) = syscall_name_to_number(name) {
612 rules.insert(nr, Vec::new());
613 } else {
614 warn!("Unknown syscall in profile: {} (skipping)", name);
615 }
616 }
617 }
618 }
619
620 let builtin_rules = Self::minimal_filter(true)?;
625 for syscall_name in Self::ARG_FILTERED_SYSCALLS {
626 if let Some(nr) = syscall_name_to_number(syscall_name) {
627 if let std::collections::btree_map::Entry::Occupied(mut entry) = rules.entry(nr) {
628 if let Some(builtin) = builtin_rules.get(&nr) {
629 if !builtin.is_empty() {
630 info!(
631 "Merging built-in argument filters for '{}' into custom profile",
632 syscall_name
633 );
634 entry.insert(builtin.clone());
635 }
636 }
637 }
638 }
639 }
640 rules.remove(&libc::SYS_clone3);
642
643 let filter = SeccompFilter::new(
644 rules,
645 SeccompAction::Errno(libc::EPERM as u32),
646 SeccompAction::Allow,
647 std::env::consts::ARCH.try_into().map_err(|e| {
648 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
649 })?,
650 )
651 .map_err(|e| {
652 NucleusError::SeccompError(format!(
653 "Failed to create seccomp filter from profile: {}",
654 e
655 ))
656 })?;
657
658 let bpf_prog: BpfProgram = filter.try_into().map_err(|e| {
659 NucleusError::SeccompError(format!("Failed to compile BPF program from profile: {}", e))
660 })?;
661
662 match Self::apply_bpf_program(&bpf_prog, audit_mode) {
663 Ok(_) => {
664 self.applied = true;
665 info!(
666 "Seccomp profile applied from {:?} (log_denied={})",
667 profile_path, audit_mode
668 );
669 Ok(true)
670 }
671 Err(e) => Err(e),
672 }
673 }
674
675 pub fn apply_trace_filter(&mut self) -> Result<bool> {
680 if self.applied {
681 debug!("Seccomp filter already applied, skipping trace filter");
682 return Ok(true);
683 }
684
685 info!("Applying seccomp trace filter (allow-all + LOG)");
686
687 let filter = SeccompFilter::new(
691 BTreeMap::new(),
692 SeccompAction::Allow, SeccompAction::Allow, std::env::consts::ARCH.try_into().map_err(|e| {
695 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
696 })?,
697 )
698 .map_err(|e| NucleusError::SeccompError(format!("Failed to create trace filter: {}", e)))?;
699
700 let bpf_prog: BpfProgram = filter.try_into().map_err(|e| {
701 NucleusError::SeccompError(format!("Failed to compile trace BPF: {}", e))
702 })?;
703
704 Self::apply_bpf_program(&bpf_prog, true)?;
706 self.applied = true;
707 info!("Seccomp trace filter applied (all syscalls allowed + logged)");
708 Ok(true)
709 }
710
711 const ARG_FILTERED_SYSCALLS: &'static [&'static str] =
714 &["clone", "clone3", "execveat", "ioctl", "mprotect", "prctl", "socket"];
715
716 fn warn_missing_arg_filters(profile: &SeccompProfile) {
719 for group in &profile.syscalls {
720 if group.action != "SCMP_ACT_ALLOW" {
721 continue;
722 }
723 for name in &group.names {
724 if Self::ARG_FILTERED_SYSCALLS.contains(&name.as_str()) && group.args.is_empty() {
725 warn!(
726 "Custom seccomp profile allows '{}' without argument filters. \
727 The built-in filter restricts this syscall at the argument level. \
728 This profile weakens security compared to the default.",
729 name
730 );
731 }
732 }
733 }
734 }
735
736 pub fn is_applied(&self) -> bool {
738 self.applied
739 }
740
741 fn apply_bpf_program(bpf_prog: &BpfProgram, log_denied: bool) -> Result<()> {
742 let mut flags: libc::c_ulong = 0;
743 if log_denied {
744 flags |= libc::SECCOMP_FILTER_FLAG_LOG as libc::c_ulong;
745 }
746
747 match Self::apply_bpf_program_with_flags(bpf_prog, flags) {
748 Ok(()) => Ok(()),
749 Err(err)
750 if log_denied
751 && err.raw_os_error() == Some(libc::EINVAL)
752 && libc::SECCOMP_FILTER_FLAG_LOG != 0 =>
753 {
754 warn!(
755 "Kernel rejected SECCOMP_FILTER_FLAG_LOG; continuing with seccomp \
756 enforcement without deny logging"
757 );
758 Self::apply_bpf_program_with_flags(bpf_prog, 0)?;
759 Ok(())
760 }
761 Err(err) => Err(NucleusError::SeccompError(format!(
762 "Failed to apply seccomp filter: {}",
763 err
764 ))),
765 }
766 }
767
768 fn apply_bpf_program_with_flags(
769 bpf_prog: &BpfProgram,
770 flags: libc::c_ulong,
771 ) -> std::io::Result<()> {
772 let rc = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
775 if rc != 0 {
776 return Err(std::io::Error::last_os_error());
777 }
778
779 let prog = libc::sock_fprog {
780 len: bpf_prog.len() as u16,
781 filter: bpf_prog.as_ptr() as *mut libc::sock_filter,
782 };
783
784 let rc = unsafe {
787 libc::syscall(
788 libc::SYS_seccomp,
789 libc::SECCOMP_SET_MODE_FILTER,
790 flags,
791 &prog as *const libc::sock_fprog,
792 )
793 };
794
795 if rc < 0 {
796 return Err(std::io::Error::last_os_error());
797 }
798
799 Ok(())
800 }
801}
802
803use crate::security::seccomp_generate::SeccompProfile;
805
806fn syscall_name_to_number(name: &str) -> Option<i64> {
810 match name {
811 "read" => Some(libc::SYS_read),
813 "write" => Some(libc::SYS_write),
814 "open" => Some(libc::SYS_open),
815 "openat" => Some(libc::SYS_openat),
816 "close" => Some(libc::SYS_close),
817 "stat" => Some(libc::SYS_stat),
818 "fstat" => Some(libc::SYS_fstat),
819 "lstat" => Some(libc::SYS_lstat),
820 "lseek" => Some(libc::SYS_lseek),
821 "access" => Some(libc::SYS_access),
822 "fcntl" => Some(libc::SYS_fcntl),
823 "readv" => Some(libc::SYS_readv),
824 "writev" => Some(libc::SYS_writev),
825 "pread64" => Some(libc::SYS_pread64),
826 "pwrite64" => Some(libc::SYS_pwrite64),
827 "readlink" => Some(libc::SYS_readlink),
828 "readlinkat" => Some(libc::SYS_readlinkat),
829 "newfstatat" => Some(libc::SYS_newfstatat),
830 "statx" => Some(libc::SYS_statx),
831 "faccessat" => Some(libc::SYS_faccessat),
832 "faccessat2" => Some(libc::SYS_faccessat2),
833 "dup" => Some(libc::SYS_dup),
834 "dup2" => Some(libc::SYS_dup2),
835 "dup3" => Some(libc::SYS_dup3),
836 "pipe" => Some(libc::SYS_pipe),
837 "pipe2" => Some(libc::SYS_pipe2),
838 "unlink" => Some(libc::SYS_unlink),
839 "unlinkat" => Some(libc::SYS_unlinkat),
840 "rename" => Some(libc::SYS_rename),
841 "renameat" => Some(libc::SYS_renameat),
842 "renameat2" => Some(libc::SYS_renameat2),
843 "link" => Some(libc::SYS_link),
844 "linkat" => Some(libc::SYS_linkat),
845 "symlink" => Some(libc::SYS_symlink),
846 "symlinkat" => Some(libc::SYS_symlinkat),
847 "chmod" => Some(libc::SYS_chmod),
848 "fchmod" => Some(libc::SYS_fchmod),
849 "fchmodat" => Some(libc::SYS_fchmodat),
850 "truncate" => Some(libc::SYS_truncate),
851 "ftruncate" => Some(libc::SYS_ftruncate),
852 "fallocate" => Some(libc::SYS_fallocate),
853 "fadvise64" => Some(libc::SYS_fadvise64),
854 "fsync" => Some(libc::SYS_fsync),
855 "fdatasync" => Some(libc::SYS_fdatasync),
856 "flock" => Some(libc::SYS_flock),
857 "sendfile" => Some(libc::SYS_sendfile),
858 "copy_file_range" => Some(libc::SYS_copy_file_range),
859 "splice" => Some(libc::SYS_splice),
860 "tee" => Some(libc::SYS_tee),
861 "mmap" => Some(libc::SYS_mmap),
863 "munmap" => Some(libc::SYS_munmap),
864 "mprotect" => Some(libc::SYS_mprotect),
865 "brk" => Some(libc::SYS_brk),
866 "mremap" => Some(libc::SYS_mremap),
867 "madvise" => Some(libc::SYS_madvise),
868 "msync" => Some(libc::SYS_msync),
869 "mlock" => Some(libc::SYS_mlock),
870 "munlock" => Some(libc::SYS_munlock),
871 "fork" => Some(libc::SYS_fork),
873 "clone" => Some(libc::SYS_clone),
874 "clone3" => Some(libc::SYS_clone3),
875 "execve" => Some(libc::SYS_execve),
876 "execveat" => Some(libc::SYS_execveat),
877 "wait4" => Some(libc::SYS_wait4),
878 "waitid" => Some(libc::SYS_waitid),
879 "exit" => Some(libc::SYS_exit),
880 "exit_group" => Some(libc::SYS_exit_group),
881 "getpid" => Some(libc::SYS_getpid),
882 "gettid" => Some(libc::SYS_gettid),
883 "getuid" => Some(libc::SYS_getuid),
884 "getgid" => Some(libc::SYS_getgid),
885 "geteuid" => Some(libc::SYS_geteuid),
886 "getegid" => Some(libc::SYS_getegid),
887 "getppid" => Some(libc::SYS_getppid),
888 "getpgrp" => Some(libc::SYS_getpgrp),
889 "setsid" => Some(libc::SYS_setsid),
890 "getgroups" => Some(libc::SYS_getgroups),
891 "rt_sigaction" => Some(libc::SYS_rt_sigaction),
893 "rt_sigprocmask" => Some(libc::SYS_rt_sigprocmask),
894 "rt_sigreturn" => Some(libc::SYS_rt_sigreturn),
895 "rt_sigsuspend" => Some(libc::SYS_rt_sigsuspend),
896 "sigaltstack" => Some(libc::SYS_sigaltstack),
897 "kill" => Some(libc::SYS_kill),
898 "tgkill" => Some(libc::SYS_tgkill),
899 "clock_gettime" => Some(libc::SYS_clock_gettime),
901 "clock_getres" => Some(libc::SYS_clock_getres),
902 "clock_nanosleep" => Some(libc::SYS_clock_nanosleep),
903 "gettimeofday" => Some(libc::SYS_gettimeofday),
904 "nanosleep" => Some(libc::SYS_nanosleep),
905 "getcwd" => Some(libc::SYS_getcwd),
907 "chdir" => Some(libc::SYS_chdir),
908 "fchdir" => Some(libc::SYS_fchdir),
909 "mkdir" => Some(libc::SYS_mkdir),
910 "mkdirat" => Some(libc::SYS_mkdirat),
911 "rmdir" => Some(libc::SYS_rmdir),
912 "getdents" => Some(libc::SYS_getdents),
913 "getdents64" => Some(libc::SYS_getdents64),
914 "socket" => Some(libc::SYS_socket),
916 "connect" => Some(libc::SYS_connect),
917 "sendto" => Some(libc::SYS_sendto),
918 "recvfrom" => Some(libc::SYS_recvfrom),
919 "sendmsg" => Some(libc::SYS_sendmsg),
920 "recvmsg" => Some(libc::SYS_recvmsg),
921 "shutdown" => Some(libc::SYS_shutdown),
922 "bind" => Some(libc::SYS_bind),
923 "listen" => Some(libc::SYS_listen),
924 "accept" => Some(libc::SYS_accept),
925 "accept4" => Some(libc::SYS_accept4),
926 "setsockopt" => Some(libc::SYS_setsockopt),
927 "getsockopt" => Some(libc::SYS_getsockopt),
928 "getsockname" => Some(libc::SYS_getsockname),
929 "getpeername" => Some(libc::SYS_getpeername),
930 "socketpair" => Some(libc::SYS_socketpair),
931 "poll" => Some(libc::SYS_poll),
933 "ppoll" => Some(libc::SYS_ppoll),
934 "select" => Some(libc::SYS_select),
935 "pselect6" => Some(libc::SYS_pselect6),
936 "epoll_create" => Some(libc::SYS_epoll_create),
937 "epoll_create1" => Some(libc::SYS_epoll_create1),
938 "epoll_ctl" => Some(libc::SYS_epoll_ctl),
939 "epoll_wait" => Some(libc::SYS_epoll_wait),
940 "epoll_pwait" => Some(libc::SYS_epoll_pwait),
941 "eventfd" => Some(libc::SYS_eventfd),
942 "eventfd2" => Some(libc::SYS_eventfd2),
943 "signalfd" => Some(libc::SYS_signalfd),
944 "signalfd4" => Some(libc::SYS_signalfd4),
945 "timerfd_create" => Some(libc::SYS_timerfd_create),
946 "timerfd_settime" => Some(libc::SYS_timerfd_settime),
947 "timerfd_gettime" => Some(libc::SYS_timerfd_gettime),
948 "uname" => Some(libc::SYS_uname),
950 "getrandom" => Some(libc::SYS_getrandom),
951 "futex" => Some(libc::SYS_futex),
952 "set_tid_address" => Some(libc::SYS_set_tid_address),
953 "set_robust_list" => Some(libc::SYS_set_robust_list),
954 "get_robust_list" => Some(libc::SYS_get_robust_list),
955 "arch_prctl" => Some(libc::SYS_arch_prctl),
956 "sysinfo" => Some(libc::SYS_sysinfo),
957 "umask" => Some(libc::SYS_umask),
958 "getrlimit" => Some(libc::SYS_getrlimit),
959 "prlimit64" => Some(libc::SYS_prlimit64),
960 "getrusage" => Some(libc::SYS_getrusage),
961 "times" => Some(libc::SYS_times),
962 "sched_yield" => Some(libc::SYS_sched_yield),
963 "sched_getaffinity" => Some(libc::SYS_sched_getaffinity),
964 "getcpu" => Some(libc::SYS_getcpu),
965 "rseq" => Some(libc::SYS_rseq),
966 "close_range" => Some(libc::SYS_close_range),
967 "memfd_create" => Some(libc::SYS_memfd_create),
968 "ioctl" => Some(libc::SYS_ioctl),
969 "prctl" => Some(libc::SYS_prctl),
970 "landlock_create_ruleset" => Some(libc::SYS_landlock_create_ruleset),
972 "landlock_add_rule" => Some(libc::SYS_landlock_add_rule),
973 "landlock_restrict_self" => Some(libc::SYS_landlock_restrict_self),
974 _ => None,
975 }
976}
977
978impl Default for SeccompManager {
979 fn default() -> Self {
980 Self::new()
981 }
982}
983
#[cfg(test)]
mod tests {
    //! Unit tests for the seccomp allowlists and rule tables.
    //!
    //! None of these tests install a filter: doing so would restrict the test
    //! process itself. They inspect the data the filter is built from.

    use super::*;

    /// A fresh manager reports that nothing has been applied.
    #[test]
    fn test_seccomp_manager_initial_state() {
        let mgr = SeccompManager::new();
        assert!(!mgr.is_applied());
    }

    /// Only checks the starting state; the filter is not applied here because
    /// that would affect the test process.
    #[test]
    fn test_apply_idempotent() {
        let mgr = SeccompManager::new();
        assert!(!mgr.is_applied());
    }

    #[test]
    fn test_clone_denied_flags_include_newcgroup() {
        assert_ne!(
            DENIED_CLONE_NAMESPACE_FLAGS & libc::CLONE_NEWCGROUP as u64,
            0
        );
    }

    #[test]
    fn test_clone_denied_flags_include_newtime() {
        assert_ne!(
            DENIED_CLONE_NAMESPACE_FLAGS & libc::CLONE_NEWTIME as u64,
            0,
            "CLONE_NEWTIME must be in denied clone namespace flags"
        );
    }

    #[test]
    fn test_network_none_socket_domains_are_unix_only() {
        let domains = SeccompManager::allowed_socket_domains(false);
        assert_eq!(domains, vec![libc::AF_UNIX]);
    }

    #[test]
    fn test_network_enabled_socket_domains_exclude_netlink() {
        let domains = SeccompManager::allowed_socket_domains(true);
        for expected in [libc::AF_UNIX, libc::AF_INET, libc::AF_INET6] {
            assert!(domains.contains(&expected));
        }
        assert!(!domains.contains(&libc::AF_NETLINK));
    }

    #[test]
    fn test_network_mode_syscalls_only_enabled_when_network_allowed() {
        let none = SeccompManager::network_mode_syscalls(false);
        assert!(none.is_empty());

        let enabled = SeccompManager::network_mode_syscalls(true);
        for expected in [
            libc::SYS_connect,
            libc::SYS_bind,
            libc::SYS_listen,
            libc::SYS_accept,
            libc::SYS_setsockopt,
        ] {
            assert!(enabled.contains(&expected));
        }
    }

    #[test]
    fn test_landlock_bootstrap_syscalls_present_in_base_allowlist() {
        let base = SeccompManager::base_allowed_syscalls();
        assert!(base.contains(&libc::SYS_landlock_create_ruleset));
        assert!(base.contains(&libc::SYS_landlock_add_rule));
        assert!(base.contains(&libc::SYS_landlock_restrict_self));
    }

    #[test]
    fn test_x32_legacy_range_not_allowlisted() {
        let base = SeccompManager::base_allowed_syscalls();
        let net = SeccompManager::network_mode_syscalls(true);
        for nr in 512_i64..=547_i64 {
            assert!(
                !base.contains(&nr) && !net.contains(&nr),
                "x32 syscall number {} unexpectedly allowlisted",
                nr
            );
        }
    }

    #[test]
    fn test_i386_compat_socketcall_range_not_allowlisted() {
        let base = SeccompManager::base_allowed_syscalls();
        let net = SeccompManager::network_mode_syscalls(true);
        for nr in 359_i64..=373_i64 {
            assert!(
                !base.contains(&nr) && !net.contains(&nr),
                "i386 compat syscall number {} unexpectedly allowlisted",
                nr
            );
        }
    }

    /// Pins the allowlist sizes so additions are a deliberate, reviewed
    /// change. The `+ 7` accounts for the entries `minimal_filter` adds on top
    /// of the plain lists: socket, ioctl, prctl, mprotect, clone3, clone and
    /// execveat.
    #[test]
    fn test_minimal_filter_allowlist_counts_are_stable() {
        let base = SeccompManager::base_allowed_syscalls();
        let net = SeccompManager::network_mode_syscalls(true);

        assert_eq!(base.len(), 131);
        assert_eq!(net.len(), 11);
        assert_eq!(base.len() + 7, 138);
        assert_eq!(base.len() + net.len() + 7, 149);
    }

    #[test]
    fn test_arg_filtered_syscalls_list_includes_critical_syscalls() {
        // mprotect is checked too: it carries built-in argument rules just
        // like the other entries.
        for name in &[
            "clone", "clone3", "execveat", "ioctl", "mprotect", "prctl", "socket",
        ] {
            assert!(
                SeccompManager::ARG_FILTERED_SYSCALLS.contains(name),
                "'{}' must be in ARG_FILTERED_SYSCALLS",
                name
            );
        }
    }

    #[test]
    fn test_clone3_allowed_in_minimal_filter() {
        let rules = SeccompManager::minimal_filter(true).unwrap();
        assert!(
            rules.contains_key(&libc::SYS_clone3),
            "clone3 must be in the seccomp allowlist (glibc 2.34+ requires it)"
        );
    }

    #[test]
    fn test_clone_is_allowed_with_arg_filter() {
        let rules = SeccompManager::minimal_filter(true).unwrap();
        assert!(
            rules.contains_key(&libc::SYS_clone),
            "clone must be in the seccomp allowlist with arg filters"
        );
    }

    #[test]
    fn test_high_risk_syscalls_removed_from_base_allowlist() {
        let base = SeccompManager::base_allowed_syscalls();
        let removed = [
            libc::SYS_chown,
            libc::SYS_fchown,
            libc::SYS_lchown,
            libc::SYS_fchownat,
            libc::SYS_sync,
            libc::SYS_syncfs,
            libc::SYS_mlock,
            libc::SYS_munlock,
            libc::SYS_mincore,
            libc::SYS_vfork,
            libc::SYS_tkill,
        ];

        for syscall in removed {
            assert!(
                !base.contains(&syscall),
                "syscall {} unexpectedly present in base allowlist",
                syscall
            );
        }
    }

    /// Every argument-filtered syscall except clone3 must have non-empty
    /// built-in rules. clone3 is skipped because the built-in filter gives it
    /// an empty rule list.
    #[test]
    fn test_custom_profile_preserves_clone_arg_filters() {
        let rules = SeccompManager::minimal_filter(true).unwrap();

        let arg_filtered = SeccompManager::ARG_FILTERED_SYSCALLS
            .iter()
            .filter(|name| **name != "clone3");
        for name in arg_filtered {
            if let Some(nr) = syscall_name_to_number(name) {
                let has_arg_rules = rules.get(&nr).map_or(false, |r| !r.is_empty());
                assert!(
                    has_arg_rules,
                    "built-in filter must have argument-level rules for '{}' \
                     so apply_profile_from_file can merge them into custom profiles",
                    name
                );
            }
        }
    }

    #[test]
    fn test_memfd_create_not_in_default_allowlist() {
        let base = SeccompManager::base_allowed_syscalls();
        assert!(
            !base.contains(&libc::SYS_memfd_create),
            "memfd_create must not be in the default seccomp allowlist (fileless exec risk)"
        );
        let rules = SeccompManager::minimal_filter(true).unwrap();
        assert!(
            !rules.contains_key(&libc::SYS_memfd_create),
            "memfd_create must not be in the compiled seccomp filter rules"
        );
    }

    #[test]
    fn test_mprotect_has_arg_filtering() {
        let base = SeccompManager::base_allowed_syscalls();
        assert!(
            !base.contains(&libc::SYS_mprotect),
            "SYS_mprotect must not be unconditionally allowed - needs arg filtering"
        );

        let rules = SeccompManager::minimal_filter(true).unwrap();
        let mprotect_rules = rules.get(&libc::SYS_mprotect);
        assert!(
            mprotect_rules.is_some(),
            "mprotect must be present in the seccomp filter rules"
        );
        assert!(
            !mprotect_rules.unwrap().is_empty(),
            "mprotect must have argument-level conditions to prevent W^X violations"
        );
    }

    /// Every unsafe block in this file must have the safety marker within the
    /// 200 bytes before it.
    #[test]
    fn test_unsafe_blocks_have_safety_comments() {
        let source = include_str!("seccomp.rs").as_bytes();
        // The needle is assembled with concat! so that this test's own source
        // never contains it and therefore cannot match itself.
        let needle = concat!("unsafe", " {").as_bytes();
        let marker: &[u8] = b"SAFETY:";

        let mut pos = 0;
        while let Some(offset) = source[pos..]
            .windows(needle.len())
            .position(|window| window == needle)
        {
            let abs_idx = pos + offset;
            // Work on bytes: slicing a `str` at an arbitrary offset panics
            // when the offset falls inside a multi-byte character.
            let context = &source[abs_idx.saturating_sub(200)..abs_idx];
            let excerpt_end = (abs_idx + 10).min(source.len());
            assert!(
                context.windows(marker.len()).any(|window| window == marker),
                "unsafe block at byte {} must have a // SAFETY: comment. Context: ...{}...",
                abs_idx,
                String::from_utf8_lossy(&source[abs_idx.saturating_sub(80)..excerpt_end])
            );
            pos = abs_idx + 1;
        }
    }

    /// Models the mprotect rules of `minimal_filter` in plain Rust: mask the
    /// protection value down to its WRITE and EXEC bits and accept it only if
    /// the result is "neither", "WRITE only" or "EXEC only".
    fn mprotect_would_allow(prot: u64) -> bool {
        let write = libc::PROT_WRITE as u64;
        let exec = libc::PROT_EXEC as u64;
        let masked = prot & (write | exec);
        [0, write, exec].contains(&masked)
    }

    #[test]
    fn test_mprotect_allows_prot_none() {
        assert!(mprotect_would_allow(0), "PROT_NONE must be allowed");
    }

    #[test]
    fn test_mprotect_allows_prot_read_only() {
        assert!(
            mprotect_would_allow(libc::PROT_READ as u64),
            "PROT_READ must be allowed (W|X bits are 0)"
        );
    }

    #[test]
    fn test_mprotect_allows_prot_read_write() {
        assert!(
            mprotect_would_allow((libc::PROT_READ | libc::PROT_WRITE) as u64),
            "PROT_READ|PROT_WRITE must be allowed"
        );
    }

    #[test]
    fn test_mprotect_allows_prot_read_exec() {
        assert!(
            mprotect_would_allow((libc::PROT_READ | libc::PROT_EXEC) as u64),
            "PROT_READ|PROT_EXEC must be allowed"
        );
    }

    #[test]
    fn test_mprotect_rejects_prot_write_exec() {
        assert!(
            !mprotect_would_allow((libc::PROT_WRITE | libc::PROT_EXEC) as u64),
            "PROT_WRITE|PROT_EXEC (W^X violation) must be REJECTED"
        );
    }

    #[test]
    fn test_mprotect_rejects_prot_read_write_exec() {
        assert!(
            !mprotect_would_allow((libc::PROT_READ | libc::PROT_WRITE | libc::PROT_EXEC) as u64),
            "PROT_READ|PROT_WRITE|PROT_EXEC (W^X violation) must be REJECTED"
        );
    }

    #[test]
    fn test_mprotect_allows_prot_write_alone() {
        assert!(
            mprotect_would_allow(libc::PROT_WRITE as u64),
            "PROT_WRITE alone must be allowed"
        );
    }

    #[test]
    fn test_mprotect_allows_prot_exec_alone() {
        assert!(
            mprotect_would_allow(libc::PROT_EXEC as u64),
            "PROT_EXEC alone must be allowed"
        );
    }
}