1use crate::error::{NucleusError, Result};
2use crate::security::policy::sha256_hex;
3use seccompiler::{BpfProgram, SeccompAction, SeccompCondition, SeccompFilter, SeccompRule};
4use std::collections::BTreeMap;
5use std::path::Path;
6use tracing::{debug, info, warn};
7
/// Tracks whether a seccomp filter has been installed through this manager.
///
/// Every `apply_*` method checks the flag first and returns early once a
/// filter is in place, so a given manager installs at most one filter.
#[derive(Debug)]
pub struct SeccompManager {
    /// `true` once one of the `apply_*` methods has installed a filter successfully.
    applied: bool,
}
15
16const DENIED_CLONE_NAMESPACE_FLAGS: u64 = (libc::CLONE_NEWUSER
17 | libc::CLONE_NEWNS
18 | libc::CLONE_NEWNET
19 | libc::CLONE_NEWIPC
20 | libc::CLONE_NEWUTS
21 | libc::CLONE_NEWPID
22 | libc::CLONE_NEWCGROUP
23 | libc::CLONE_NEWTIME) as u64;
24
25impl SeccompManager {
26 pub fn new() -> Self {
27 Self { applied: false }
28 }
29
30 fn base_allowed_syscalls() -> Vec<i64> {
31 let mut syscalls = vec![
32 libc::SYS_read,
34 libc::SYS_write,
35 libc::SYS_openat,
36 libc::SYS_close,
37 libc::SYS_fstat,
38 libc::SYS_lseek,
39 libc::SYS_fcntl,
40 libc::SYS_readv,
41 libc::SYS_writev,
42 libc::SYS_pread64,
43 libc::SYS_pwrite64,
44 libc::SYS_readlinkat,
45 libc::SYS_newfstatat,
46 libc::SYS_statx,
47 libc::SYS_faccessat,
48 libc::SYS_faccessat2,
49 libc::SYS_dup,
50 libc::SYS_dup3,
51 libc::SYS_pipe2,
52 libc::SYS_unlinkat,
53 libc::SYS_renameat,
54 libc::SYS_renameat2,
55 libc::SYS_linkat,
56 libc::SYS_symlinkat,
57 libc::SYS_fchmod,
58 libc::SYS_fchmodat,
59 libc::SYS_truncate,
60 libc::SYS_ftruncate,
61 libc::SYS_fallocate,
62 #[cfg(target_arch = "x86_64")]
63 libc::SYS_fadvise64,
64 libc::SYS_fsync,
65 libc::SYS_fdatasync,
66 libc::SYS_flock,
67 #[cfg(target_arch = "x86_64")]
68 libc::SYS_sendfile,
69 libc::SYS_copy_file_range,
70 libc::SYS_splice,
71 libc::SYS_tee,
72 libc::SYS_mmap,
74 libc::SYS_munmap,
75 libc::SYS_brk,
76 libc::SYS_mremap,
77 libc::SYS_madvise,
78 libc::SYS_msync,
79 libc::SYS_execve,
85 libc::SYS_wait4,
87 libc::SYS_waitid,
88 libc::SYS_exit,
89 libc::SYS_exit_group,
90 libc::SYS_getpid,
91 libc::SYS_gettid,
92 libc::SYS_getuid,
93 libc::SYS_getgid,
94 libc::SYS_geteuid,
95 libc::SYS_getegid,
96 libc::SYS_getppid,
97 libc::SYS_setsid,
98 libc::SYS_getgroups,
99 libc::SYS_rt_sigaction,
101 libc::SYS_rt_sigprocmask,
102 libc::SYS_rt_sigreturn,
103 libc::SYS_rt_sigsuspend,
104 libc::SYS_sigaltstack,
105 libc::SYS_kill,
106 libc::SYS_tgkill,
107 libc::SYS_clock_gettime,
109 libc::SYS_clock_getres,
110 libc::SYS_clock_nanosleep,
111 libc::SYS_gettimeofday,
112 libc::SYS_nanosleep,
113 libc::SYS_getcwd,
115 libc::SYS_chdir,
116 libc::SYS_fchdir,
117 libc::SYS_mkdirat,
118 libc::SYS_getdents64,
119 libc::SYS_uname,
121 libc::SYS_getrandom,
122 libc::SYS_futex,
123 libc::SYS_set_tid_address,
124 libc::SYS_set_robust_list,
125 libc::SYS_get_robust_list,
126 libc::SYS_sysinfo,
127 libc::SYS_umask,
128 libc::SYS_prlimit64,
129 libc::SYS_getrusage,
130 libc::SYS_times,
131 libc::SYS_sched_yield,
132 libc::SYS_sched_getaffinity,
133 libc::SYS_getcpu,
134 libc::SYS_rseq,
135 libc::SYS_close_range,
136 libc::SYS_landlock_create_ruleset,
140 libc::SYS_landlock_add_rule,
141 libc::SYS_landlock_restrict_self,
142 libc::SYS_getsockname,
144 libc::SYS_getpeername,
145 libc::SYS_socketpair,
146 libc::SYS_getsockopt,
147 libc::SYS_ppoll,
149 libc::SYS_pselect6,
150 libc::SYS_epoll_create1,
151 libc::SYS_epoll_ctl,
152 libc::SYS_epoll_pwait,
153 libc::SYS_eventfd2,
154 libc::SYS_signalfd4,
155 libc::SYS_timerfd_create,
156 libc::SYS_timerfd_settime,
157 libc::SYS_timerfd_gettime,
158 ];
159
160 #[cfg(target_arch = "x86_64")]
162 syscalls.extend_from_slice(&[
163 libc::SYS_open,
164 libc::SYS_stat,
165 libc::SYS_lstat,
166 libc::SYS_access,
167 libc::SYS_readlink,
168 libc::SYS_dup2,
169 libc::SYS_pipe,
170 libc::SYS_unlink,
171 libc::SYS_rename,
172 libc::SYS_link,
173 libc::SYS_symlink,
174 libc::SYS_chmod,
175 libc::SYS_mkdir,
176 libc::SYS_rmdir,
177 libc::SYS_getdents,
178 libc::SYS_getpgrp,
179 libc::SYS_arch_prctl,
180 libc::SYS_getrlimit,
181 libc::SYS_poll,
182 libc::SYS_select,
183 libc::SYS_epoll_create,
184 libc::SYS_epoll_wait,
185 libc::SYS_eventfd,
186 libc::SYS_signalfd,
187 ]);
188
189 syscalls
190 }
191
192 fn allowed_socket_domains(allow_network: bool) -> Vec<i32> {
193 if allow_network {
194 vec![libc::AF_UNIX, libc::AF_INET, libc::AF_INET6]
195 } else {
196 vec![libc::AF_UNIX]
197 }
198 }
199
200 fn network_mode_syscalls(allow_network: bool) -> Vec<i64> {
201 if allow_network {
202 vec![
203 libc::SYS_connect,
204 libc::SYS_sendto,
205 libc::SYS_recvfrom,
206 libc::SYS_sendmsg,
207 libc::SYS_recvmsg,
208 libc::SYS_shutdown,
209 libc::SYS_bind,
210 libc::SYS_listen,
211 libc::SYS_accept,
212 libc::SYS_accept4,
213 libc::SYS_setsockopt,
214 ]
215 } else {
216 Vec::new()
217 }
218 }
219
220 fn minimal_filter(allow_network: bool) -> Result<BTreeMap<i64, Vec<SeccompRule>>> {
230 let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
231
232 let allowed_syscalls = Self::base_allowed_syscalls();
234
235 for syscall in allowed_syscalls {
237 rules.insert(syscall, Vec::new());
238 }
239
240 for syscall in Self::network_mode_syscalls(allow_network) {
242 rules.insert(syscall, Vec::new());
243 }
244
245 let mut socket_rules = Vec::new();
248 for domain in Self::allowed_socket_domains(allow_network) {
249 let condition = SeccompCondition::new(
250 0, seccompiler::SeccompCmpArgLen::Dword,
252 seccompiler::SeccompCmpOp::Eq,
253 domain as u64,
254 )
255 .map_err(|e| {
256 NucleusError::SeccompError(format!(
257 "Failed to create socket domain condition: {}",
258 e
259 ))
260 })?;
261 let rule = SeccompRule::new(vec![condition]).map_err(|e| {
262 NucleusError::SeccompError(format!("Failed to create socket rule: {}", e))
263 })?;
264 socket_rules.push(rule);
265 }
266 rules.insert(libc::SYS_socket, socket_rules);
267
268 let ioctl_allowed: &[u64] = &[
270 0x5401, 0x5402, 0x5403, 0x5404, 0x540B, 0x540F, 0x5410, 0x5413, 0x5429, 0x541B, 0x5451, 0x5450, ];
285 let mut ioctl_rules = Vec::new();
286 for &request in ioctl_allowed {
287 let condition = SeccompCondition::new(
288 1, seccompiler::SeccompCmpArgLen::Dword,
290 seccompiler::SeccompCmpOp::Eq,
291 request,
292 )
293 .map_err(|e| {
294 NucleusError::SeccompError(format!("Failed to create ioctl condition: {}", e))
295 })?;
296 let rule = SeccompRule::new(vec![condition]).map_err(|e| {
297 NucleusError::SeccompError(format!("Failed to create ioctl rule: {}", e))
298 })?;
299 ioctl_rules.push(rule);
300 }
301 rules.insert(libc::SYS_ioctl, ioctl_rules);
302
303 let prctl_allowed: &[u64] = &[
309 1, 2, 15, 16, 38, 39, ];
316 let mut prctl_rules = Vec::new();
317 for &option in prctl_allowed {
318 let condition = SeccompCondition::new(
319 0, seccompiler::SeccompCmpArgLen::Dword,
321 seccompiler::SeccompCmpOp::Eq,
322 option,
323 )
324 .map_err(|e| {
325 NucleusError::SeccompError(format!("Failed to create prctl condition: {}", e))
326 })?;
327 let rule = SeccompRule::new(vec![condition]).map_err(|e| {
328 NucleusError::SeccompError(format!("Failed to create prctl rule: {}", e))
329 })?;
330 prctl_rules.push(rule);
331 }
332 rules.insert(libc::SYS_prctl, prctl_rules);
333
334 let mut mprotect_rules = Vec::new();
336 for allowed in [0, libc::PROT_WRITE as u64, libc::PROT_EXEC as u64] {
337 let condition = SeccompCondition::new(
338 2, seccompiler::SeccompCmpArgLen::Dword,
340 seccompiler::SeccompCmpOp::MaskedEq((libc::PROT_WRITE | libc::PROT_EXEC) as u64),
341 allowed,
342 )
343 .map_err(|e| {
344 NucleusError::SeccompError(format!("Failed to create mprotect condition: {}", e))
345 })?;
346 let rule = SeccompRule::new(vec![condition]).map_err(|e| {
347 NucleusError::SeccompError(format!("Failed to create mprotect rule: {}", e))
348 })?;
349 mprotect_rules.push(rule);
350 }
351 rules.insert(libc::SYS_mprotect, mprotect_rules);
352
353 rules.insert(libc::SYS_clone3, Vec::new());
365
366 let clone_condition = SeccompCondition::new(
368 0, seccompiler::SeccompCmpArgLen::Qword,
370 seccompiler::SeccompCmpOp::MaskedEq(DENIED_CLONE_NAMESPACE_FLAGS),
371 0, )
373 .map_err(|e| {
374 NucleusError::SeccompError(format!("Failed to create clone condition: {}", e))
375 })?;
376 let clone_rule = SeccompRule::new(vec![clone_condition]).map_err(|e| {
377 NucleusError::SeccompError(format!("Failed to create clone rule: {}", e))
378 })?;
379 rules.insert(libc::SYS_clone, vec![clone_rule]);
380
381 let execveat_condition = SeccompCondition::new(
388 4, seccompiler::SeccompCmpArgLen::Dword,
390 seccompiler::SeccompCmpOp::MaskedEq(libc::AT_EMPTY_PATH as u64),
391 0, )
393 .map_err(|e| {
394 NucleusError::SeccompError(format!("Failed to create execveat condition: {}", e))
395 })?;
396 let execveat_rule = SeccompRule::new(vec![execveat_condition]).map_err(|e| {
397 NucleusError::SeccompError(format!("Failed to create execveat rule: {}", e))
398 })?;
399 rules.insert(libc::SYS_execveat, vec![execveat_rule]);
400
401 Ok(rules)
402 }
403
404 pub fn compile_minimal_filter() -> Result<BpfProgram> {
409 let rules = Self::minimal_filter(true)?;
410 let filter = SeccompFilter::new(
411 rules,
412 SeccompAction::Errno(libc::EPERM as u32),
413 SeccompAction::Allow,
414 std::env::consts::ARCH.try_into().map_err(|e| {
415 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
416 })?,
417 )
418 .map_err(|e| {
419 NucleusError::SeccompError(format!("Failed to create seccomp filter: {}", e))
420 })?;
421
422 let bpf_prog: BpfProgram = filter.try_into().map_err(|e| {
423 NucleusError::SeccompError(format!("Failed to compile BPF program: {}", e))
424 })?;
425
426 Ok(bpf_prog)
427 }
428
429 pub fn apply_minimal_filter(&mut self) -> Result<bool> {
437 self.apply_minimal_filter_with_mode(false, false)
438 }
439
440 pub fn apply_minimal_filter_with_mode(
445 &mut self,
446 best_effort: bool,
447 log_denied: bool,
448 ) -> Result<bool> {
449 self.apply_filter_for_network_mode(true, best_effort, log_denied)
450 }
451
452 pub fn apply_filter_for_network_mode(
461 &mut self,
462 allow_network: bool,
463 best_effort: bool,
464 log_denied: bool,
465 ) -> Result<bool> {
466 if self.applied {
467 debug!("Seccomp filter already applied, skipping");
468 return Ok(true);
469 }
470
471 info!(allow_network, "Applying seccomp filter");
472
473 let rules = match Self::minimal_filter(allow_network) {
474 Ok(r) => r,
475 Err(e) => {
476 if best_effort {
477 warn!(
478 "Failed to create seccomp rules: {} (continuing without seccomp)",
479 e
480 );
481 return Ok(false);
482 }
483 return Err(e);
484 }
485 };
486
487 let filter = match SeccompFilter::new(
488 rules,
489 SeccompAction::Errno(libc::EPERM as u32), SeccompAction::Allow, std::env::consts::ARCH.try_into().map_err(|e| {
492 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
493 })?,
494 ) {
495 Ok(f) => f,
496 Err(e) => {
497 if best_effort {
498 warn!(
499 "Failed to create seccomp filter: {} (continuing without seccomp)",
500 e
501 );
502 return Ok(false);
503 }
504 return Err(NucleusError::SeccompError(format!(
505 "Failed to create seccomp filter: {}",
506 e
507 )));
508 }
509 };
510
511 let bpf_prog: BpfProgram = match filter.try_into() {
512 Ok(p) => p,
513 Err(e) => {
514 if best_effort {
515 warn!(
516 "Failed to compile BPF program: {} (continuing without seccomp)",
517 e
518 );
519 return Ok(false);
520 }
521 return Err(NucleusError::SeccompError(format!(
522 "Failed to compile BPF program: {}",
523 e
524 )));
525 }
526 };
527
528 match Self::apply_bpf_program(&bpf_prog, log_denied) {
530 Ok(_) => {
531 self.applied = true;
532 info!("Successfully applied seccomp filter");
533 Ok(true)
534 }
535 Err(e) => {
536 if best_effort {
537 warn!(
538 "Failed to apply seccomp filter: {} (continuing without seccomp)",
539 e
540 );
541 Ok(false)
542 } else {
543 Err(NucleusError::SeccompError(format!(
544 "Failed to apply seccomp filter: {}",
545 e
546 )))
547 }
548 }
549 }
550 }
551
552 pub fn apply_profile_from_file(
571 &mut self,
572 profile_path: &Path,
573 expected_sha256: Option<&str>,
574 audit_mode: bool,
575 ) -> Result<bool> {
576 if self.applied {
577 debug!("Seccomp filter already applied, skipping");
578 return Ok(true);
579 }
580
581 info!("Loading seccomp profile from {:?}", profile_path);
582
583 let content = std::fs::read(profile_path).map_err(|e| {
585 NucleusError::SeccompError(format!(
586 "Failed to read seccomp profile {:?}: {}",
587 profile_path, e
588 ))
589 })?;
590
591 if let Some(expected) = expected_sha256 {
593 let actual = sha256_hex(&content);
594 if actual != expected {
595 return Err(NucleusError::SeccompError(format!(
596 "Seccomp profile hash mismatch: expected {}, got {}",
597 expected, actual
598 )));
599 }
600 info!("Seccomp profile hash verified: {}", actual);
601 }
602
603 let profile: SeccompProfile = serde_json::from_slice(&content).map_err(|e| {
605 NucleusError::SeccompError(format!("Failed to parse seccomp profile: {}", e))
606 })?;
607
608 Self::warn_missing_arg_filters(&profile);
613
614 let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
616
617 for syscall_group in &profile.syscalls {
618 if syscall_group.action == "SCMP_ACT_ALLOW" {
619 for name in &syscall_group.names {
620 if let Some(nr) = syscall_name_to_number(name) {
621 rules.insert(nr, Vec::new());
622 } else {
623 warn!("Unknown syscall in profile: {} (skipping)", name);
624 }
625 }
626 }
627 }
628
629 let builtin_rules = Self::minimal_filter(true)?;
634 for syscall_name in Self::ARG_FILTERED_SYSCALLS {
635 if let Some(nr) = syscall_name_to_number(syscall_name) {
636 if let std::collections::btree_map::Entry::Occupied(mut entry) = rules.entry(nr) {
637 if let Some(builtin) = builtin_rules.get(&nr) {
638 if !builtin.is_empty() {
639 info!(
640 "Merging built-in argument filters for '{}' into custom profile",
641 syscall_name
642 );
643 entry.insert(builtin.clone());
644 }
645 }
646 }
647 }
648 }
649 rules.remove(&libc::SYS_clone3);
651
652 let filter = SeccompFilter::new(
653 rules,
654 SeccompAction::Errno(libc::EPERM as u32),
655 SeccompAction::Allow,
656 std::env::consts::ARCH.try_into().map_err(|e| {
657 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
658 })?,
659 )
660 .map_err(|e| {
661 NucleusError::SeccompError(format!(
662 "Failed to create seccomp filter from profile: {}",
663 e
664 ))
665 })?;
666
667 let bpf_prog: BpfProgram = filter.try_into().map_err(|e| {
668 NucleusError::SeccompError(format!("Failed to compile BPF program from profile: {}", e))
669 })?;
670
671 match Self::apply_bpf_program(&bpf_prog, audit_mode) {
672 Ok(_) => {
673 self.applied = true;
674 info!(
675 "Seccomp profile applied from {:?} (log_denied={})",
676 profile_path, audit_mode
677 );
678 Ok(true)
679 }
680 Err(e) => Err(e),
681 }
682 }
683
684 pub fn apply_trace_filter(&mut self) -> Result<bool> {
689 if self.applied {
690 debug!("Seccomp filter already applied, skipping trace filter");
691 return Ok(true);
692 }
693
694 info!("Applying seccomp trace filter (allow-all + LOG)");
695
696 let filter = SeccompFilter::new(
700 BTreeMap::new(),
701 SeccompAction::Allow, SeccompAction::Allow, std::env::consts::ARCH.try_into().map_err(|e| {
704 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
705 })?,
706 )
707 .map_err(|e| NucleusError::SeccompError(format!("Failed to create trace filter: {}", e)))?;
708
709 let bpf_prog: BpfProgram = filter.try_into().map_err(|e| {
710 NucleusError::SeccompError(format!("Failed to compile trace BPF: {}", e))
711 })?;
712
713 Self::apply_bpf_program(&bpf_prog, true)?;
715 self.applied = true;
716 info!("Seccomp trace filter applied (all syscalls allowed + logged)");
717 Ok(true)
718 }
719
720 const ARG_FILTERED_SYSCALLS: &'static [&'static str] =
723 &["clone", "clone3", "execveat", "ioctl", "mprotect", "prctl", "socket"];
724
725 fn warn_missing_arg_filters(profile: &SeccompProfile) {
728 for group in &profile.syscalls {
729 if group.action != "SCMP_ACT_ALLOW" {
730 continue;
731 }
732 for name in &group.names {
733 if Self::ARG_FILTERED_SYSCALLS.contains(&name.as_str()) && group.args.is_empty() {
734 warn!(
735 "Custom seccomp profile allows '{}' without argument filters. \
736 The built-in filter restricts this syscall at the argument level. \
737 This profile weakens security compared to the default.",
738 name
739 );
740 }
741 }
742 }
743 }
744
745 pub fn is_applied(&self) -> bool {
747 self.applied
748 }
749
750 fn apply_bpf_program(bpf_prog: &BpfProgram, log_denied: bool) -> Result<()> {
751 let mut flags: libc::c_ulong = 0;
752 if log_denied {
753 flags |= libc::SECCOMP_FILTER_FLAG_LOG as libc::c_ulong;
754 }
755
756 match Self::apply_bpf_program_with_flags(bpf_prog, flags) {
757 Ok(()) => Ok(()),
758 Err(err)
759 if log_denied
760 && err.raw_os_error() == Some(libc::EINVAL)
761 && libc::SECCOMP_FILTER_FLAG_LOG != 0 =>
762 {
763 warn!(
764 "Kernel rejected SECCOMP_FILTER_FLAG_LOG; continuing with seccomp \
765 enforcement without deny logging"
766 );
767 Self::apply_bpf_program_with_flags(bpf_prog, 0)?;
768 Ok(())
769 }
770 Err(err) => Err(NucleusError::SeccompError(format!(
771 "Failed to apply seccomp filter: {}",
772 err
773 ))),
774 }
775 }
776
777 fn apply_bpf_program_with_flags(
778 bpf_prog: &BpfProgram,
779 flags: libc::c_ulong,
780 ) -> std::io::Result<()> {
781 let rc = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
784 if rc != 0 {
785 return Err(std::io::Error::last_os_error());
786 }
787
788 let prog = libc::sock_fprog {
789 len: bpf_prog.len() as u16,
790 filter: bpf_prog.as_ptr() as *mut libc::sock_filter,
791 };
792
793 let rc = unsafe {
796 libc::syscall(
797 libc::SYS_seccomp,
798 libc::SECCOMP_SET_MODE_FILTER,
799 flags,
800 &prog as *const libc::sock_fprog,
801 )
802 };
803
804 if rc < 0 {
805 return Err(std::io::Error::last_os_error());
806 }
807
808 Ok(())
809 }
810}
811
812use crate::security::seccomp_generate::SeccompProfile;
814
815fn syscall_name_to_number(name: &str) -> Option<i64> {
819 match name {
820 "read" => Some(libc::SYS_read),
822 "write" => Some(libc::SYS_write),
823 #[cfg(target_arch = "x86_64")]
824 "open" => Some(libc::SYS_open),
825 "openat" => Some(libc::SYS_openat),
826 "close" => Some(libc::SYS_close),
827 #[cfg(target_arch = "x86_64")]
828 "stat" => Some(libc::SYS_stat),
829 "fstat" => Some(libc::SYS_fstat),
830 #[cfg(target_arch = "x86_64")]
831 "lstat" => Some(libc::SYS_lstat),
832 "lseek" => Some(libc::SYS_lseek),
833 #[cfg(target_arch = "x86_64")]
834 "access" => Some(libc::SYS_access),
835 "fcntl" => Some(libc::SYS_fcntl),
836 "readv" => Some(libc::SYS_readv),
837 "writev" => Some(libc::SYS_writev),
838 "pread64" => Some(libc::SYS_pread64),
839 "pwrite64" => Some(libc::SYS_pwrite64),
840 #[cfg(target_arch = "x86_64")]
841 "readlink" => Some(libc::SYS_readlink),
842 "readlinkat" => Some(libc::SYS_readlinkat),
843 "newfstatat" => Some(libc::SYS_newfstatat),
844 "statx" => Some(libc::SYS_statx),
845 "faccessat" => Some(libc::SYS_faccessat),
846 "faccessat2" => Some(libc::SYS_faccessat2),
847 "dup" => Some(libc::SYS_dup),
848 #[cfg(target_arch = "x86_64")]
849 "dup2" => Some(libc::SYS_dup2),
850 "dup3" => Some(libc::SYS_dup3),
851 #[cfg(target_arch = "x86_64")]
852 "pipe" => Some(libc::SYS_pipe),
853 "pipe2" => Some(libc::SYS_pipe2),
854 #[cfg(target_arch = "x86_64")]
855 "unlink" => Some(libc::SYS_unlink),
856 "unlinkat" => Some(libc::SYS_unlinkat),
857 #[cfg(target_arch = "x86_64")]
858 "rename" => Some(libc::SYS_rename),
859 "renameat" => Some(libc::SYS_renameat),
860 "renameat2" => Some(libc::SYS_renameat2),
861 #[cfg(target_arch = "x86_64")]
862 "link" => Some(libc::SYS_link),
863 "linkat" => Some(libc::SYS_linkat),
864 #[cfg(target_arch = "x86_64")]
865 "symlink" => Some(libc::SYS_symlink),
866 "symlinkat" => Some(libc::SYS_symlinkat),
867 #[cfg(target_arch = "x86_64")]
868 "chmod" => Some(libc::SYS_chmod),
869 "fchmod" => Some(libc::SYS_fchmod),
870 "fchmodat" => Some(libc::SYS_fchmodat),
871 "truncate" => Some(libc::SYS_truncate),
872 "ftruncate" => Some(libc::SYS_ftruncate),
873 "fallocate" => Some(libc::SYS_fallocate),
874 #[cfg(target_arch = "x86_64")]
875 "fadvise64" => Some(libc::SYS_fadvise64),
876 "fsync" => Some(libc::SYS_fsync),
877 "fdatasync" => Some(libc::SYS_fdatasync),
878 "flock" => Some(libc::SYS_flock),
879 #[cfg(target_arch = "x86_64")]
880 "sendfile" => Some(libc::SYS_sendfile),
881 "copy_file_range" => Some(libc::SYS_copy_file_range),
882 "splice" => Some(libc::SYS_splice),
883 "tee" => Some(libc::SYS_tee),
884 "mmap" => Some(libc::SYS_mmap),
886 "munmap" => Some(libc::SYS_munmap),
887 "mprotect" => Some(libc::SYS_mprotect),
888 "brk" => Some(libc::SYS_brk),
889 "mremap" => Some(libc::SYS_mremap),
890 "madvise" => Some(libc::SYS_madvise),
891 "msync" => Some(libc::SYS_msync),
892 "mlock" => Some(libc::SYS_mlock),
893 "munlock" => Some(libc::SYS_munlock),
894 #[cfg(target_arch = "x86_64")]
896 "fork" => Some(libc::SYS_fork),
897 "clone" => Some(libc::SYS_clone),
898 "clone3" => Some(libc::SYS_clone3),
899 "execve" => Some(libc::SYS_execve),
900 "execveat" => Some(libc::SYS_execveat),
901 "wait4" => Some(libc::SYS_wait4),
902 "waitid" => Some(libc::SYS_waitid),
903 "exit" => Some(libc::SYS_exit),
904 "exit_group" => Some(libc::SYS_exit_group),
905 "getpid" => Some(libc::SYS_getpid),
906 "gettid" => Some(libc::SYS_gettid),
907 "getuid" => Some(libc::SYS_getuid),
908 "getgid" => Some(libc::SYS_getgid),
909 "geteuid" => Some(libc::SYS_geteuid),
910 "getegid" => Some(libc::SYS_getegid),
911 "getppid" => Some(libc::SYS_getppid),
912 #[cfg(target_arch = "x86_64")]
913 "getpgrp" => Some(libc::SYS_getpgrp),
914 "setsid" => Some(libc::SYS_setsid),
915 "getgroups" => Some(libc::SYS_getgroups),
916 "rt_sigaction" => Some(libc::SYS_rt_sigaction),
918 "rt_sigprocmask" => Some(libc::SYS_rt_sigprocmask),
919 "rt_sigreturn" => Some(libc::SYS_rt_sigreturn),
920 "rt_sigsuspend" => Some(libc::SYS_rt_sigsuspend),
921 "sigaltstack" => Some(libc::SYS_sigaltstack),
922 "kill" => Some(libc::SYS_kill),
923 "tgkill" => Some(libc::SYS_tgkill),
924 "clock_gettime" => Some(libc::SYS_clock_gettime),
926 "clock_getres" => Some(libc::SYS_clock_getres),
927 "clock_nanosleep" => Some(libc::SYS_clock_nanosleep),
928 "gettimeofday" => Some(libc::SYS_gettimeofday),
929 "nanosleep" => Some(libc::SYS_nanosleep),
930 "getcwd" => Some(libc::SYS_getcwd),
932 "chdir" => Some(libc::SYS_chdir),
933 "fchdir" => Some(libc::SYS_fchdir),
934 #[cfg(target_arch = "x86_64")]
935 "mkdir" => Some(libc::SYS_mkdir),
936 "mkdirat" => Some(libc::SYS_mkdirat),
937 #[cfg(target_arch = "x86_64")]
938 "rmdir" => Some(libc::SYS_rmdir),
939 #[cfg(target_arch = "x86_64")]
940 "getdents" => Some(libc::SYS_getdents),
941 "getdents64" => Some(libc::SYS_getdents64),
942 "socket" => Some(libc::SYS_socket),
944 "connect" => Some(libc::SYS_connect),
945 "sendto" => Some(libc::SYS_sendto),
946 "recvfrom" => Some(libc::SYS_recvfrom),
947 "sendmsg" => Some(libc::SYS_sendmsg),
948 "recvmsg" => Some(libc::SYS_recvmsg),
949 "shutdown" => Some(libc::SYS_shutdown),
950 "bind" => Some(libc::SYS_bind),
951 "listen" => Some(libc::SYS_listen),
952 "accept" => Some(libc::SYS_accept),
953 "accept4" => Some(libc::SYS_accept4),
954 "setsockopt" => Some(libc::SYS_setsockopt),
955 "getsockopt" => Some(libc::SYS_getsockopt),
956 "getsockname" => Some(libc::SYS_getsockname),
957 "getpeername" => Some(libc::SYS_getpeername),
958 "socketpair" => Some(libc::SYS_socketpair),
959 #[cfg(target_arch = "x86_64")]
961 "poll" => Some(libc::SYS_poll),
962 "ppoll" => Some(libc::SYS_ppoll),
963 #[cfg(target_arch = "x86_64")]
964 "select" => Some(libc::SYS_select),
965 "pselect6" => Some(libc::SYS_pselect6),
966 #[cfg(target_arch = "x86_64")]
967 "epoll_create" => Some(libc::SYS_epoll_create),
968 "epoll_create1" => Some(libc::SYS_epoll_create1),
969 "epoll_ctl" => Some(libc::SYS_epoll_ctl),
970 #[cfg(target_arch = "x86_64")]
971 "epoll_wait" => Some(libc::SYS_epoll_wait),
972 "epoll_pwait" => Some(libc::SYS_epoll_pwait),
973 #[cfg(target_arch = "x86_64")]
974 "eventfd" => Some(libc::SYS_eventfd),
975 "eventfd2" => Some(libc::SYS_eventfd2),
976 #[cfg(target_arch = "x86_64")]
977 "signalfd" => Some(libc::SYS_signalfd),
978 "signalfd4" => Some(libc::SYS_signalfd4),
979 "timerfd_create" => Some(libc::SYS_timerfd_create),
980 "timerfd_settime" => Some(libc::SYS_timerfd_settime),
981 "timerfd_gettime" => Some(libc::SYS_timerfd_gettime),
982 "uname" => Some(libc::SYS_uname),
984 "getrandom" => Some(libc::SYS_getrandom),
985 "futex" => Some(libc::SYS_futex),
986 "set_tid_address" => Some(libc::SYS_set_tid_address),
987 "set_robust_list" => Some(libc::SYS_set_robust_list),
988 "get_robust_list" => Some(libc::SYS_get_robust_list),
989 #[cfg(target_arch = "x86_64")]
990 "arch_prctl" => Some(libc::SYS_arch_prctl),
991 "sysinfo" => Some(libc::SYS_sysinfo),
992 "umask" => Some(libc::SYS_umask),
993 #[cfg(target_arch = "x86_64")]
994 "getrlimit" => Some(libc::SYS_getrlimit),
995 "prlimit64" => Some(libc::SYS_prlimit64),
996 "getrusage" => Some(libc::SYS_getrusage),
997 "times" => Some(libc::SYS_times),
998 "sched_yield" => Some(libc::SYS_sched_yield),
999 "sched_getaffinity" => Some(libc::SYS_sched_getaffinity),
1000 "getcpu" => Some(libc::SYS_getcpu),
1001 "rseq" => Some(libc::SYS_rseq),
1002 "close_range" => Some(libc::SYS_close_range),
1003 "memfd_create" => Some(libc::SYS_memfd_create),
1004 "ioctl" => Some(libc::SYS_ioctl),
1005 "prctl" => Some(libc::SYS_prctl),
1006 "landlock_create_ruleset" => Some(libc::SYS_landlock_create_ruleset),
1008 "landlock_add_rule" => Some(libc::SYS_landlock_add_rule),
1009 "landlock_restrict_self" => Some(libc::SYS_landlock_restrict_self),
1010 _ => None,
1011 }
1012}
1013
1014impl Default for SeccompManager {
1015 fn default() -> Self {
1016 Self::new()
1017 }
1018}
1019
#[cfg(test)]
mod tests {
    //! Unit tests for the allowlist contents and the argument-filter logic.
    //!
    //! None of these tests installs a filter. They inspect the rule tables
    //! and mirror the masked comparisons in plain Rust.

    use super::*;

    #[test]
    fn test_seccomp_manager_initial_state() {
        let mgr = SeccompManager::new();
        assert!(!mgr.is_applied());
    }

    // NOTE(review): despite its name this only checks the initial flag;
    // presumably a real filter is not installed here because it would
    // constrain the whole test process - confirm before extending.
    #[test]
    fn test_apply_idempotent() {
        let mgr = SeccompManager::new();
        assert!(!mgr.is_applied());
    }

    #[test]
    fn test_clone_denied_flags_include_newcgroup() {
        assert_ne!(
            DENIED_CLONE_NAMESPACE_FLAGS & libc::CLONE_NEWCGROUP as u64,
            0
        );
    }

    #[test]
    fn test_clone_denied_flags_include_newtime() {
        assert_ne!(
            DENIED_CLONE_NAMESPACE_FLAGS & libc::CLONE_NEWTIME as u64,
            0,
            "CLONE_NEWTIME must be in denied clone namespace flags"
        );
    }

    #[test]
    fn test_network_none_socket_domains_are_unix_only() {
        let domains = SeccompManager::allowed_socket_domains(false);
        assert_eq!(domains, vec![libc::AF_UNIX]);
    }

    #[test]
    fn test_network_enabled_socket_domains_exclude_netlink() {
        let domains = SeccompManager::allowed_socket_domains(true);
        assert!(domains.contains(&libc::AF_UNIX));
        assert!(domains.contains(&libc::AF_INET));
        assert!(domains.contains(&libc::AF_INET6));
        assert!(!domains.contains(&libc::AF_NETLINK));
    }

    #[test]
    fn test_network_mode_syscalls_only_enabled_when_network_allowed() {
        let none = SeccompManager::network_mode_syscalls(false);
        assert!(none.is_empty());

        let enabled = SeccompManager::network_mode_syscalls(true);
        assert!(enabled.contains(&libc::SYS_connect));
        assert!(enabled.contains(&libc::SYS_bind));
        assert!(enabled.contains(&libc::SYS_listen));
        assert!(enabled.contains(&libc::SYS_accept));
        assert!(enabled.contains(&libc::SYS_setsockopt));
    }

    #[test]
    fn test_landlock_bootstrap_syscalls_present_in_base_allowlist() {
        let base = SeccompManager::base_allowed_syscalls();
        assert!(base.contains(&libc::SYS_landlock_create_ruleset));
        assert!(base.contains(&libc::SYS_landlock_add_rule));
        assert!(base.contains(&libc::SYS_landlock_restrict_self));
    }

    #[test]
    fn test_x32_legacy_range_not_allowlisted() {
        let base = SeccompManager::base_allowed_syscalls();
        let net = SeccompManager::network_mode_syscalls(true);
        for nr in 512_i64..=547_i64 {
            assert!(
                !base.contains(&nr) && !net.contains(&nr),
                "x32 syscall number {} unexpectedly allowlisted",
                nr
            );
        }
    }

    #[test]
    fn test_i386_compat_socketcall_range_not_allowlisted() {
        let base = SeccompManager::base_allowed_syscalls();
        let net = SeccompManager::network_mode_syscalls(true);
        for nr in 359_i64..=373_i64 {
            assert!(
                !base.contains(&nr) && !net.contains(&nr),
                "i386 compat syscall number {} unexpectedly allowlisted",
                nr
            );
        }
    }

    // The pinned totals include the x86_64-only legacy syscalls, so the
    // assertion only holds on that architecture.
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_minimal_filter_allowlist_counts_are_stable() {
        let base = SeccompManager::base_allowed_syscalls();
        let net = SeccompManager::network_mode_syscalls(true);

        assert_eq!(base.len(), 131);
        assert_eq!(net.len(), 11);
        // + 7 entries added by minimal_filter itself: socket, ioctl, prctl,
        // mprotect, clone3, clone, execveat.
        assert_eq!(base.len() + 7, 138);
        assert_eq!(base.len() + net.len() + 7, 149);
    }

    #[test]
    fn test_arg_filtered_syscalls_list_includes_critical_syscalls() {
        for name in &["clone", "clone3", "execveat", "ioctl", "prctl", "socket"] {
            assert!(
                SeccompManager::ARG_FILTERED_SYSCALLS.contains(name),
                "'{}' must be in ARG_FILTERED_SYSCALLS",
                name
            );
        }
    }

    #[test]
    fn test_clone3_allowed_in_minimal_filter() {
        let rules = SeccompManager::minimal_filter(true).unwrap();
        assert!(
            rules.contains_key(&libc::SYS_clone3),
            "clone3 must be in the seccomp allowlist (glibc 2.34+ requires it)"
        );
    }

    #[test]
    fn test_clone_is_allowed_with_arg_filter() {
        let rules = SeccompManager::minimal_filter(true).unwrap();
        assert!(
            rules.contains_key(&libc::SYS_clone),
            "clone must be in the seccomp allowlist with arg filters"
        );
    }

    #[test]
    fn test_high_risk_syscalls_removed_from_base_allowlist() {
        let base = SeccompManager::base_allowed_syscalls();
        // chown, lchown and vfork are legacy entry points; they are gated
        // like the legacy syscalls in the production code so this module
        // still builds on targets whose syscall table lacks them.
        let removed = [
            #[cfg(target_arch = "x86_64")]
            libc::SYS_chown,
            libc::SYS_fchown,
            #[cfg(target_arch = "x86_64")]
            libc::SYS_lchown,
            libc::SYS_fchownat,
            libc::SYS_sync,
            libc::SYS_syncfs,
            libc::SYS_mlock,
            libc::SYS_munlock,
            libc::SYS_mincore,
            #[cfg(target_arch = "x86_64")]
            libc::SYS_vfork,
            libc::SYS_tkill,
        ];

        for syscall in removed {
            assert!(
                !base.contains(&syscall),
                "syscall {} unexpectedly present in base allowlist",
                syscall
            );
        }
    }

    #[test]
    fn test_custom_profile_preserves_clone_arg_filters() {
        let rules = SeccompManager::minimal_filter(true).unwrap();

        for name in SeccompManager::ARG_FILTERED_SYSCALLS {
            // clone3 is allowed without argument rules in the built-in filter.
            if *name == "clone3" {
                continue;
            }
            if let Some(nr) = syscall_name_to_number(name) {
                let entry = rules.get(&nr);
                assert!(
                    entry.is_some() && !entry.unwrap().is_empty(),
                    "built-in filter must have argument-level rules for '{}' \
                     so apply_profile_from_file can merge them into custom profiles",
                    name
                );
            }
        }
    }

    #[test]
    fn test_memfd_create_not_in_default_allowlist() {
        let base = SeccompManager::base_allowed_syscalls();
        assert!(
            !base.contains(&libc::SYS_memfd_create),
            "memfd_create must not be in the default seccomp allowlist (fileless exec risk)"
        );
        let rules = SeccompManager::minimal_filter(true).unwrap();
        assert!(
            !rules.contains_key(&libc::SYS_memfd_create),
            "memfd_create must not be in the compiled seccomp filter rules"
        );
    }

    #[test]
    fn test_mprotect_has_arg_filtering() {
        let base = SeccompManager::base_allowed_syscalls();
        assert!(
            !base.contains(&libc::SYS_mprotect),
            "SYS_mprotect must not be unconditionally allowed - needs arg filtering"
        );

        let rules = SeccompManager::minimal_filter(true).unwrap();
        let mprotect_rules = rules.get(&libc::SYS_mprotect);
        assert!(
            mprotect_rules.is_some(),
            "mprotect must be present in the seccomp filter rules"
        );
        assert!(
            !mprotect_rules.unwrap().is_empty(),
            "mprotect must have argument-level conditions to prevent W^X violations"
        );
    }

    /// Every `unsafe` block in the production code must be preceded, within
    /// 200 bytes, by a safety comment.
    ///
    /// Only the text before the test module is scanned: the module contains
    /// the search needle as a string literal and would otherwise match itself.
    /// The scan works on bytes so that a window edge falling inside a
    /// multi-byte character (or past the end of the file) cannot panic.
    #[test]
    fn test_unsafe_blocks_have_safety_comments() {
        const MARKER: &[u8] = b"SAFETY:";

        let source = include_str!("seccomp.rs");
        let production = source.split("#[cfg(test)]").next().unwrap_or(source);
        let bytes = production.as_bytes();

        for (idx, _) in production.match_indices("unsafe {") {
            let window = &bytes[idx.saturating_sub(200)..idx];
            let excerpt_end = (idx + 10).min(bytes.len());
            assert!(
                window.windows(MARKER.len()).any(|chunk| chunk == MARKER),
                "unsafe block at byte {} must have a // SAFETY: comment. Context: ...{}...",
                idx,
                String::from_utf8_lossy(&bytes[idx.saturating_sub(80)..excerpt_end])
            );
        }
    }

    /// Plain-Rust mirror of the mprotect rules in `minimal_filter`: mask the
    /// protection bits with WRITE|EXEC and accept 0, WRITE or EXEC.
    fn mprotect_would_allow(prot: u64) -> bool {
        let mask = (libc::PROT_WRITE | libc::PROT_EXEC) as u64;
        let allowed_values: &[u64] = &[0, libc::PROT_WRITE as u64, libc::PROT_EXEC as u64];
        let masked = prot & mask;
        allowed_values.contains(&masked)
    }

    #[test]
    fn test_mprotect_allows_prot_none() {
        assert!(mprotect_would_allow(0), "PROT_NONE must be allowed");
    }

    #[test]
    fn test_mprotect_allows_prot_read_only() {
        assert!(
            mprotect_would_allow(libc::PROT_READ as u64),
            "PROT_READ must be allowed (W|X bits are 0)"
        );
    }

    #[test]
    fn test_mprotect_allows_prot_read_write() {
        assert!(
            mprotect_would_allow((libc::PROT_READ | libc::PROT_WRITE) as u64),
            "PROT_READ|PROT_WRITE must be allowed"
        );
    }

    #[test]
    fn test_mprotect_allows_prot_read_exec() {
        assert!(
            mprotect_would_allow((libc::PROT_READ | libc::PROT_EXEC) as u64),
            "PROT_READ|PROT_EXEC must be allowed"
        );
    }

    #[test]
    fn test_mprotect_rejects_prot_write_exec() {
        assert!(
            !mprotect_would_allow((libc::PROT_WRITE | libc::PROT_EXEC) as u64),
            "PROT_WRITE|PROT_EXEC (W^X violation) must be REJECTED"
        );
    }

    #[test]
    fn test_mprotect_rejects_prot_read_write_exec() {
        assert!(
            !mprotect_would_allow((libc::PROT_READ | libc::PROT_WRITE | libc::PROT_EXEC) as u64),
            "PROT_READ|PROT_WRITE|PROT_EXEC (W^X violation) must be REJECTED"
        );
    }

    #[test]
    fn test_mprotect_allows_prot_write_alone() {
        assert!(
            mprotect_would_allow(libc::PROT_WRITE as u64),
            "PROT_WRITE alone must be allowed"
        );
    }

    #[test]
    fn test_mprotect_allows_prot_exec_alone() {
        assert!(
            mprotect_would_allow(libc::PROT_EXEC as u64),
            "PROT_EXEC alone must be allowed"
        );
    }
}