1use crate::error::{NucleusError, Result};
2use crate::security::policy::sha256_hex;
3use seccompiler::{BpfProgram, SeccompAction, SeccompCondition, SeccompFilter, SeccompRule};
4use std::collections::BTreeMap;
5use std::path::Path;
6use tracing::{debug, info, warn};
7
/// Tracks whether a seccomp filter has already been installed through this
/// manager.
///
/// The `apply_*` methods read the flag to return early on repeated calls and
/// set it once a filter has been installed.
pub struct SeccompManager {
    /// `true` once one of the `apply_*` methods has installed a filter.
    applied: bool,
}
15
16const DENIED_CLONE_NAMESPACE_FLAGS: u64 = (libc::CLONE_NEWUSER
17 | libc::CLONE_NEWNS
18 | libc::CLONE_NEWNET
19 | libc::CLONE_NEWIPC
20 | libc::CLONE_NEWUTS
21 | libc::CLONE_NEWPID
22 | libc::CLONE_NEWCGROUP
23 | libc::CLONE_NEWTIME) as u64;
24
25impl SeccompManager {
26 pub fn new() -> Self {
27 Self { applied: false }
28 }
29
30 fn base_allowed_syscalls() -> Vec<i64> {
31 let mut syscalls = vec![
32 libc::SYS_read,
34 libc::SYS_write,
35 libc::SYS_openat,
36 libc::SYS_close,
37 libc::SYS_fstat,
38 libc::SYS_lseek,
39 libc::SYS_fcntl,
40 libc::SYS_readv,
41 libc::SYS_writev,
42 libc::SYS_pread64,
43 libc::SYS_pwrite64,
44 libc::SYS_readlinkat,
45 libc::SYS_newfstatat,
46 libc::SYS_statx,
47 libc::SYS_faccessat,
48 libc::SYS_faccessat2,
49 libc::SYS_dup,
50 libc::SYS_dup3,
51 libc::SYS_pipe2,
52 libc::SYS_unlinkat,
53 libc::SYS_renameat,
54 libc::SYS_renameat2,
55 libc::SYS_linkat,
56 libc::SYS_symlinkat,
57 libc::SYS_fchmod,
58 libc::SYS_fchmodat,
59 libc::SYS_truncate,
60 libc::SYS_ftruncate,
61 libc::SYS_fallocate,
62 #[cfg(target_arch = "x86_64")]
63 libc::SYS_fadvise64,
64 libc::SYS_fsync,
65 libc::SYS_fdatasync,
66 libc::SYS_flock,
67 #[cfg(target_arch = "x86_64")]
68 libc::SYS_sendfile,
69 libc::SYS_copy_file_range,
70 libc::SYS_splice,
71 libc::SYS_tee,
72 libc::SYS_mmap,
74 libc::SYS_munmap,
75 libc::SYS_brk,
76 libc::SYS_mremap,
77 libc::SYS_madvise,
78 libc::SYS_msync,
79 libc::SYS_execve,
85 libc::SYS_wait4,
87 libc::SYS_waitid,
88 libc::SYS_exit,
89 libc::SYS_exit_group,
90 libc::SYS_getpid,
91 libc::SYS_gettid,
92 libc::SYS_getuid,
93 libc::SYS_getgid,
94 libc::SYS_geteuid,
95 libc::SYS_getegid,
96 libc::SYS_getppid,
97 libc::SYS_setsid,
98 libc::SYS_getgroups,
99 libc::SYS_rt_sigaction,
101 libc::SYS_rt_sigprocmask,
102 libc::SYS_rt_sigreturn,
103 libc::SYS_rt_sigsuspend,
104 libc::SYS_sigaltstack,
105 libc::SYS_kill,
106 libc::SYS_tgkill,
107 libc::SYS_clock_gettime,
109 libc::SYS_clock_getres,
110 libc::SYS_clock_nanosleep,
111 libc::SYS_gettimeofday,
112 libc::SYS_nanosleep,
113 libc::SYS_getcwd,
115 libc::SYS_chdir,
116 libc::SYS_fchdir,
117 libc::SYS_mkdirat,
118 libc::SYS_getdents64,
119 libc::SYS_uname,
121 libc::SYS_getrandom,
122 libc::SYS_futex,
123 libc::SYS_set_tid_address,
124 libc::SYS_set_robust_list,
125 libc::SYS_get_robust_list,
126 libc::SYS_sysinfo,
127 libc::SYS_umask,
128 libc::SYS_prlimit64,
129 libc::SYS_getrusage,
130 libc::SYS_times,
131 libc::SYS_sched_yield,
132 libc::SYS_sched_getaffinity,
133 libc::SYS_getcpu,
134 libc::SYS_rseq,
135 libc::SYS_close_range,
136 libc::SYS_landlock_create_ruleset,
140 libc::SYS_landlock_add_rule,
141 libc::SYS_landlock_restrict_self,
142 libc::SYS_getsockname,
144 libc::SYS_getpeername,
145 libc::SYS_socketpair,
146 libc::SYS_getsockopt,
147 libc::SYS_ppoll,
149 libc::SYS_pselect6,
150 libc::SYS_epoll_create1,
151 libc::SYS_epoll_ctl,
152 libc::SYS_epoll_pwait,
153 libc::SYS_eventfd2,
154 libc::SYS_signalfd4,
155 libc::SYS_timerfd_create,
156 libc::SYS_timerfd_settime,
157 libc::SYS_timerfd_gettime,
158 ];
159
160 #[cfg(target_arch = "x86_64")]
162 syscalls.extend_from_slice(&[
163 libc::SYS_open,
164 libc::SYS_stat,
165 libc::SYS_lstat,
166 libc::SYS_access,
167 libc::SYS_readlink,
168 libc::SYS_dup2,
169 libc::SYS_pipe,
170 libc::SYS_unlink,
171 libc::SYS_rename,
172 libc::SYS_link,
173 libc::SYS_symlink,
174 libc::SYS_chmod,
175 libc::SYS_mkdir,
176 libc::SYS_rmdir,
177 libc::SYS_getdents,
178 libc::SYS_getpgrp,
179 libc::SYS_arch_prctl,
180 libc::SYS_getrlimit,
181 libc::SYS_poll,
182 libc::SYS_select,
183 libc::SYS_epoll_create,
184 libc::SYS_epoll_wait,
185 libc::SYS_eventfd,
186 libc::SYS_signalfd,
187 ]);
188
189 syscalls
190 }
191
192 fn allowed_socket_domains(allow_network: bool) -> Vec<i32> {
193 if allow_network {
194 vec![libc::AF_UNIX, libc::AF_INET, libc::AF_INET6]
195 } else {
196 vec![libc::AF_UNIX]
197 }
198 }
199
200 fn network_mode_syscalls(allow_network: bool) -> Vec<i64> {
201 if allow_network {
202 vec![
203 libc::SYS_connect,
204 libc::SYS_sendto,
205 libc::SYS_recvfrom,
206 libc::SYS_sendmsg,
207 libc::SYS_recvmsg,
208 libc::SYS_shutdown,
209 libc::SYS_bind,
210 libc::SYS_listen,
211 libc::SYS_accept,
212 libc::SYS_accept4,
213 libc::SYS_setsockopt,
214 ]
215 } else {
216 Vec::new()
217 }
218 }
219
220 fn minimal_filter(allow_network: bool) -> Result<BTreeMap<i64, Vec<SeccompRule>>> {
230 let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
231
232 let allowed_syscalls = Self::base_allowed_syscalls();
234
235 for syscall in allowed_syscalls {
237 rules.insert(syscall, Vec::new());
238 }
239
240 for syscall in Self::network_mode_syscalls(allow_network) {
242 rules.insert(syscall, Vec::new());
243 }
244
245 let mut socket_rules = Vec::new();
248 for domain in Self::allowed_socket_domains(allow_network) {
249 let condition = SeccompCondition::new(
250 0, seccompiler::SeccompCmpArgLen::Dword,
252 seccompiler::SeccompCmpOp::Eq,
253 domain as u64,
254 )
255 .map_err(|e| {
256 NucleusError::SeccompError(format!(
257 "Failed to create socket domain condition: {}",
258 e
259 ))
260 })?;
261 let rule = SeccompRule::new(vec![condition]).map_err(|e| {
262 NucleusError::SeccompError(format!("Failed to create socket rule: {}", e))
263 })?;
264 socket_rules.push(rule);
265 }
266 rules.insert(libc::SYS_socket, socket_rules);
267
268 let ioctl_allowed: &[u64] = &[
270 0x5401, 0x5402, 0x5403, 0x5404, 0x540B, 0x540F, 0x5410, 0x5413, 0x5429, 0x541B, 0x5451, 0x5450, ];
285 let mut ioctl_rules = Vec::new();
286 for &request in ioctl_allowed {
287 let condition = SeccompCondition::new(
288 1, seccompiler::SeccompCmpArgLen::Dword,
290 seccompiler::SeccompCmpOp::Eq,
291 request,
292 )
293 .map_err(|e| {
294 NucleusError::SeccompError(format!("Failed to create ioctl condition: {}", e))
295 })?;
296 let rule = SeccompRule::new(vec![condition]).map_err(|e| {
297 NucleusError::SeccompError(format!("Failed to create ioctl rule: {}", e))
298 })?;
299 ioctl_rules.push(rule);
300 }
301 rules.insert(libc::SYS_ioctl, ioctl_rules);
302
303 let prctl_allowed: &[u64] = &[
309 1, 2, 15, 16, 38, 39, ];
316 let mut prctl_rules = Vec::new();
317 for &option in prctl_allowed {
318 let condition = SeccompCondition::new(
319 0, seccompiler::SeccompCmpArgLen::Dword,
321 seccompiler::SeccompCmpOp::Eq,
322 option,
323 )
324 .map_err(|e| {
325 NucleusError::SeccompError(format!("Failed to create prctl condition: {}", e))
326 })?;
327 let rule = SeccompRule::new(vec![condition]).map_err(|e| {
328 NucleusError::SeccompError(format!("Failed to create prctl rule: {}", e))
329 })?;
330 prctl_rules.push(rule);
331 }
332 rules.insert(libc::SYS_prctl, prctl_rules);
333
334 let mut mprotect_rules = Vec::new();
336 for allowed in [0, libc::PROT_WRITE as u64, libc::PROT_EXEC as u64] {
337 let condition = SeccompCondition::new(
338 2, seccompiler::SeccompCmpArgLen::Dword,
340 seccompiler::SeccompCmpOp::MaskedEq((libc::PROT_WRITE | libc::PROT_EXEC) as u64),
341 allowed,
342 )
343 .map_err(|e| {
344 NucleusError::SeccompError(format!("Failed to create mprotect condition: {}", e))
345 })?;
346 let rule = SeccompRule::new(vec![condition]).map_err(|e| {
347 NucleusError::SeccompError(format!("Failed to create mprotect rule: {}", e))
348 })?;
349 mprotect_rules.push(rule);
350 }
351 rules.insert(libc::SYS_mprotect, mprotect_rules);
352
353 rules.insert(libc::SYS_clone3, Vec::new());
365
366 let clone_condition = SeccompCondition::new(
368 0, seccompiler::SeccompCmpArgLen::Qword,
370 seccompiler::SeccompCmpOp::MaskedEq(DENIED_CLONE_NAMESPACE_FLAGS),
371 0, )
373 .map_err(|e| {
374 NucleusError::SeccompError(format!("Failed to create clone condition: {}", e))
375 })?;
376 let clone_rule = SeccompRule::new(vec![clone_condition]).map_err(|e| {
377 NucleusError::SeccompError(format!("Failed to create clone rule: {}", e))
378 })?;
379 rules.insert(libc::SYS_clone, vec![clone_rule]);
380
381 let execveat_condition = SeccompCondition::new(
388 4, seccompiler::SeccompCmpArgLen::Dword,
390 seccompiler::SeccompCmpOp::MaskedEq(libc::AT_EMPTY_PATH as u64),
391 0, )
393 .map_err(|e| {
394 NucleusError::SeccompError(format!("Failed to create execveat condition: {}", e))
395 })?;
396 let execveat_rule = SeccompRule::new(vec![execveat_condition]).map_err(|e| {
397 NucleusError::SeccompError(format!("Failed to create execveat rule: {}", e))
398 })?;
399 rules.insert(libc::SYS_execveat, vec![execveat_rule]);
400
401 Ok(rules)
402 }
403
404 pub fn compile_minimal_filter() -> Result<BpfProgram> {
409 let rules = Self::minimal_filter(true)?;
410 let filter = SeccompFilter::new(
411 rules,
412 SeccompAction::Errno(libc::EPERM as u32),
413 SeccompAction::Allow,
414 std::env::consts::ARCH.try_into().map_err(|e| {
415 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
416 })?,
417 )
418 .map_err(|e| {
419 NucleusError::SeccompError(format!("Failed to create seccomp filter: {}", e))
420 })?;
421
422 let bpf_prog: BpfProgram = filter.try_into().map_err(|e| {
423 NucleusError::SeccompError(format!("Failed to compile BPF program: {}", e))
424 })?;
425
426 Ok(bpf_prog)
427 }
428
429 pub fn apply_minimal_filter(&mut self) -> Result<bool> {
437 self.apply_minimal_filter_with_mode(false, false)
438 }
439
440 pub fn apply_minimal_filter_with_mode(
445 &mut self,
446 best_effort: bool,
447 log_denied: bool,
448 ) -> Result<bool> {
449 self.apply_filter_for_network_mode(true, best_effort, log_denied)
450 }
451
452 pub fn apply_filter_for_network_mode(
461 &mut self,
462 allow_network: bool,
463 best_effort: bool,
464 log_denied: bool,
465 ) -> Result<bool> {
466 if self.applied {
467 debug!("Seccomp filter already applied, skipping");
468 return Ok(true);
469 }
470
471 info!(allow_network, "Applying seccomp filter");
472
473 let rules = match Self::minimal_filter(allow_network) {
474 Ok(r) => r,
475 Err(e) => {
476 if best_effort {
477 warn!(
478 "Failed to create seccomp rules: {} (continuing without seccomp)",
479 e
480 );
481 return Ok(false);
482 }
483 return Err(e);
484 }
485 };
486
487 let filter = match SeccompFilter::new(
488 rules,
489 SeccompAction::Errno(libc::EPERM as u32), SeccompAction::Allow, std::env::consts::ARCH.try_into().map_err(|e| {
492 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
493 })?,
494 ) {
495 Ok(f) => f,
496 Err(e) => {
497 if best_effort {
498 warn!(
499 "Failed to create seccomp filter: {} (continuing without seccomp)",
500 e
501 );
502 return Ok(false);
503 }
504 return Err(NucleusError::SeccompError(format!(
505 "Failed to create seccomp filter: {}",
506 e
507 )));
508 }
509 };
510
511 let bpf_prog: BpfProgram = match filter.try_into() {
512 Ok(p) => p,
513 Err(e) => {
514 if best_effort {
515 warn!(
516 "Failed to compile BPF program: {} (continuing without seccomp)",
517 e
518 );
519 return Ok(false);
520 }
521 return Err(NucleusError::SeccompError(format!(
522 "Failed to compile BPF program: {}",
523 e
524 )));
525 }
526 };
527
528 match Self::apply_bpf_program(&bpf_prog, log_denied) {
530 Ok(_) => {
531 self.applied = true;
532 info!("Successfully applied seccomp filter");
533 Ok(true)
534 }
535 Err(e) => {
536 if best_effort {
537 warn!(
538 "Failed to apply seccomp filter: {} (continuing without seccomp)",
539 e
540 );
541 Ok(false)
542 } else {
543 Err(NucleusError::SeccompError(format!(
544 "Failed to apply seccomp filter: {}",
545 e
546 )))
547 }
548 }
549 }
550 }
551
552 pub fn apply_profile_from_file(
571 &mut self,
572 profile_path: &Path,
573 expected_sha256: Option<&str>,
574 audit_mode: bool,
575 ) -> Result<bool> {
576 if self.applied {
577 debug!("Seccomp filter already applied, skipping");
578 return Ok(true);
579 }
580
581 info!("Loading seccomp profile from {:?}", profile_path);
582
583 let content = std::fs::read(profile_path).map_err(|e| {
585 NucleusError::SeccompError(format!(
586 "Failed to read seccomp profile {:?}: {}",
587 profile_path, e
588 ))
589 })?;
590
591 if let Some(expected) = expected_sha256 {
593 let actual = sha256_hex(&content);
594 if actual != expected {
595 return Err(NucleusError::SeccompError(format!(
596 "Seccomp profile hash mismatch: expected {}, got {}",
597 expected, actual
598 )));
599 }
600 info!("Seccomp profile hash verified: {}", actual);
601 }
602
603 let profile: SeccompProfile = serde_json::from_slice(&content).map_err(|e| {
605 NucleusError::SeccompError(format!("Failed to parse seccomp profile: {}", e))
606 })?;
607
608 Self::warn_missing_arg_filters(&profile);
613
614 let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
616
617 for syscall_group in &profile.syscalls {
618 if syscall_group.action == "SCMP_ACT_ALLOW" {
619 for name in &syscall_group.names {
620 if let Some(nr) = syscall_name_to_number(name) {
621 rules.insert(nr, Vec::new());
622 } else {
623 warn!("Unknown syscall in profile: {} (skipping)", name);
624 }
625 }
626 }
627 }
628
629 let builtin_rules = Self::minimal_filter(true)?;
634 for syscall_name in Self::ARG_FILTERED_SYSCALLS {
635 if let Some(nr) = syscall_name_to_number(syscall_name) {
636 if let std::collections::btree_map::Entry::Occupied(mut entry) = rules.entry(nr) {
637 if let Some(builtin) = builtin_rules.get(&nr) {
638 if !builtin.is_empty() {
639 info!(
640 "Merging built-in argument filters for '{}' into custom profile",
641 syscall_name
642 );
643 entry.insert(builtin.clone());
644 }
645 }
646 }
647 }
648 }
649 rules.remove(&libc::SYS_clone3);
651
652 let filter = SeccompFilter::new(
653 rules,
654 SeccompAction::Errno(libc::EPERM as u32),
655 SeccompAction::Allow,
656 std::env::consts::ARCH.try_into().map_err(|e| {
657 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
658 })?,
659 )
660 .map_err(|e| {
661 NucleusError::SeccompError(format!(
662 "Failed to create seccomp filter from profile: {}",
663 e
664 ))
665 })?;
666
667 let bpf_prog: BpfProgram = filter.try_into().map_err(|e| {
668 NucleusError::SeccompError(format!("Failed to compile BPF program from profile: {}", e))
669 })?;
670
671 match Self::apply_bpf_program(&bpf_prog, audit_mode) {
672 Ok(_) => {
673 self.applied = true;
674 info!(
675 "Seccomp profile applied from {:?} (log_denied={})",
676 profile_path, audit_mode
677 );
678 Ok(true)
679 }
680 Err(e) => Err(e),
681 }
682 }
683
684 pub fn apply_trace_filter(&mut self) -> Result<bool> {
689 if self.applied {
690 debug!("Seccomp filter already applied, skipping trace filter");
691 return Ok(true);
692 }
693
694 info!("Applying seccomp trace filter (allow-all + LOG)");
695
696 let filter = SeccompFilter::new(
700 BTreeMap::new(),
701 SeccompAction::Allow, SeccompAction::Allow, std::env::consts::ARCH.try_into().map_err(|e| {
704 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
705 })?,
706 )
707 .map_err(|e| NucleusError::SeccompError(format!("Failed to create trace filter: {}", e)))?;
708
709 let bpf_prog: BpfProgram = filter.try_into().map_err(|e| {
710 NucleusError::SeccompError(format!("Failed to compile trace BPF: {}", e))
711 })?;
712
713 Self::apply_bpf_program(&bpf_prog, true)?;
715 self.applied = true;
716 info!("Seccomp trace filter applied (all syscalls allowed + logged)");
717 Ok(true)
718 }
719
/// Names of the syscalls that get special treatment when a custom profile
/// is loaded: a warning if the profile allows them without argument filters,
/// and replacement by the built-in argument rules where those exist.
const ARG_FILTERED_SYSCALLS: &'static [&'static str] = &[
    "clone",
    "clone3",
    "execveat",
    "ioctl",
    "mprotect",
    "prctl",
    "socket",
];
725
726 fn warn_missing_arg_filters(profile: &SeccompProfile) {
729 for group in &profile.syscalls {
730 if group.action != "SCMP_ACT_ALLOW" {
731 continue;
732 }
733 for name in &group.names {
734 if Self::ARG_FILTERED_SYSCALLS.contains(&name.as_str()) && group.args.is_empty() {
735 warn!(
736 "Custom seccomp profile allows '{}' without argument filters. \
737 The built-in filter restricts this syscall at the argument level. \
738 This profile weakens security compared to the default.",
739 name
740 );
741 }
742 }
743 }
744 }
745
746 pub fn is_applied(&self) -> bool {
748 self.applied
749 }
750
751 fn apply_bpf_program(bpf_prog: &BpfProgram, log_denied: bool) -> Result<()> {
752 let mut flags: libc::c_ulong = 0;
753 if log_denied {
754 flags |= libc::SECCOMP_FILTER_FLAG_LOG as libc::c_ulong;
755 }
756
757 match Self::apply_bpf_program_with_flags(bpf_prog, flags) {
758 Ok(()) => Ok(()),
759 Err(err)
760 if log_denied
761 && err.raw_os_error() == Some(libc::EINVAL)
762 && libc::SECCOMP_FILTER_FLAG_LOG != 0 =>
763 {
764 warn!(
765 "Kernel rejected SECCOMP_FILTER_FLAG_LOG; continuing with seccomp \
766 enforcement without deny logging"
767 );
768 Self::apply_bpf_program_with_flags(bpf_prog, 0)?;
769 Ok(())
770 }
771 Err(err) => Err(NucleusError::SeccompError(format!(
772 "Failed to apply seccomp filter: {}",
773 err
774 ))),
775 }
776 }
777
778 fn apply_bpf_program_with_flags(
779 bpf_prog: &BpfProgram,
780 flags: libc::c_ulong,
781 ) -> std::io::Result<()> {
782 let rc = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
785 if rc != 0 {
786 return Err(std::io::Error::last_os_error());
787 }
788
789 let prog = libc::sock_fprog {
790 len: bpf_prog.len() as u16,
791 filter: bpf_prog.as_ptr() as *mut libc::sock_filter,
792 };
793
794 let rc = unsafe {
797 libc::syscall(
798 libc::SYS_seccomp,
799 libc::SECCOMP_SET_MODE_FILTER,
800 flags,
801 &prog as *const libc::sock_fprog,
802 )
803 };
804
805 if rc < 0 {
806 return Err(std::io::Error::last_os_error());
807 }
808
809 Ok(())
810 }
811}
812
813use crate::security::seccomp_generate::SeccompProfile;
815
816fn syscall_name_to_number(name: &str) -> Option<i64> {
820 match name {
821 "read" => Some(libc::SYS_read),
823 "write" => Some(libc::SYS_write),
824 #[cfg(target_arch = "x86_64")]
825 "open" => Some(libc::SYS_open),
826 "openat" => Some(libc::SYS_openat),
827 "close" => Some(libc::SYS_close),
828 #[cfg(target_arch = "x86_64")]
829 "stat" => Some(libc::SYS_stat),
830 "fstat" => Some(libc::SYS_fstat),
831 #[cfg(target_arch = "x86_64")]
832 "lstat" => Some(libc::SYS_lstat),
833 "lseek" => Some(libc::SYS_lseek),
834 #[cfg(target_arch = "x86_64")]
835 "access" => Some(libc::SYS_access),
836 "fcntl" => Some(libc::SYS_fcntl),
837 "readv" => Some(libc::SYS_readv),
838 "writev" => Some(libc::SYS_writev),
839 "pread64" => Some(libc::SYS_pread64),
840 "pwrite64" => Some(libc::SYS_pwrite64),
841 #[cfg(target_arch = "x86_64")]
842 "readlink" => Some(libc::SYS_readlink),
843 "readlinkat" => Some(libc::SYS_readlinkat),
844 "newfstatat" => Some(libc::SYS_newfstatat),
845 "statx" => Some(libc::SYS_statx),
846 "faccessat" => Some(libc::SYS_faccessat),
847 "faccessat2" => Some(libc::SYS_faccessat2),
848 "dup" => Some(libc::SYS_dup),
849 #[cfg(target_arch = "x86_64")]
850 "dup2" => Some(libc::SYS_dup2),
851 "dup3" => Some(libc::SYS_dup3),
852 #[cfg(target_arch = "x86_64")]
853 "pipe" => Some(libc::SYS_pipe),
854 "pipe2" => Some(libc::SYS_pipe2),
855 #[cfg(target_arch = "x86_64")]
856 "unlink" => Some(libc::SYS_unlink),
857 "unlinkat" => Some(libc::SYS_unlinkat),
858 #[cfg(target_arch = "x86_64")]
859 "rename" => Some(libc::SYS_rename),
860 "renameat" => Some(libc::SYS_renameat),
861 "renameat2" => Some(libc::SYS_renameat2),
862 #[cfg(target_arch = "x86_64")]
863 "link" => Some(libc::SYS_link),
864 "linkat" => Some(libc::SYS_linkat),
865 #[cfg(target_arch = "x86_64")]
866 "symlink" => Some(libc::SYS_symlink),
867 "symlinkat" => Some(libc::SYS_symlinkat),
868 #[cfg(target_arch = "x86_64")]
869 "chmod" => Some(libc::SYS_chmod),
870 "fchmod" => Some(libc::SYS_fchmod),
871 "fchmodat" => Some(libc::SYS_fchmodat),
872 "truncate" => Some(libc::SYS_truncate),
873 "ftruncate" => Some(libc::SYS_ftruncate),
874 "fallocate" => Some(libc::SYS_fallocate),
875 #[cfg(target_arch = "x86_64")]
876 "fadvise64" => Some(libc::SYS_fadvise64),
877 "fsync" => Some(libc::SYS_fsync),
878 "fdatasync" => Some(libc::SYS_fdatasync),
879 "flock" => Some(libc::SYS_flock),
880 #[cfg(target_arch = "x86_64")]
881 "sendfile" => Some(libc::SYS_sendfile),
882 "copy_file_range" => Some(libc::SYS_copy_file_range),
883 "splice" => Some(libc::SYS_splice),
884 "tee" => Some(libc::SYS_tee),
885 "mmap" => Some(libc::SYS_mmap),
887 "munmap" => Some(libc::SYS_munmap),
888 "mprotect" => Some(libc::SYS_mprotect),
889 "brk" => Some(libc::SYS_brk),
890 "mremap" => Some(libc::SYS_mremap),
891 "madvise" => Some(libc::SYS_madvise),
892 "msync" => Some(libc::SYS_msync),
893 "mlock" => Some(libc::SYS_mlock),
894 "munlock" => Some(libc::SYS_munlock),
895 #[cfg(target_arch = "x86_64")]
897 "fork" => Some(libc::SYS_fork),
898 "clone" => Some(libc::SYS_clone),
899 "clone3" => Some(libc::SYS_clone3),
900 "execve" => Some(libc::SYS_execve),
901 "execveat" => Some(libc::SYS_execveat),
902 "wait4" => Some(libc::SYS_wait4),
903 "waitid" => Some(libc::SYS_waitid),
904 "exit" => Some(libc::SYS_exit),
905 "exit_group" => Some(libc::SYS_exit_group),
906 "getpid" => Some(libc::SYS_getpid),
907 "gettid" => Some(libc::SYS_gettid),
908 "getuid" => Some(libc::SYS_getuid),
909 "getgid" => Some(libc::SYS_getgid),
910 "geteuid" => Some(libc::SYS_geteuid),
911 "getegid" => Some(libc::SYS_getegid),
912 "getppid" => Some(libc::SYS_getppid),
913 #[cfg(target_arch = "x86_64")]
914 "getpgrp" => Some(libc::SYS_getpgrp),
915 "setsid" => Some(libc::SYS_setsid),
916 "getgroups" => Some(libc::SYS_getgroups),
917 "rt_sigaction" => Some(libc::SYS_rt_sigaction),
919 "rt_sigprocmask" => Some(libc::SYS_rt_sigprocmask),
920 "rt_sigreturn" => Some(libc::SYS_rt_sigreturn),
921 "rt_sigsuspend" => Some(libc::SYS_rt_sigsuspend),
922 "sigaltstack" => Some(libc::SYS_sigaltstack),
923 "kill" => Some(libc::SYS_kill),
924 "tgkill" => Some(libc::SYS_tgkill),
925 "clock_gettime" => Some(libc::SYS_clock_gettime),
927 "clock_getres" => Some(libc::SYS_clock_getres),
928 "clock_nanosleep" => Some(libc::SYS_clock_nanosleep),
929 "gettimeofday" => Some(libc::SYS_gettimeofday),
930 "nanosleep" => Some(libc::SYS_nanosleep),
931 "getcwd" => Some(libc::SYS_getcwd),
933 "chdir" => Some(libc::SYS_chdir),
934 "fchdir" => Some(libc::SYS_fchdir),
935 #[cfg(target_arch = "x86_64")]
936 "mkdir" => Some(libc::SYS_mkdir),
937 "mkdirat" => Some(libc::SYS_mkdirat),
938 #[cfg(target_arch = "x86_64")]
939 "rmdir" => Some(libc::SYS_rmdir),
940 #[cfg(target_arch = "x86_64")]
941 "getdents" => Some(libc::SYS_getdents),
942 "getdents64" => Some(libc::SYS_getdents64),
943 "socket" => Some(libc::SYS_socket),
945 "connect" => Some(libc::SYS_connect),
946 "sendto" => Some(libc::SYS_sendto),
947 "recvfrom" => Some(libc::SYS_recvfrom),
948 "sendmsg" => Some(libc::SYS_sendmsg),
949 "recvmsg" => Some(libc::SYS_recvmsg),
950 "shutdown" => Some(libc::SYS_shutdown),
951 "bind" => Some(libc::SYS_bind),
952 "listen" => Some(libc::SYS_listen),
953 "accept" => Some(libc::SYS_accept),
954 "accept4" => Some(libc::SYS_accept4),
955 "setsockopt" => Some(libc::SYS_setsockopt),
956 "getsockopt" => Some(libc::SYS_getsockopt),
957 "getsockname" => Some(libc::SYS_getsockname),
958 "getpeername" => Some(libc::SYS_getpeername),
959 "socketpair" => Some(libc::SYS_socketpair),
960 #[cfg(target_arch = "x86_64")]
962 "poll" => Some(libc::SYS_poll),
963 "ppoll" => Some(libc::SYS_ppoll),
964 #[cfg(target_arch = "x86_64")]
965 "select" => Some(libc::SYS_select),
966 "pselect6" => Some(libc::SYS_pselect6),
967 #[cfg(target_arch = "x86_64")]
968 "epoll_create" => Some(libc::SYS_epoll_create),
969 "epoll_create1" => Some(libc::SYS_epoll_create1),
970 "epoll_ctl" => Some(libc::SYS_epoll_ctl),
971 #[cfg(target_arch = "x86_64")]
972 "epoll_wait" => Some(libc::SYS_epoll_wait),
973 "epoll_pwait" => Some(libc::SYS_epoll_pwait),
974 #[cfg(target_arch = "x86_64")]
975 "eventfd" => Some(libc::SYS_eventfd),
976 "eventfd2" => Some(libc::SYS_eventfd2),
977 #[cfg(target_arch = "x86_64")]
978 "signalfd" => Some(libc::SYS_signalfd),
979 "signalfd4" => Some(libc::SYS_signalfd4),
980 "timerfd_create" => Some(libc::SYS_timerfd_create),
981 "timerfd_settime" => Some(libc::SYS_timerfd_settime),
982 "timerfd_gettime" => Some(libc::SYS_timerfd_gettime),
983 "uname" => Some(libc::SYS_uname),
985 "getrandom" => Some(libc::SYS_getrandom),
986 "futex" => Some(libc::SYS_futex),
987 "set_tid_address" => Some(libc::SYS_set_tid_address),
988 "set_robust_list" => Some(libc::SYS_set_robust_list),
989 "get_robust_list" => Some(libc::SYS_get_robust_list),
990 #[cfg(target_arch = "x86_64")]
991 "arch_prctl" => Some(libc::SYS_arch_prctl),
992 "sysinfo" => Some(libc::SYS_sysinfo),
993 "umask" => Some(libc::SYS_umask),
994 #[cfg(target_arch = "x86_64")]
995 "getrlimit" => Some(libc::SYS_getrlimit),
996 "prlimit64" => Some(libc::SYS_prlimit64),
997 "getrusage" => Some(libc::SYS_getrusage),
998 "times" => Some(libc::SYS_times),
999 "sched_yield" => Some(libc::SYS_sched_yield),
1000 "sched_getaffinity" => Some(libc::SYS_sched_getaffinity),
1001 "getcpu" => Some(libc::SYS_getcpu),
1002 "rseq" => Some(libc::SYS_rseq),
1003 "close_range" => Some(libc::SYS_close_range),
1004 "memfd_create" => Some(libc::SYS_memfd_create),
1005 "ioctl" => Some(libc::SYS_ioctl),
1006 "prctl" => Some(libc::SYS_prctl),
1007 "landlock_create_ruleset" => Some(libc::SYS_landlock_create_ruleset),
1009 "landlock_add_rule" => Some(libc::SYS_landlock_add_rule),
1010 "landlock_restrict_self" => Some(libc::SYS_landlock_restrict_self),
1011 _ => None,
1012 }
1013}
1014
1015impl Default for SeccompManager {
1016 fn default() -> Self {
1017 Self::new()
1018 }
1019}
1020
#[cfg(test)]
mod tests {
    //! Unit tests for the seccomp manager.
    //!
    //! No test installs a real filter, since that would restrict the test
    //! process itself; the tests inspect allowlists and rule maps instead.

    use super::*;

    /// Smallest index `>= idx` (clamped to the string length) that lies on a
    /// UTF-8 character boundary, so slicing there cannot panic.
    fn boundary_at_or_after(text: &str, idx: usize) -> usize {
        let mut at = idx.min(text.len());
        while !text.is_char_boundary(at) {
            at += 1;
        }
        at
    }

    #[test]
    fn test_seccomp_manager_initial_state() {
        let mgr = SeccompManager::new();
        assert!(!mgr.is_applied());
    }

    #[test]
    fn test_apply_idempotent() {
        let mgr = SeccompManager::new();
        assert!(!mgr.is_applied());

        // Simulate a manager that already installed a filter: every apply
        // entry point must return early with Ok(true) and touch nothing.
        let mut applied = SeccompManager { applied: true };
        assert!(applied.apply_minimal_filter().unwrap());
        assert!(applied.apply_minimal_filter_with_mode(true, true).unwrap());
        assert!(applied
            .apply_filter_for_network_mode(false, false, false)
            .unwrap());
        assert!(applied.apply_trace_filter().unwrap());
        assert!(applied
            .apply_profile_from_file(
                std::path::Path::new("/nonexistent/seccomp-profile.json"),
                None,
                false,
            )
            .unwrap());
        assert!(applied.is_applied());
    }

    #[test]
    fn test_clone_denied_flags_include_newcgroup() {
        assert_ne!(
            DENIED_CLONE_NAMESPACE_FLAGS & libc::CLONE_NEWCGROUP as u64,
            0
        );
    }

    #[test]
    fn test_clone_denied_flags_include_newtime() {
        assert_ne!(
            DENIED_CLONE_NAMESPACE_FLAGS & libc::CLONE_NEWTIME as u64,
            0,
            "CLONE_NEWTIME must be in denied clone namespace flags"
        );
    }

    #[test]
    fn test_network_none_socket_domains_are_unix_only() {
        let domains = SeccompManager::allowed_socket_domains(false);
        assert_eq!(domains, vec![libc::AF_UNIX]);
    }

    #[test]
    fn test_network_enabled_socket_domains_exclude_netlink() {
        let domains = SeccompManager::allowed_socket_domains(true);
        assert!(domains.contains(&libc::AF_UNIX));
        assert!(domains.contains(&libc::AF_INET));
        assert!(domains.contains(&libc::AF_INET6));
        assert!(!domains.contains(&libc::AF_NETLINK));
    }

    #[test]
    fn test_network_mode_syscalls_only_enabled_when_network_allowed() {
        let none = SeccompManager::network_mode_syscalls(false);
        assert!(none.is_empty());

        let enabled = SeccompManager::network_mode_syscalls(true);
        assert!(enabled.contains(&libc::SYS_connect));
        assert!(enabled.contains(&libc::SYS_bind));
        assert!(enabled.contains(&libc::SYS_listen));
        assert!(enabled.contains(&libc::SYS_accept));
        assert!(enabled.contains(&libc::SYS_setsockopt));
    }

    #[test]
    fn test_landlock_bootstrap_syscalls_present_in_base_allowlist() {
        let base = SeccompManager::base_allowed_syscalls();
        assert!(base.contains(&libc::SYS_landlock_create_ruleset));
        assert!(base.contains(&libc::SYS_landlock_add_rule));
        assert!(base.contains(&libc::SYS_landlock_restrict_self));
    }

    #[test]
    fn test_x32_legacy_range_not_allowlisted() {
        let base = SeccompManager::base_allowed_syscalls();
        let net = SeccompManager::network_mode_syscalls(true);
        for nr in 512_i64..=547_i64 {
            assert!(
                !base.contains(&nr) && !net.contains(&nr),
                "x32 syscall number {} unexpectedly allowlisted",
                nr
            );
        }
    }

    #[test]
    fn test_i386_compat_socketcall_range_not_allowlisted() {
        let base = SeccompManager::base_allowed_syscalls();
        let net = SeccompManager::network_mode_syscalls(true);
        for nr in 359_i64..=373_i64 {
            assert!(
                !base.contains(&nr) && !net.contains(&nr),
                "i386 compat syscall number {} unexpectedly allowlisted",
                nr
            );
        }
    }

    #[test]
    fn test_minimal_filter_allowlist_counts_are_stable() {
        let base = SeccompManager::base_allowed_syscalls();
        let net = SeccompManager::network_mode_syscalls(true);

        // The constant 7 is the number of entries minimal_filter adds on top
        // of the two lists: socket, ioctl, prctl, mprotect, clone3, clone and
        // execveat.
        assert_eq!(base.len(), 131);
        assert_eq!(net.len(), 11);
        assert_eq!(base.len() + 7, 138);
        assert_eq!(base.len() + net.len() + 7, 149);
    }

    #[test]
    fn test_arg_filtered_syscalls_list_includes_critical_syscalls() {
        for name in &[
            "clone", "clone3", "execveat", "ioctl", "mprotect", "prctl", "socket",
        ] {
            assert!(
                SeccompManager::ARG_FILTERED_SYSCALLS.contains(name),
                "'{}' must be in ARG_FILTERED_SYSCALLS",
                name
            );
        }
    }

    #[test]
    fn test_clone3_allowed_in_minimal_filter() {
        let rules = SeccompManager::minimal_filter(true).unwrap();
        assert!(
            rules.contains_key(&libc::SYS_clone3),
            "clone3 must be in the seccomp allowlist (glibc 2.34+ requires it)"
        );
    }

    #[test]
    fn test_clone_is_allowed_with_arg_filter() {
        let rules = SeccompManager::minimal_filter(true).unwrap();
        assert!(
            rules.contains_key(&libc::SYS_clone),
            "clone must be in the seccomp allowlist with arg filters"
        );
    }

    #[test]
    fn test_high_risk_syscalls_removed_from_base_allowlist() {
        let base = SeccompManager::base_allowed_syscalls();
        let removed = [
            libc::SYS_chown,
            libc::SYS_fchown,
            libc::SYS_lchown,
            libc::SYS_fchownat,
            libc::SYS_sync,
            libc::SYS_syncfs,
            libc::SYS_mlock,
            libc::SYS_munlock,
            libc::SYS_mincore,
            libc::SYS_vfork,
            libc::SYS_tkill,
        ];

        for syscall in removed {
            assert!(
                !base.contains(&syscall),
                "syscall {} unexpectedly present in base allowlist",
                syscall
            );
        }
    }

    #[test]
    fn test_custom_profile_preserves_clone_arg_filters() {
        let rules = SeccompManager::minimal_filter(true).unwrap();

        for name in SeccompManager::ARG_FILTERED_SYSCALLS {
            // clone3 carries no argument rules in the built-in filter; it is
            // removed from custom profiles instead of being merged.
            if *name == "clone3" {
                continue;
            }
            if let Some(nr) = syscall_name_to_number(name) {
                let has_arg_rules = rules.get(&nr).map_or(false, |r| !r.is_empty());
                assert!(
                    has_arg_rules,
                    "built-in filter must have argument-level rules for '{}' \
                     so apply_profile_from_file can merge them into custom profiles",
                    name
                );
            }
        }
    }

    #[test]
    fn test_memfd_create_not_in_default_allowlist() {
        let base = SeccompManager::base_allowed_syscalls();
        assert!(
            !base.contains(&libc::SYS_memfd_create),
            "memfd_create must not be in the default seccomp allowlist (fileless exec risk)"
        );
        let rules = SeccompManager::minimal_filter(true).unwrap();
        assert!(
            !rules.contains_key(&libc::SYS_memfd_create),
            "memfd_create must not be in the compiled seccomp filter rules"
        );
    }

    #[test]
    fn test_mprotect_has_arg_filtering() {
        let base = SeccompManager::base_allowed_syscalls();
        assert!(
            !base.contains(&libc::SYS_mprotect),
            "SYS_mprotect must not be unconditionally allowed - needs arg filtering"
        );

        let rules = SeccompManager::minimal_filter(true).unwrap();
        let mprotect_rules = rules.get(&libc::SYS_mprotect);
        assert!(
            mprotect_rules.is_some(),
            "mprotect must be present in the seccomp filter rules"
        );
        assert!(
            !mprotect_rules.unwrap().is_empty(),
            "mprotect must have argument-level conditions to prevent W^X violations"
        );
    }

    #[test]
    fn test_unsafe_blocks_have_safety_comments() {
        let source = include_str!("seccomp.rs");
        // Assemble the needle from two pieces so that this test's own source
        // text never contains it and cannot match itself.
        let needle = concat!("unsafe", " {");
        let mut pos = 0;
        while let Some(idx) = source[pos..].find(needle) {
            let abs_idx = pos + idx;
            // Look at up to 200 bytes before the block, snapped to character
            // boundaries so the slices below cannot panic.
            let start = boundary_at_or_after(source, abs_idx.saturating_sub(200));
            let context = &source[start..abs_idx];
            let excerpt_start = boundary_at_or_after(source, abs_idx.saturating_sub(80));
            let excerpt_end = boundary_at_or_after(source, abs_idx + 10);
            assert!(
                context.contains("SAFETY:"),
                "unsafe block at byte {} must have a // SAFETY: comment. Context: ...{}...",
                abs_idx,
                &source[excerpt_start..excerpt_end]
            );
            pos = abs_idx + 1;
        }
    }

    /// Pure model of the mprotect rules built by `minimal_filter`: a call is
    /// allowed when the WRITE and EXEC bits of `prot` are not both set.
    fn mprotect_would_allow(prot: u64) -> bool {
        let mask = (libc::PROT_WRITE | libc::PROT_EXEC) as u64;
        let allowed_values: &[u64] = &[0, libc::PROT_WRITE as u64, libc::PROT_EXEC as u64];
        let masked = prot & mask;
        allowed_values.contains(&masked)
    }

    #[test]
    fn test_mprotect_allows_prot_none() {
        assert!(mprotect_would_allow(0), "PROT_NONE must be allowed");
    }

    #[test]
    fn test_mprotect_allows_prot_read_only() {
        assert!(
            mprotect_would_allow(libc::PROT_READ as u64),
            "PROT_READ must be allowed (W|X bits are 0)"
        );
    }

    #[test]
    fn test_mprotect_allows_prot_read_write() {
        assert!(
            mprotect_would_allow((libc::PROT_READ | libc::PROT_WRITE) as u64),
            "PROT_READ|PROT_WRITE must be allowed"
        );
    }

    #[test]
    fn test_mprotect_allows_prot_read_exec() {
        assert!(
            mprotect_would_allow((libc::PROT_READ | libc::PROT_EXEC) as u64),
            "PROT_READ|PROT_EXEC must be allowed"
        );
    }

    #[test]
    fn test_mprotect_rejects_prot_write_exec() {
        assert!(
            !mprotect_would_allow((libc::PROT_WRITE | libc::PROT_EXEC) as u64),
            "PROT_WRITE|PROT_EXEC (W^X violation) must be REJECTED"
        );
    }

    #[test]
    fn test_mprotect_rejects_prot_read_write_exec() {
        assert!(
            !mprotect_would_allow((libc::PROT_READ | libc::PROT_WRITE | libc::PROT_EXEC) as u64),
            "PROT_READ|PROT_WRITE|PROT_EXEC (W^X violation) must be REJECTED"
        );
    }

    #[test]
    fn test_mprotect_allows_prot_write_alone() {
        assert!(
            mprotect_would_allow(libc::PROT_WRITE as u64),
            "PROT_WRITE alone must be allowed"
        );
    }

    #[test]
    fn test_mprotect_allows_prot_exec_alone() {
        assert!(
            mprotect_would_allow(libc::PROT_EXEC as u64),
            "PROT_EXEC alone must be allowed"
        );
    }
}