1use crate::error::{NucleusError, Result};
2use crate::security::policy::sha256_hex;
3#[cfg(any(
4 target_arch = "x86_64",
5 target_arch = "aarch64",
6 target_arch = "riscv64"
7))]
8use crate::security::syscall_numbers::{SYS_FADVISE64, SYS_SENDFILE};
9use seccompiler::{BpfProgram, SeccompAction, SeccompCondition, SeccompFilter, SeccompRule};
10use std::collections::BTreeMap;
11use std::path::Path;
12use tracing::{debug, info, warn};
13
/// Builds and installs seccomp-BPF syscall filters for the calling process.
///
/// The manager remembers whether one of its `apply_*` methods has already
/// installed a filter; once that has happened, further `apply_*` calls return
/// `Ok(true)` without doing anything.
pub struct SeccompManager {
    /// `true` once a filter has been installed through this manager.
    applied: bool,
}
21
/// Bitwise OR of the `CLONE_NEW*` namespace-creation flags.
///
/// Used as the mask of the `clone` argument filter built in
/// `SeccompManager::minimal_filter`: a `clone` call matches the allow rule
/// only when none of these bits are set in its flags argument.
const DENIED_CLONE_NAMESPACE_FLAGS: u64 = (libc::CLONE_NEWUSER
    | libc::CLONE_NEWNS
    | libc::CLONE_NEWNET
    | libc::CLONE_NEWIPC
    | libc::CLONE_NEWUTS
    | libc::CLONE_NEWPID
    | libc::CLONE_NEWCGROUP
    | libc::CLONE_NEWTIME) as u64;
30
31impl SeccompManager {
    /// Creates a manager that has not installed any seccomp filter yet.
    pub fn new() -> Self {
        Self { applied: false }
    }
35
36 fn base_allowed_syscalls() -> Vec<i64> {
37 let mut syscalls = vec![
38 libc::SYS_read,
40 libc::SYS_write,
41 libc::SYS_openat,
42 libc::SYS_close,
43 libc::SYS_fstat,
44 libc::SYS_lseek,
45 libc::SYS_fcntl,
46 libc::SYS_readv,
47 libc::SYS_writev,
48 libc::SYS_preadv,
49 libc::SYS_pwritev,
50 libc::SYS_pread64,
51 libc::SYS_pwrite64,
52 libc::SYS_readlinkat,
53 libc::SYS_newfstatat,
54 libc::SYS_statx,
55 libc::SYS_faccessat,
56 libc::SYS_faccessat2,
57 libc::SYS_dup,
58 libc::SYS_dup3,
59 libc::SYS_pipe2,
60 libc::SYS_unlinkat,
61 libc::SYS_renameat,
62 libc::SYS_renameat2,
63 libc::SYS_linkat,
64 libc::SYS_symlinkat,
65 libc::SYS_fchmod,
66 libc::SYS_fchmodat,
67 libc::SYS_truncate,
68 libc::SYS_ftruncate,
69 libc::SYS_fallocate,
70 #[cfg(any(
71 target_arch = "x86_64",
72 target_arch = "aarch64",
73 target_arch = "riscv64"
74 ))]
75 SYS_FADVISE64,
76 libc::SYS_fsync,
77 libc::SYS_fdatasync,
78 libc::SYS_sync_file_range,
79 libc::SYS_flock,
80 libc::SYS_fstatfs,
81 libc::SYS_statfs,
82 #[cfg(any(
83 target_arch = "x86_64",
84 target_arch = "aarch64",
85 target_arch = "riscv64"
86 ))]
87 SYS_SENDFILE,
88 libc::SYS_copy_file_range,
89 libc::SYS_splice,
90 libc::SYS_tee,
91 libc::SYS_mmap,
93 libc::SYS_munmap,
94 libc::SYS_brk,
95 libc::SYS_mremap,
96 libc::SYS_madvise,
97 libc::SYS_msync,
98 libc::SYS_mlock,
99 libc::SYS_munlock,
100 libc::SYS_mlock2,
101 libc::SYS_shmget,
104 libc::SYS_shmat,
105 libc::SYS_shmdt,
106 libc::SYS_shmctl,
107 libc::SYS_semget,
109 libc::SYS_semop,
110 libc::SYS_semctl,
111 libc::SYS_semtimedop,
112 libc::SYS_execve,
118 libc::SYS_wait4,
120 libc::SYS_waitid,
121 libc::SYS_exit,
122 libc::SYS_exit_group,
123 libc::SYS_getpid,
124 libc::SYS_gettid,
125 libc::SYS_getuid,
126 libc::SYS_getgid,
127 libc::SYS_geteuid,
128 libc::SYS_getegid,
129 libc::SYS_getppid,
130 libc::SYS_setsid,
131 libc::SYS_getgroups,
132 libc::SYS_rt_sigaction,
134 libc::SYS_rt_sigprocmask,
135 libc::SYS_rt_sigreturn,
136 libc::SYS_rt_sigsuspend,
137 libc::SYS_rt_sigtimedwait,
138 libc::SYS_rt_sigpending,
139 libc::SYS_rt_sigqueueinfo,
140 libc::SYS_sigaltstack,
141 libc::SYS_restart_syscall,
142 libc::SYS_kill,
146 libc::SYS_tgkill,
147 libc::SYS_clock_gettime,
149 libc::SYS_clock_getres,
150 libc::SYS_clock_nanosleep,
151 libc::SYS_gettimeofday,
152 libc::SYS_nanosleep,
153 libc::SYS_setitimer,
154 libc::SYS_getitimer,
155 libc::SYS_getcwd,
157 libc::SYS_chdir,
158 libc::SYS_fchdir,
159 libc::SYS_mkdirat,
160 libc::SYS_getdents64,
161 libc::SYS_uname,
163 libc::SYS_getrandom,
164 libc::SYS_futex,
165 libc::SYS_set_tid_address,
166 libc::SYS_set_robust_list,
167 libc::SYS_get_robust_list,
168 libc::SYS_umask,
171 libc::SYS_getrusage,
173 libc::SYS_times,
174 libc::SYS_sched_yield,
175 libc::SYS_sched_getaffinity,
176 libc::SYS_sched_setaffinity,
177 libc::SYS_sched_getparam,
178 libc::SYS_sched_getscheduler,
179 libc::SYS_getcpu,
180 libc::SYS_getxattr,
182 libc::SYS_lgetxattr,
183 libc::SYS_fgetxattr,
184 libc::SYS_listxattr,
185 libc::SYS_llistxattr,
186 libc::SYS_flistxattr,
187 libc::SYS_rseq,
188 libc::SYS_close_range,
189 libc::SYS_fchown,
192 libc::SYS_fchownat,
193 libc::SYS_io_setup,
196 libc::SYS_io_destroy,
197 libc::SYS_io_submit,
198 libc::SYS_io_getevents,
199 libc::SYS_setpgid,
205 libc::SYS_getpgid,
206 libc::SYS_landlock_create_ruleset,
210 libc::SYS_landlock_add_rule,
211 libc::SYS_landlock_restrict_self,
212 libc::SYS_getsockname,
214 libc::SYS_getpeername,
215 libc::SYS_socketpair,
216 libc::SYS_getsockopt,
217 libc::SYS_ppoll,
219 libc::SYS_pselect6,
220 libc::SYS_epoll_create1,
221 libc::SYS_epoll_ctl,
222 libc::SYS_epoll_pwait,
223 libc::SYS_eventfd2,
224 libc::SYS_signalfd4,
225 libc::SYS_timerfd_create,
226 libc::SYS_timerfd_settime,
227 libc::SYS_timerfd_gettime,
228 ];
229
230 #[cfg(target_arch = "x86_64")]
232 syscalls.extend_from_slice(&[
233 libc::SYS_open,
234 libc::SYS_stat,
235 libc::SYS_lstat,
236 libc::SYS_access,
237 libc::SYS_readlink,
238 libc::SYS_dup2,
239 libc::SYS_pipe,
240 libc::SYS_unlink,
241 libc::SYS_rename,
242 libc::SYS_link,
243 libc::SYS_symlink,
244 libc::SYS_chmod,
245 libc::SYS_mkdir,
246 libc::SYS_rmdir,
247 libc::SYS_getdents,
248 libc::SYS_getpgrp,
249 libc::SYS_chown,
250 libc::SYS_fchown,
251 libc::SYS_lchown,
252 libc::SYS_arch_prctl,
253 libc::SYS_getrlimit,
254 libc::SYS_poll,
255 libc::SYS_select,
256 libc::SYS_epoll_create,
257 libc::SYS_epoll_wait,
258 libc::SYS_eventfd,
259 libc::SYS_signalfd,
260 ]);
261
262 syscalls
263 }
264
265 fn allowed_socket_domains(allow_network: bool) -> Vec<i32> {
266 if allow_network {
267 vec![libc::AF_UNIX, libc::AF_INET, libc::AF_INET6]
268 } else {
269 vec![libc::AF_UNIX]
270 }
271 }
272
273 fn network_mode_syscalls(allow_network: bool) -> Vec<i64> {
274 if allow_network {
275 vec![
276 libc::SYS_connect,
277 libc::SYS_sendto,
278 libc::SYS_recvfrom,
279 libc::SYS_sendmsg,
280 libc::SYS_recvmsg,
281 libc::SYS_shutdown,
282 libc::SYS_bind,
283 libc::SYS_listen,
284 libc::SYS_accept,
285 libc::SYS_accept4,
286 libc::SYS_setsockopt,
287 ]
288 } else {
289 Vec::new()
290 }
291 }
292
    /// Builds the rule map of the built-in allowlist filter.
    ///
    /// The map key is a syscall number. An empty rule vector is used for
    /// syscalls allowed without argument conditions; a non-empty vector holds
    /// the argument rules under which the syscall is allowed.
    ///
    /// * `allow_network` - adds the network syscalls and the `AF_INET` /
    ///   `AF_INET6` socket domains.
    /// * `extra_syscalls` - names requested via `--seccomp-allow`; only names
    ///   listed in `OPT_IN_SYSCALLS` are actually added, everything else is
    ///   skipped (with a warning where shown below).
    ///
    /// # Errors
    ///
    /// Returns `NucleusError::SeccompError` when a condition or rule cannot be
    /// constructed.
    fn minimal_filter(
        allow_network: bool,
        extra_syscalls: &[String],
    ) -> Result<BTreeMap<i64, Vec<SeccompRule>>> {
        let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();

        let allowed_syscalls = Self::base_allowed_syscalls();

        // Unconditionally allowed syscalls: no argument rules attached.
        for syscall in allowed_syscalls {
            rules.insert(syscall, Vec::new());
        }

        for syscall in Self::network_mode_syscalls(allow_network) {
            rules.insert(syscall, Vec::new());
        }

        for name in extra_syscalls {
            // Argument-filtered syscalls get their rules further down; skipping
            // them here keeps a user request from inserting an unconditional
            // entry for them.
            if Self::ARG_FILTERED_SYSCALLS.contains(&name.as_str()) {
                continue;
            }

            if let Some(nr) = syscall_name_to_number(name) {
                // Only syscalls not already present in the map are considered.
                if let std::collections::btree_map::Entry::Vacant(entry) = rules.entry(nr) {
                    if Self::SECURITY_CRITICAL_DENIED_SYSCALLS.contains(&name.as_str()) {
                        warn!(
                            "--seccomp-allow: security-critical syscall '{}' is always blocked",
                            name
                        );
                    } else if Self::OPT_IN_SYSCALLS.contains(&name.as_str()) {
                        entry.insert(Vec::new());
                    } else {
                        warn!(
                            "--seccomp-allow: syscall '{}' is not in the opt-in allowlist – blocked",
                            name
                        );
                    }
                }
            } else {
                warn!("--seccomp-allow: unknown syscall '{}' – blocked", name);
            }
        }

        // socket(): one rule per permitted address family, matched on
        // argument 0 (the domain).
        let mut socket_rules = Vec::new();
        for domain in Self::allowed_socket_domains(allow_network) {
            let condition = SeccompCondition::new(
                0,
                seccompiler::SeccompCmpArgLen::Dword,
                seccompiler::SeccompCmpOp::Eq,
                domain as u64,
            )
            .map_err(|e| {
                NucleusError::SeccompError(format!(
                    "Failed to create socket domain condition: {}",
                    e
                ))
            })?;
            let rule = SeccompRule::new(vec![condition]).map_err(|e| {
                NucleusError::SeccompError(format!("Failed to create socket rule: {}", e))
            })?;
            socket_rules.push(rule);
        }
        rules.insert(libc::SYS_socket, socket_rules);

        // ioctl(): only these request codes (argument 1) are allowed.
        // NOTE(review): in the asm-generic numbering these are TCGETS, TCSETS,
        // TCSETSW, TCSETSF, TCFLSH, TIOCGPGRP, TIOCSPGRP, TIOCGWINSZ, TIOCGSID,
        // FIONREAD, FIONBIO, FIOCLEX, FIONCLEX — confirm against the target
        // architectures' <asm/ioctls.h>.
        let ioctl_allowed: &[u64] = &[
            0x5401, 0x5402, 0x5403, 0x5404, 0x540B, 0x540F, 0x5410, 0x5413, 0x5429, 0x541B, 0x5421,
            0x5451, 0x5450,
        ];
        let mut ioctl_rules = Vec::new();
        for &request in ioctl_allowed {
            let condition = SeccompCondition::new(
                1,
                seccompiler::SeccompCmpArgLen::Dword,
                seccompiler::SeccompCmpOp::Eq,
                request,
            )
            .map_err(|e| {
                NucleusError::SeccompError(format!("Failed to create ioctl condition: {}", e))
            })?;
            let rule = SeccompRule::new(vec![condition]).map_err(|e| {
                NucleusError::SeccompError(format!("Failed to create ioctl rule: {}", e))
            })?;
            ioctl_rules.push(rule);
        }
        rules.insert(libc::SYS_ioctl, ioctl_rules);

        // prctl(): only these option values (argument 0) are allowed.
        // NOTE(review): per <linux/prctl.h> these are PR_SET_PDEATHSIG (1),
        // PR_GET_PDEATHSIG (2), PR_SET_NAME (15), PR_GET_NAME (16),
        // PR_CAPBSET_READ (23), PR_GET_SECUREBITS (27),
        // PR_SET_CHILD_SUBREAPER (36), PR_GET_CHILD_SUBREAPER (37),
        // PR_SET_NO_NEW_PRIVS (38), PR_GET_TID_ADDRESS (40),
        // PR_GET_NO_NEW_PRIVS (39) — confirm this is the intended set.
        let prctl_allowed: &[u64] = &[1, 2, 15, 16, 23, 27, 36, 37, 38, 40, 39];
        let mut prctl_rules = Vec::new();
        for &option in prctl_allowed {
            let condition = SeccompCondition::new(
                0,
                seccompiler::SeccompCmpArgLen::Dword,
                seccompiler::SeccompCmpOp::Eq,
                option,
            )
            .map_err(|e| {
                NucleusError::SeccompError(format!("Failed to create prctl condition: {}", e))
            })?;
            let rule = SeccompRule::new(vec![condition]).map_err(|e| {
                NucleusError::SeccompError(format!("Failed to create prctl rule: {}", e))
            })?;
            prctl_rules.push(rule);
        }

        // Additionally allow prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, ...):
        // both argument 0 and argument 1 must match.
        let ambient_option = SeccompCondition::new(
            0,
            seccompiler::SeccompCmpArgLen::Dword,
            seccompiler::SeccompCmpOp::Eq,
            libc::PR_CAP_AMBIENT as u64,
        )
        .map_err(|e| {
            NucleusError::SeccompError(format!(
                "Failed to create PR_CAP_AMBIENT prctl condition: {}",
                e
            ))
        })?;
        let ambient_is_set = SeccompCondition::new(
            1,
            seccompiler::SeccompCmpArgLen::Dword,
            seccompiler::SeccompCmpOp::Eq,
            libc::PR_CAP_AMBIENT_IS_SET as u64,
        )
        .map_err(|e| {
            NucleusError::SeccompError(format!(
                "Failed to create PR_CAP_AMBIENT_IS_SET prctl condition: {}",
                e
            ))
        })?;
        let ambient_probe_rule =
            SeccompRule::new(vec![ambient_option, ambient_is_set]).map_err(|e| {
                NucleusError::SeccompError(format!(
                    "Failed to create PR_CAP_AMBIENT_IS_SET prctl rule: {}",
                    e
                ))
            })?;
        prctl_rules.push(ambient_probe_rule);
        rules.insert(libc::SYS_prctl, prctl_rules);

        // prlimit64(): allowed only when argument 2 is zero.
        // NOTE(review): argument 2 is the new-limit pointer in the prlimit64
        // ABI, so this presumably restricts the call to read-only queries —
        // confirm.
        let prlimit_condition = SeccompCondition::new(
            2,
            seccompiler::SeccompCmpArgLen::Qword,
            seccompiler::SeccompCmpOp::Eq,
            0u64,
        )
        .map_err(|e| {
            NucleusError::SeccompError(format!("Failed to create prlimit64 condition: {}", e))
        })?;
        let prlimit_rule = SeccompRule::new(vec![prlimit_condition]).map_err(|e| {
            NucleusError::SeccompError(format!("Failed to create prlimit64 rule: {}", e))
        })?;
        rules.insert(libc::SYS_prlimit64, vec![prlimit_rule]);

        // mprotect(): argument 2 masked with PROT_WRITE|PROT_EXEC must be
        // neither, write-only, or exec-only — i.e. requesting both bits
        // together never matches.
        let mut mprotect_rules = Vec::new();
        for allowed in [0, libc::PROT_WRITE as u64, libc::PROT_EXEC as u64] {
            let condition = SeccompCondition::new(
                2,
                seccompiler::SeccompCmpArgLen::Dword,
                seccompiler::SeccompCmpOp::MaskedEq((libc::PROT_WRITE | libc::PROT_EXEC) as u64),
                allowed,
            )
            .map_err(|e| {
                NucleusError::SeccompError(format!("Failed to create mprotect condition: {}", e))
            })?;
            let rule = SeccompRule::new(vec![condition]).map_err(|e| {
                NucleusError::SeccompError(format!("Failed to create mprotect rule: {}", e))
            })?;
            mprotect_rules.push(rule);
        }
        rules.insert(libc::SYS_mprotect, mprotect_rules);

        // preadv2()/pwritev2(): allowed only when argument 5 (the flags
        // argument, per the error text below) is zero.
        for (syscall, name) in [
            (libc::SYS_preadv2, "preadv2"),
            (libc::SYS_pwritev2, "pwritev2"),
        ] {
            let condition = SeccompCondition::new(
                5,
                seccompiler::SeccompCmpArgLen::Qword,
                seccompiler::SeccompCmpOp::Eq,
                0,
            )
            .map_err(|e| {
                NucleusError::SeccompError(format!(
                    "Failed to create {} flags condition: {}",
                    name, e
                ))
            })?;
            let rule = SeccompRule::new(vec![condition]).map_err(|e| {
                NucleusError::SeccompError(format!("Failed to create {} rule: {}", name, e))
            })?;
            rules.insert(syscall, vec![rule]);
        }

        // clone(): allowed only when argument 0 has none of the namespace
        // flags in DENIED_CLONE_NAMESPACE_FLAGS set.
        let clone_condition = SeccompCondition::new(
            0,
            seccompiler::SeccompCmpArgLen::Qword,
            seccompiler::SeccompCmpOp::MaskedEq(DENIED_CLONE_NAMESPACE_FLAGS),
            0,
        )
        .map_err(|e| {
            NucleusError::SeccompError(format!("Failed to create clone condition: {}", e))
        })?;
        let clone_rule = SeccompRule::new(vec![clone_condition]).map_err(|e| {
            NucleusError::SeccompError(format!("Failed to create clone rule: {}", e))
        })?;
        rules.insert(libc::SYS_clone, vec![clone_rule]);

        // execveat(): allowed only when the AT_EMPTY_PATH bit is clear in
        // argument 4.
        let execveat_condition = SeccompCondition::new(
            4,
            seccompiler::SeccompCmpArgLen::Dword,
            seccompiler::SeccompCmpOp::MaskedEq(libc::AT_EMPTY_PATH as u64),
            0,
        )
        .map_err(|e| {
            NucleusError::SeccompError(format!("Failed to create execveat condition: {}", e))
        })?;
        let execveat_rule = SeccompRule::new(vec![execveat_condition]).map_err(|e| {
            NucleusError::SeccompError(format!("Failed to create execveat rule: {}", e))
        })?;
        rules.insert(libc::SYS_execveat, vec![execveat_rule]);

        Ok(rules)
    }
586
587 pub fn validate_extra_syscalls_for_production(
593 allow_network: bool,
594 extra_syscalls: &[String],
595 ) -> Result<()> {
596 let base_syscalls = Self::base_allowed_syscalls();
597 let network_syscalls = Self::network_mode_syscalls(allow_network);
598
599 for name in extra_syscalls {
600 let Some(nr) = syscall_name_to_number(name) else {
601 return Err(NucleusError::ConfigError(format!(
602 "Production mode rejects unknown --seccomp-allow syscall '{}'",
603 name
604 )));
605 };
606
607 if base_syscalls.contains(&nr)
608 || network_syscalls.contains(&nr)
609 || Self::ARG_FILTERED_SYSCALLS.contains(&name.as_str())
610 {
611 continue;
612 }
613
614 if Self::SECURITY_CRITICAL_DENIED_SYSCALLS.contains(&name.as_str()) {
615 return Err(NucleusError::ConfigError(format!(
616 "Production mode forbids --seccomp-allow for security-critical syscall '{}'",
617 name
618 )));
619 }
620
621 if Self::OPT_IN_SYSCALLS.contains(&name.as_str()) {
622 continue;
623 }
624
625 return Err(NucleusError::ConfigError(format!(
626 "Production mode rejects unsupported --seccomp-allow syscall '{}'",
627 name
628 )));
629 }
630
631 Ok(())
632 }
633
634 pub fn compile_minimal_filter() -> Result<BpfProgram> {
641 let rules = Self::minimal_filter(true, &[])?;
642 let target_arch = std::env::consts::ARCH.try_into().map_err(|e| {
643 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
644 })?;
645 super::seccomp_bpf::compile_bitmap_bpf_with_errno_syscalls(
646 rules,
647 Self::errno_denied_syscalls(),
648 SeccompAction::KillProcess,
649 SeccompAction::Allow,
650 target_arch,
651 )
652 }
653
    /// Test-only access to `minimal_filter`.
    ///
    /// # Panics
    ///
    /// Panics if the rule map cannot be built.
    #[cfg(test)]
    pub(crate) fn minimal_filter_for_test(
        allow_network: bool,
        extra_syscalls: &[String],
    ) -> BTreeMap<i64, Vec<SeccompRule>> {
        Self::minimal_filter(allow_network, extra_syscalls).unwrap()
    }
662
    /// Test-only access to `errno_denied_syscalls`.
    #[cfg(test)]
    pub(crate) fn errno_denied_syscalls_for_test() -> &'static [(i64, u32)] {
        Self::errno_denied_syscalls()
    }
667
668 pub fn apply_minimal_filter(&mut self) -> Result<bool> {
676 self.apply_minimal_filter_with_mode(false, false)
677 }
678
679 pub fn apply_minimal_filter_with_mode(
684 &mut self,
685 best_effort: bool,
686 log_denied: bool,
687 ) -> Result<bool> {
688 self.apply_filter_for_network_mode(true, best_effort, log_denied, &[])
689 }
690
691 pub fn apply_filter_for_network_mode(
700 &mut self,
701 allow_network: bool,
702 best_effort: bool,
703 log_denied: bool,
704 extra_syscalls: &[String],
705 ) -> Result<bool> {
706 if self.applied {
707 debug!("Seccomp filter already applied, skipping");
708 return Ok(true);
709 }
710
711 info!(allow_network, "Applying seccomp filter");
712
713 let rules = match Self::minimal_filter(allow_network, extra_syscalls) {
714 Ok(r) => r,
715 Err(e) => {
716 if best_effort {
717 warn!(
718 "Failed to create seccomp rules: {} (continuing without seccomp)",
719 e
720 );
721 return Ok(false);
722 }
723 return Err(e);
724 }
725 };
726
727 let target_arch = match std::env::consts::ARCH.try_into() {
728 Ok(a) => a,
729 Err(e) => {
730 let msg = format!("Unsupported architecture: {:?}", e);
731 if best_effort {
732 warn!("{} (continuing without seccomp)", msg);
733 return Ok(false);
734 }
735 return Err(NucleusError::SeccompError(msg));
736 }
737 };
738
739 let bpf_prog: BpfProgram = match super::seccomp_bpf::compile_bitmap_bpf_with_errno_syscalls(
740 rules,
741 Self::errno_denied_syscalls(),
742 SeccompAction::KillProcess,
743 SeccompAction::Allow,
744 target_arch,
745 ) {
746 Ok(p) => p,
747 Err(e) => {
748 if best_effort {
749 warn!(
750 "Failed to compile BPF program: {} (continuing without seccomp)",
751 e
752 );
753 return Ok(false);
754 }
755 return Err(e);
756 }
757 };
758
759 match Self::apply_bpf_program(&bpf_prog, log_denied) {
761 Ok(_) => {
762 self.applied = true;
763 info!("Successfully applied seccomp filter");
764 Ok(true)
765 }
766 Err(e) => {
767 if best_effort {
768 warn!(
769 "Failed to apply seccomp filter: {} (continuing without seccomp)",
770 e
771 );
772 Ok(false)
773 } else {
774 Err(NucleusError::SeccompError(format!(
775 "Failed to apply seccomp filter: {}",
776 e
777 )))
778 }
779 }
780 }
781 }
782
783 pub fn apply_profile_from_file(
802 &mut self,
803 profile_path: &Path,
804 expected_sha256: Option<&str>,
805 audit_mode: bool,
806 ) -> Result<bool> {
807 if self.applied {
808 debug!("Seccomp filter already applied, skipping");
809 return Ok(true);
810 }
811
812 info!("Loading seccomp profile from {:?}", profile_path);
813
814 let content = std::fs::read(profile_path).map_err(|e| {
816 NucleusError::SeccompError(format!(
817 "Failed to read seccomp profile {:?}: {}",
818 profile_path, e
819 ))
820 })?;
821
822 if let Some(expected) = expected_sha256 {
824 let actual = sha256_hex(&content);
825 if actual != expected {
826 return Err(NucleusError::SeccompError(format!(
827 "Seccomp profile hash mismatch: expected {}, got {}",
828 expected, actual
829 )));
830 }
831 info!("Seccomp profile hash verified: {}", actual);
832 }
833
834 let profile: SeccompProfile = serde_json::from_slice(&content).map_err(|e| {
836 NucleusError::SeccompError(format!("Failed to parse seccomp profile: {}", e))
837 })?;
838
839 Self::warn_missing_arg_filters(&profile);
844
845 let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
847
848 for syscall_group in &profile.syscalls {
849 if syscall_group.action == "SCMP_ACT_ALLOW" {
850 for name in &syscall_group.names {
851 if name == "clone3" {
852 warn!(
853 "Custom seccomp profile requested clone3; ignoring it and returning \
854 ENOSYS because clone3 namespace flags cannot be argument-filtered"
855 );
856 continue;
857 }
858 if let Some(nr) = syscall_name_to_number(name) {
859 rules.insert(nr, Vec::new());
860 } else {
861 warn!("Unknown syscall in profile: {} (skipping)", name);
862 }
863 }
864 }
865 }
866
867 let builtin_rules = Self::minimal_filter(true, &[])?;
872 for syscall_name in Self::ARG_FILTERED_SYSCALLS {
873 if let Some(nr) = syscall_name_to_number(syscall_name) {
874 if let std::collections::btree_map::Entry::Occupied(mut entry) = rules.entry(nr) {
875 if let Some(builtin) = builtin_rules.get(&nr) {
876 if !builtin.is_empty() {
877 info!(
878 "Merging built-in argument filters for '{}' into custom profile",
879 syscall_name
880 );
881 entry.insert(builtin.clone());
882 }
883 }
884 }
885 }
886 }
887 let target_arch = std::env::consts::ARCH.try_into().map_err(|e| {
888 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
889 })?;
890
891 let bpf_prog: BpfProgram = super::seccomp_bpf::compile_bitmap_bpf_with_errno_syscalls(
892 rules,
893 Self::errno_denied_syscalls(),
894 SeccompAction::KillProcess,
895 SeccompAction::Allow,
896 target_arch,
897 )?;
898
899 match Self::apply_bpf_program(&bpf_prog, audit_mode) {
900 Ok(_) => {
901 self.applied = true;
902 info!(
903 "Seccomp profile applied from {:?} (log_denied={})",
904 profile_path, audit_mode
905 );
906 Ok(true)
907 }
908 Err(e) => Err(e),
909 }
910 }
911
912 pub fn apply_trace_filter(&mut self) -> Result<bool> {
917 if self.applied {
918 debug!("Seccomp filter already applied, skipping trace filter");
919 return Ok(true);
920 }
921
922 info!("Applying seccomp trace filter (allow-all + LOG)");
923
924 let filter = SeccompFilter::new(
928 BTreeMap::new(),
929 SeccompAction::Allow, SeccompAction::Allow, std::env::consts::ARCH.try_into().map_err(|e| {
932 NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
933 })?,
934 )
935 .map_err(|e| NucleusError::SeccompError(format!("Failed to create trace filter: {}", e)))?;
936
937 let bpf_prog: BpfProgram = filter.try_into().map_err(|e| {
938 NucleusError::SeccompError(format!("Failed to compile trace BPF: {}", e))
939 })?;
940
941 Self::apply_bpf_program(&bpf_prog, true)?;
943 self.applied = true;
944 info!("Seccomp trace filter applied (all syscalls allowed + logged)");
945 Ok(true)
946 }
947
    /// Names of syscalls that the built-in filter only allows under argument
    /// conditions.
    ///
    /// Used to (a) ignore these names in `--seccomp-allow`, (b) re-impose the
    /// built-in argument rules on custom profiles, and (c) warn when a custom
    /// profile allows one of them without argument filters.
    ///
    /// NOTE(review): `minimal_filter` also argument-filters `prlimit64`, which
    /// is not listed here — confirm whether that omission is intentional.
    const ARG_FILTERED_SYSCALLS: &'static [&'static str] = &[
        "clone", "execveat", "ioctl", "mprotect", "preadv2", "prctl", "pwritev2", "socket",
    ];
953
    /// Names of syscalls that `--seccomp-allow` can never enable.
    ///
    /// `minimal_filter` warns and skips them; production validation rejects
    /// the configuration outright.
    const SECURITY_CRITICAL_DENIED_SYSCALLS: &'static [&'static str] = &[
        // Namespace creation / entry
        "clone3",
        "unshare",
        "setns",
        // Kernel keyring
        "add_key",
        "request_key",
        "keyctl",
    ];
966
967 fn errno_denied_syscalls() -> &'static [(i64, u32)] {
968 &[(libc::SYS_clone3, libc::ENOSYS as u32)]
969 }
970
    /// Names of syscalls that are not part of the built-in filter but may be
    /// enabled explicitly through `--seccomp-allow`.
    ///
    /// A requested name outside this list (and not already covered by the
    /// built-in filter) is refused: `minimal_filter` logs a warning and
    /// production validation returns an error.
    ///
    /// NOTE(review): `sync_file_range` also appears in
    /// `base_allowed_syscalls`, so its entry here looks redundant — confirm.
    const OPT_IN_SYSCALLS: &'static [&'static str] = &[
        // io_uring
        "io_uring_setup",
        "io_uring_enter",
        "io_uring_register",
        // System V message queues
        "msgget",
        "msgsnd",
        "msgrcv",
        "msgctl",
        // POSIX message queues
        "mq_open",
        "mq_unlink",
        "mq_timedsend",
        "mq_timedreceive",
        "mq_notify",
        "mq_getsetattr",
        // POSIX timers
        "timer_create",
        "timer_settime",
        "timer_gettime",
        "timer_getoverrun",
        "timer_delete",
        // Filesystem notification
        "inotify_init",
        "inotify_init1",
        "inotify_add_watch",
        "inotify_rm_watch",
        "fanotify_init",
        "fanotify_mark",
        // Memory policy and advanced memory management
        "mincore",
        "mlockall",
        "munlockall",
        "membarrier",
        "process_madvise",
        "mbind",
        "set_mempolicy",
        "get_mempolicy",
        "set_mempolicy_home_node",
        "pkey_mprotect",
        "pkey_alloc",
        "pkey_free",
        "cachestat",
        "remap_file_pages",
        // Additional file and filesystem operations
        "sync",
        "syncfs",
        "sync_file_range",
        "readahead",
        "vmsplice",
        "openat2",
        "name_to_handle_at",
        "open_by_handle_at",
        "io_cancel",
        "io_pgetevents",
        "creat",
        "fchmodat2",
        "statmount",
        "listmount",
        "utimensat",
        "utimes",
        "utime",
        "futimesat",
        // Extended-attribute writes and *at variants
        "setxattr",
        "lsetxattr",
        "fsetxattr",
        "removexattr",
        "lremovexattr",
        "fremovexattr",
        "setxattrat",
        "getxattrat",
        "listxattrat",
        "removexattrat",
        // Batched socket I/O
        "recvmmsg",
        "sendmmsg",
        // Scheduling
        "sched_setparam",
        "sched_setscheduler",
        "sched_get_priority_max",
        "sched_get_priority_min",
        "sched_rr_get_interval",
        "sched_setattr",
        "sched_getattr",
        // Resource limits and priorities
        "setrlimit",
        "getpriority",
        "setpriority",
        "ioprio_set",
        "ioprio_get",
        // Miscellaneous process control
        "vfork",
        "pause",
        "alarm",
        "tkill",
        "sysinfo",
        "personality",
        "vhangup",
        "time",
        "pidfd_open",
        "pidfd_send_signal",
        "pidfd_getfd",
        // Credential changes and queries
        "setuid",
        "setgid",
        "setreuid",
        "setregid",
        "setresuid",
        "getresuid",
        "setresgid",
        "getresgid",
        "setfsuid",
        "setfsgid",
        "setgroups",
        "getsid",
        // Capabilities (read-only)
        "capget",
        // Thread-directed signal queueing
        "rt_tgsigqueueinfo",
        // Device nodes, kernel log, clock adjustment, misc
        "mknod",
        "mknodat",
        "syslog",
        "clock_settime",
        "clock_adjtime",
        "adjtimex",
        "kcmp",
        "epoll_pwait2",
        // futex2 family
        "futex_waitv",
        "futex_wake",
        "futex_wait",
        "futex_requeue",
        // Installing further seccomp filters
        "seccomp",
    ];
1115
1116 fn warn_missing_arg_filters(profile: &SeccompProfile) {
1119 for group in &profile.syscalls {
1120 if group.action != "SCMP_ACT_ALLOW" {
1121 continue;
1122 }
1123 for name in &group.names {
1124 if Self::ARG_FILTERED_SYSCALLS.contains(&name.as_str()) && group.args.is_empty() {
1125 warn!(
1126 "Custom seccomp profile allows '{}' without argument filters. \
1127 The built-in filter restricts this syscall at the argument level. \
1128 This profile weakens security compared to the default.",
1129 name
1130 );
1131 }
1132 }
1133 }
1134 }
1135
    /// Returns `true` once a filter has been installed through this manager.
    pub fn is_applied(&self) -> bool {
        self.applied
    }
1140
1141 fn apply_bpf_program(bpf_prog: &BpfProgram, log_denied: bool) -> Result<()> {
1142 let mut flags: libc::c_ulong = 0;
1143 if log_denied {
1144 flags |= libc::SECCOMP_FILTER_FLAG_LOG as libc::c_ulong;
1145 }
1146
1147 match Self::apply_bpf_program_with_flags(bpf_prog, flags) {
1148 Ok(()) => Ok(()),
1149 Err(err)
1150 if log_denied
1151 && err.raw_os_error() == Some(libc::EINVAL)
1152 && libc::SECCOMP_FILTER_FLAG_LOG != 0 =>
1153 {
1154 warn!(
1155 "Kernel rejected SECCOMP_FILTER_FLAG_LOG; continuing with seccomp \
1156 enforcement without deny logging"
1157 );
1158 Self::apply_bpf_program_with_flags(bpf_prog, 0)?;
1159 Ok(())
1160 }
1161 Err(err) => Err(NucleusError::SeccompError(format!(
1162 "Failed to apply seccomp filter: {}",
1163 err
1164 ))),
1165 }
1166 }
1167
1168 fn apply_bpf_program_with_flags(
1169 bpf_prog: &BpfProgram,
1170 flags: libc::c_ulong,
1171 ) -> std::io::Result<()> {
1172 let rc = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
1175 if rc != 0 {
1176 return Err(std::io::Error::last_os_error());
1177 }
1178
1179 let prog = libc::sock_fprog {
1180 len: bpf_prog.len() as u16,
1181 filter: bpf_prog.as_ptr() as *mut libc::sock_filter,
1182 };
1183
1184 let rc = unsafe {
1187 libc::syscall(
1188 libc::SYS_seccomp,
1189 libc::SECCOMP_SET_MODE_FILTER,
1190 flags,
1191 &prog as *const libc::sock_fprog,
1192 )
1193 };
1194
1195 if rc < 0 {
1196 return Err(std::io::Error::last_os_error());
1197 }
1198
1199 Ok(())
1200 }
1201}
1202
1203use crate::security::seccomp_generate::SeccompProfile;
1205
1206fn syscall_name_to_number(name: &str) -> Option<i64> {
1210 match name {
1211 "read" => Some(libc::SYS_read),
1213 "write" => Some(libc::SYS_write),
1214 #[cfg(target_arch = "x86_64")]
1215 "open" => Some(libc::SYS_open),
1216 "openat" => Some(libc::SYS_openat),
1217 "close" => Some(libc::SYS_close),
1218 #[cfg(target_arch = "x86_64")]
1219 "stat" => Some(libc::SYS_stat),
1220 "fstat" => Some(libc::SYS_fstat),
1221 #[cfg(target_arch = "x86_64")]
1222 "lstat" => Some(libc::SYS_lstat),
1223 "lseek" => Some(libc::SYS_lseek),
1224 #[cfg(target_arch = "x86_64")]
1225 "access" => Some(libc::SYS_access),
1226 "fcntl" => Some(libc::SYS_fcntl),
1227 "readv" => Some(libc::SYS_readv),
1228 "writev" => Some(libc::SYS_writev),
1229 "preadv" => Some(libc::SYS_preadv),
1230 "pwritev" => Some(libc::SYS_pwritev),
1231 "preadv2" => Some(libc::SYS_preadv2),
1232 "pwritev2" => Some(libc::SYS_pwritev2),
1233 "pread64" => Some(libc::SYS_pread64),
1234 "pwrite64" => Some(libc::SYS_pwrite64),
1235 #[cfg(target_arch = "x86_64")]
1236 "readlink" => Some(libc::SYS_readlink),
1237 "readlinkat" => Some(libc::SYS_readlinkat),
1238 "newfstatat" => Some(libc::SYS_newfstatat),
1239 "statx" => Some(libc::SYS_statx),
1240 "faccessat" => Some(libc::SYS_faccessat),
1241 "faccessat2" => Some(libc::SYS_faccessat2),
1242 "dup" => Some(libc::SYS_dup),
1243 #[cfg(target_arch = "x86_64")]
1244 "dup2" => Some(libc::SYS_dup2),
1245 "dup3" => Some(libc::SYS_dup3),
1246 #[cfg(target_arch = "x86_64")]
1247 "pipe" => Some(libc::SYS_pipe),
1248 "pipe2" => Some(libc::SYS_pipe2),
1249 #[cfg(target_arch = "x86_64")]
1250 "unlink" => Some(libc::SYS_unlink),
1251 "unlinkat" => Some(libc::SYS_unlinkat),
1252 #[cfg(target_arch = "x86_64")]
1253 "rename" => Some(libc::SYS_rename),
1254 "renameat" => Some(libc::SYS_renameat),
1255 "renameat2" => Some(libc::SYS_renameat2),
1256 #[cfg(target_arch = "x86_64")]
1257 "link" => Some(libc::SYS_link),
1258 "linkat" => Some(libc::SYS_linkat),
1259 #[cfg(target_arch = "x86_64")]
1260 "symlink" => Some(libc::SYS_symlink),
1261 "symlinkat" => Some(libc::SYS_symlinkat),
1262 #[cfg(target_arch = "x86_64")]
1263 "chmod" => Some(libc::SYS_chmod),
1264 "fchmod" => Some(libc::SYS_fchmod),
1265 "fchmodat" => Some(libc::SYS_fchmodat),
1266 "truncate" => Some(libc::SYS_truncate),
1267 "ftruncate" => Some(libc::SYS_ftruncate),
1268 "fallocate" => Some(libc::SYS_fallocate),
1269 #[cfg(any(
1270 target_arch = "x86_64",
1271 target_arch = "aarch64",
1272 target_arch = "riscv64"
1273 ))]
1274 "fadvise64" => Some(SYS_FADVISE64),
1275 "fsync" => Some(libc::SYS_fsync),
1276 "fdatasync" => Some(libc::SYS_fdatasync),
1277 "flock" => Some(libc::SYS_flock),
1278 #[cfg(any(
1279 target_arch = "x86_64",
1280 target_arch = "aarch64",
1281 target_arch = "riscv64"
1282 ))]
1283 "sendfile" => Some(SYS_SENDFILE),
1284 "copy_file_range" => Some(libc::SYS_copy_file_range),
1285 "splice" => Some(libc::SYS_splice),
1286 "tee" => Some(libc::SYS_tee),
1287 "mmap" => Some(libc::SYS_mmap),
1289 "munmap" => Some(libc::SYS_munmap),
1290 "mprotect" => Some(libc::SYS_mprotect),
1291 "brk" => Some(libc::SYS_brk),
1292 "mremap" => Some(libc::SYS_mremap),
1293 "madvise" => Some(libc::SYS_madvise),
1294 "msync" => Some(libc::SYS_msync),
1295 "mlock" => Some(libc::SYS_mlock),
1296 "mlock2" => Some(libc::SYS_mlock2),
1297 "munlock" => Some(libc::SYS_munlock),
1298 "shmget" => Some(libc::SYS_shmget),
1300 "shmat" => Some(libc::SYS_shmat),
1301 "shmdt" => Some(libc::SYS_shmdt),
1302 "shmctl" => Some(libc::SYS_shmctl),
1303 "semget" => Some(libc::SYS_semget),
1304 "semop" => Some(libc::SYS_semop),
1305 "semctl" => Some(libc::SYS_semctl),
1306 "semtimedop" => Some(libc::SYS_semtimedop),
1307 #[cfg(target_arch = "x86_64")]
1309 "fork" => Some(libc::SYS_fork),
1310 "clone" => Some(libc::SYS_clone),
1311 "clone3" => Some(libc::SYS_clone3),
1312 "execve" => Some(libc::SYS_execve),
1313 "execveat" => Some(libc::SYS_execveat),
1314 "wait4" => Some(libc::SYS_wait4),
1315 "waitid" => Some(libc::SYS_waitid),
1316 "exit" => Some(libc::SYS_exit),
1317 "exit_group" => Some(libc::SYS_exit_group),
1318 "getpid" => Some(libc::SYS_getpid),
1319 "gettid" => Some(libc::SYS_gettid),
1320 "getuid" => Some(libc::SYS_getuid),
1321 "getgid" => Some(libc::SYS_getgid),
1322 "geteuid" => Some(libc::SYS_geteuid),
1323 "getegid" => Some(libc::SYS_getegid),
1324 "getppid" => Some(libc::SYS_getppid),
1325 #[cfg(target_arch = "x86_64")]
1326 "getpgrp" => Some(libc::SYS_getpgrp),
1327 "setsid" => Some(libc::SYS_setsid),
1328 "getgroups" => Some(libc::SYS_getgroups),
1329 "rt_sigaction" => Some(libc::SYS_rt_sigaction),
1331 "rt_sigprocmask" => Some(libc::SYS_rt_sigprocmask),
1332 "rt_sigreturn" => Some(libc::SYS_rt_sigreturn),
1333 "rt_sigsuspend" => Some(libc::SYS_rt_sigsuspend),
1334 "rt_sigtimedwait" => Some(libc::SYS_rt_sigtimedwait),
1335 "rt_sigpending" => Some(libc::SYS_rt_sigpending),
1336 "rt_sigqueueinfo" => Some(libc::SYS_rt_sigqueueinfo),
1337 "sigaltstack" => Some(libc::SYS_sigaltstack),
1338 "restart_syscall" => Some(libc::SYS_restart_syscall),
1339 "kill" => Some(libc::SYS_kill),
1340 "tgkill" => Some(libc::SYS_tgkill),
1341 "clock_gettime" => Some(libc::SYS_clock_gettime),
1343 "clock_getres" => Some(libc::SYS_clock_getres),
1344 "clock_nanosleep" => Some(libc::SYS_clock_nanosleep),
1345 "gettimeofday" => Some(libc::SYS_gettimeofday),
1346 "nanosleep" => Some(libc::SYS_nanosleep),
1347 "getcwd" => Some(libc::SYS_getcwd),
1349 "chdir" => Some(libc::SYS_chdir),
1350 "fchdir" => Some(libc::SYS_fchdir),
1351 #[cfg(target_arch = "x86_64")]
1352 "mkdir" => Some(libc::SYS_mkdir),
1353 "mkdirat" => Some(libc::SYS_mkdirat),
1354 #[cfg(target_arch = "x86_64")]
1355 "rmdir" => Some(libc::SYS_rmdir),
1356 #[cfg(target_arch = "x86_64")]
1357 "getdents" => Some(libc::SYS_getdents),
1358 "getdents64" => Some(libc::SYS_getdents64),
1359 "socket" => Some(libc::SYS_socket),
1361 "connect" => Some(libc::SYS_connect),
1362 "sendto" => Some(libc::SYS_sendto),
1363 "recvfrom" => Some(libc::SYS_recvfrom),
1364 "sendmsg" => Some(libc::SYS_sendmsg),
1365 "recvmsg" => Some(libc::SYS_recvmsg),
1366 "shutdown" => Some(libc::SYS_shutdown),
1367 "bind" => Some(libc::SYS_bind),
1368 "listen" => Some(libc::SYS_listen),
1369 "accept" => Some(libc::SYS_accept),
1370 "accept4" => Some(libc::SYS_accept4),
1371 "setsockopt" => Some(libc::SYS_setsockopt),
1372 "getsockopt" => Some(libc::SYS_getsockopt),
1373 "getsockname" => Some(libc::SYS_getsockname),
1374 "getpeername" => Some(libc::SYS_getpeername),
1375 "socketpair" => Some(libc::SYS_socketpair),
1376 #[cfg(target_arch = "x86_64")]
1378 "poll" => Some(libc::SYS_poll),
1379 "ppoll" => Some(libc::SYS_ppoll),
1380 #[cfg(target_arch = "x86_64")]
1381 "select" => Some(libc::SYS_select),
1382 "pselect6" => Some(libc::SYS_pselect6),
1383 #[cfg(target_arch = "x86_64")]
1384 "epoll_create" => Some(libc::SYS_epoll_create),
1385 "epoll_create1" => Some(libc::SYS_epoll_create1),
1386 "epoll_ctl" => Some(libc::SYS_epoll_ctl),
1387 #[cfg(target_arch = "x86_64")]
1388 "epoll_wait" => Some(libc::SYS_epoll_wait),
1389 "epoll_pwait" => Some(libc::SYS_epoll_pwait),
1390 #[cfg(target_arch = "x86_64")]
1391 "eventfd" => Some(libc::SYS_eventfd),
1392 "eventfd2" => Some(libc::SYS_eventfd2),
1393 #[cfg(target_arch = "x86_64")]
1394 "signalfd" => Some(libc::SYS_signalfd),
1395 "signalfd4" => Some(libc::SYS_signalfd4),
1396 "timerfd_create" => Some(libc::SYS_timerfd_create),
1397 "timerfd_settime" => Some(libc::SYS_timerfd_settime),
1398 "timerfd_gettime" => Some(libc::SYS_timerfd_gettime),
1399 "uname" => Some(libc::SYS_uname),
1401 "getrandom" => Some(libc::SYS_getrandom),
1402 "futex" => Some(libc::SYS_futex),
1403 "set_tid_address" => Some(libc::SYS_set_tid_address),
1404 "set_robust_list" => Some(libc::SYS_set_robust_list),
1405 "get_robust_list" => Some(libc::SYS_get_robust_list),
1406 #[cfg(target_arch = "x86_64")]
1407 "arch_prctl" => Some(libc::SYS_arch_prctl),
1408 "sysinfo" => Some(libc::SYS_sysinfo),
1409 "umask" => Some(libc::SYS_umask),
1410 #[cfg(target_arch = "x86_64")]
1411 "getrlimit" => Some(libc::SYS_getrlimit),
1412 "prlimit64" => Some(libc::SYS_prlimit64),
1413 "getrusage" => Some(libc::SYS_getrusage),
1414 "times" => Some(libc::SYS_times),
1415 "sched_yield" => Some(libc::SYS_sched_yield),
1416 "sched_getaffinity" => Some(libc::SYS_sched_getaffinity),
1417 "getcpu" => Some(libc::SYS_getcpu),
1418 "rseq" => Some(libc::SYS_rseq),
1419 "close_range" => Some(libc::SYS_close_range),
1420 "fchown" => Some(libc::SYS_fchown),
1422 "fchownat" => Some(libc::SYS_fchownat),
1423 #[cfg(target_arch = "x86_64")]
1424 "chown" => Some(libc::SYS_chown),
1425 #[cfg(target_arch = "x86_64")]
1426 "lchown" => Some(libc::SYS_lchown),
1427 "io_uring_setup" => Some(libc::SYS_io_uring_setup),
1429 "io_uring_enter" => Some(libc::SYS_io_uring_enter),
1430 "io_uring_register" => Some(libc::SYS_io_uring_register),
1431 "io_setup" => Some(libc::SYS_io_setup),
1433 "io_destroy" => Some(libc::SYS_io_destroy),
1434 "io_submit" => Some(libc::SYS_io_submit),
1435 "io_getevents" => Some(libc::SYS_io_getevents),
1436 "setitimer" => Some(libc::SYS_setitimer),
1438 "getitimer" => Some(libc::SYS_getitimer),
1439 "setpgid" => Some(libc::SYS_setpgid),
1441 "getpgid" => Some(libc::SYS_getpgid),
1442 "memfd_create" => Some(libc::SYS_memfd_create),
1443 "ioctl" => Some(libc::SYS_ioctl),
1444 "prctl" => Some(libc::SYS_prctl),
1445 "landlock_create_ruleset" => Some(libc::SYS_landlock_create_ruleset),
1447 "landlock_add_rule" => Some(libc::SYS_landlock_add_rule),
1448 "landlock_restrict_self" => Some(libc::SYS_landlock_restrict_self),
1449 "mincore" => Some(libc::SYS_mincore),
1452 "mlockall" => Some(libc::SYS_mlockall),
1453 "munlockall" => Some(libc::SYS_munlockall),
1454 "mbind" => Some(libc::SYS_mbind),
1455 "set_mempolicy" => Some(libc::SYS_set_mempolicy),
1456 "get_mempolicy" => Some(libc::SYS_get_mempolicy),
1457 "memfd_secret" => Some(libc::SYS_memfd_secret),
1458 "membarrier" => Some(libc::SYS_membarrier),
1459 "process_madvise" => Some(libc::SYS_process_madvise),
1460 "pkey_mprotect" => Some(libc::SYS_pkey_mprotect),
1461 "pkey_alloc" => Some(libc::SYS_pkey_alloc),
1462 "pkey_free" => Some(libc::SYS_pkey_free),
1463 "mseal" => Some(libc::SYS_mseal),
1464 "map_shadow_stack" => Some(453),
1465 "remap_file_pages" => Some(libc::SYS_remap_file_pages),
1466 "set_mempolicy_home_node" => Some(libc::SYS_set_mempolicy_home_node),
1467 "cachestat" => Some(451),
1468 #[cfg(target_arch = "x86_64")]
1470 "vfork" => Some(libc::SYS_vfork),
1471 #[cfg(target_arch = "x86_64")]
1472 "pause" => Some(libc::SYS_pause),
1473 #[cfg(target_arch = "x86_64")]
1474 "alarm" => Some(libc::SYS_alarm),
1475 "tkill" => Some(libc::SYS_tkill),
1476 "ptrace" => Some(libc::SYS_ptrace),
1477 "process_vm_readv" => Some(libc::SYS_process_vm_readv),
1478 "process_vm_writev" => Some(libc::SYS_process_vm_writev),
1479 "process_mrelease" => Some(libc::SYS_process_mrelease),
1480 "kcmp" => Some(libc::SYS_kcmp),
1481 "unshare" => Some(libc::SYS_unshare),
1482 "setns" => Some(libc::SYS_setns),
1483 "pidfd_open" => Some(libc::SYS_pidfd_open),
1484 "pidfd_send_signal" => Some(libc::SYS_pidfd_send_signal),
1485 "pidfd_getfd" => Some(libc::SYS_pidfd_getfd),
1486 "setuid" => Some(libc::SYS_setuid),
1488 "setgid" => Some(libc::SYS_setgid),
1489 "setreuid" => Some(libc::SYS_setreuid),
1490 "setregid" => Some(libc::SYS_setregid),
1491 "setresuid" => Some(libc::SYS_setresuid),
1492 "getresuid" => Some(libc::SYS_getresuid),
1493 "setresgid" => Some(libc::SYS_setresgid),
1494 "getresgid" => Some(libc::SYS_getresgid),
1495 "setfsuid" => Some(libc::SYS_setfsuid),
1496 "setfsgid" => Some(libc::SYS_setfsgid),
1497 "setgroups" => Some(libc::SYS_setgroups),
1498 "getsid" => Some(libc::SYS_getsid),
1499 "capget" => Some(libc::SYS_capget),
1501 "capset" => Some(libc::SYS_capset),
1502 "rt_tgsigqueueinfo" => Some(libc::SYS_rt_tgsigqueueinfo),
1504 "msgget" => Some(libc::SYS_msgget),
1506 "msgsnd" => Some(libc::SYS_msgsnd),
1507 "msgrcv" => Some(libc::SYS_msgrcv),
1508 "msgctl" => Some(libc::SYS_msgctl),
1509 "timer_create" => Some(libc::SYS_timer_create),
1511 "timer_settime" => Some(libc::SYS_timer_settime),
1512 "timer_gettime" => Some(libc::SYS_timer_gettime),
1513 "timer_getoverrun" => Some(libc::SYS_timer_getoverrun),
1514 "timer_delete" => Some(libc::SYS_timer_delete),
1515 "clock_settime" => Some(libc::SYS_clock_settime),
1516 "clock_adjtime" => Some(libc::SYS_clock_adjtime),
1517 #[cfg(target_arch = "x86_64")]
1518 "time" => Some(libc::SYS_time),
1519 #[cfg(target_arch = "x86_64")]
1521 "creat" => Some(libc::SYS_creat),
1522 "readahead" => Some(libc::SYS_readahead),
1523 "sync" => Some(libc::SYS_sync),
1524 "syncfs" => Some(libc::SYS_syncfs),
1525 "vmsplice" => Some(libc::SYS_vmsplice),
1526 "utimensat" => Some(libc::SYS_utimensat),
1527 #[cfg(target_arch = "x86_64")]
1528 "utimes" => Some(libc::SYS_utimes),
1529 #[cfg(target_arch = "x86_64")]
1530 "utime" => Some(libc::SYS_utime),
1531 #[cfg(target_arch = "x86_64")]
1532 "futimesat" => Some(libc::SYS_futimesat),
1533 "openat2" => Some(libc::SYS_openat2),
1534 "name_to_handle_at" => Some(libc::SYS_name_to_handle_at),
1535 "open_by_handle_at" => Some(libc::SYS_open_by_handle_at),
1536 "fchmodat2" => Some(libc::SYS_fchmodat2),
1537 "statmount" => Some(457),
1538 "listmount" => Some(458),
1539 "setxattr" => Some(libc::SYS_setxattr),
1541 "lsetxattr" => Some(libc::SYS_lsetxattr),
1542 "fsetxattr" => Some(libc::SYS_fsetxattr),
1543 "removexattr" => Some(libc::SYS_removexattr),
1544 "lremovexattr" => Some(libc::SYS_lremovexattr),
1545 "fremovexattr" => Some(libc::SYS_fremovexattr),
1546 "setxattrat" => Some(463),
1547 "getxattrat" => Some(464),
1548 "listxattrat" => Some(465),
1549 "removexattrat" => Some(466),
1550 "recvmmsg" => Some(libc::SYS_recvmmsg),
1552 "sendmmsg" => Some(libc::SYS_sendmmsg),
1553 #[cfg(target_arch = "x86_64")]
1555 "inotify_init" => Some(libc::SYS_inotify_init),
1556 "inotify_init1" => Some(libc::SYS_inotify_init1),
1557 "inotify_add_watch" => Some(libc::SYS_inotify_add_watch),
1558 "inotify_rm_watch" => Some(libc::SYS_inotify_rm_watch),
1559 "fanotify_init" => Some(libc::SYS_fanotify_init),
1561 "fanotify_mark" => Some(libc::SYS_fanotify_mark),
1562 "epoll_pwait2" => Some(libc::SYS_epoll_pwait2),
1564 "sched_setparam" => Some(libc::SYS_sched_setparam),
1566 "sched_setscheduler" => Some(libc::SYS_sched_setscheduler),
1567 "sched_get_priority_max" => Some(libc::SYS_sched_get_priority_max),
1568 "sched_get_priority_min" => Some(libc::SYS_sched_get_priority_min),
1569 "sched_rr_get_interval" => Some(libc::SYS_sched_rr_get_interval),
1570 "sched_setattr" => Some(libc::SYS_sched_setattr),
1571 "sched_getattr" => Some(libc::SYS_sched_getattr),
1572 "sched_setaffinity" => Some(libc::SYS_sched_setaffinity),
1573 #[cfg(target_arch = "x86_64")]
1575 "setrlimit" => Some(libc::SYS_setrlimit),
1576 "getpriority" => Some(libc::SYS_getpriority),
1577 "setpriority" => Some(libc::SYS_setpriority),
1578 "ioprio_set" => Some(libc::SYS_ioprio_set),
1579 "ioprio_get" => Some(libc::SYS_ioprio_get),
1580 "futex_waitv" => Some(libc::SYS_futex_waitv),
1582 "futex_wake" => Some(454),
1583 "futex_wait" => Some(455),
1584 "futex_requeue" => Some(456),
1585 "init_module" => Some(libc::SYS_init_module),
1587 "finit_module" => Some(libc::SYS_finit_module),
1588 "delete_module" => Some(libc::SYS_delete_module),
1589 "bpf" => Some(libc::SYS_bpf),
1591 "perf_event_open" => Some(libc::SYS_perf_event_open),
1592 "seccomp" => Some(libc::SYS_seccomp),
1594 "userfaultfd" => Some(libc::SYS_userfaultfd),
1596 "mount" => Some(libc::SYS_mount),
1598 "umount2" => Some(libc::SYS_umount2),
1599 "pivot_root" => Some(libc::SYS_pivot_root),
1600 "mount_setattr" => Some(libc::SYS_mount_setattr),
1601 "open_tree" => Some(libc::SYS_open_tree),
1602 "open_tree_attr" => Some(467),
1603 "move_mount" => Some(libc::SYS_move_mount),
1604 "fsopen" => Some(libc::SYS_fsopen),
1605 "fsconfig" => Some(libc::SYS_fsconfig),
1606 "fsmount" => Some(libc::SYS_fsmount),
1607 "fspick" => Some(libc::SYS_fspick),
1608 "syslog" => Some(libc::SYS_syslog),
1610 "reboot" => Some(libc::SYS_reboot),
1611 "swapon" => Some(libc::SYS_swapon),
1612 "swapoff" => Some(libc::SYS_swapoff),
1613 "chroot" => Some(libc::SYS_chroot),
1614 "acct" => Some(libc::SYS_acct),
1615 "settimeofday" => Some(libc::SYS_settimeofday),
1616 "sethostname" => Some(libc::SYS_sethostname),
1617 "setdomainname" => Some(libc::SYS_setdomainname),
1618 "adjtimex" => Some(libc::SYS_adjtimex),
1619 #[cfg(target_arch = "x86_64")]
1620 "modify_ldt" => Some(libc::SYS_modify_ldt),
1621 #[cfg(target_arch = "x86_64")]
1622 "iopl" => Some(libc::SYS_iopl),
1623 #[cfg(target_arch = "x86_64")]
1624 "ioperm" => Some(libc::SYS_ioperm),
1625 "quotactl" => Some(libc::SYS_quotactl),
1626 "quotactl_fd" => Some(libc::SYS_quotactl_fd),
1627 "personality" => Some(libc::SYS_personality),
1628 "vhangup" => Some(libc::SYS_vhangup),
1629 #[cfg(target_arch = "x86_64")]
1630 "ustat" => Some(libc::SYS_ustat),
1631 #[cfg(target_arch = "x86_64")]
1632 "sysfs" => Some(libc::SYS_sysfs),
1633 "mknod" => Some(libc::SYS_mknod),
1634 "mknodat" => Some(libc::SYS_mknodat),
1635 "migrate_pages" => Some(libc::SYS_migrate_pages),
1636 "move_pages" => Some(libc::SYS_move_pages),
1637 #[cfg(target_arch = "x86_64")]
1638 "kexec_load" => Some(libc::SYS_kexec_load),
1639 "kexec_file_load" => Some(libc::SYS_kexec_file_load),
1640 "mq_open" => Some(libc::SYS_mq_open),
1642 "mq_unlink" => Some(libc::SYS_mq_unlink),
1643 "mq_timedsend" => Some(libc::SYS_mq_timedsend),
1644 "mq_timedreceive" => Some(libc::SYS_mq_timedreceive),
1645 "mq_notify" => Some(libc::SYS_mq_notify),
1646 "mq_getsetattr" => Some(libc::SYS_mq_getsetattr),
1647 "add_key" => Some(libc::SYS_add_key),
1649 "request_key" => Some(libc::SYS_request_key),
1650 "keyctl" => Some(libc::SYS_keyctl),
1651 "io_pgetevents" => Some(333),
1653 "lsm_get_self_attr" => Some(459),
1655 "lsm_set_self_attr" => Some(460),
1656 "lsm_list_modules" => Some(461),
1657 #[cfg(target_arch = "x86_64")]
1658 "lookup_dcookie" => Some(libc::SYS_lookup_dcookie),
1659 "uretprobe" => Some(335),
1660 _ => None,
1661 }
1662}
1663
1664impl Default for SeccompManager {
1665 fn default() -> Self {
1666 Self::new()
1667 }
1668}
1669
1670#[cfg(test)]
1671mod tests {
1672 use super::*;
1673
1674 #[test]
1675 fn test_seccomp_manager_initial_state() {
1676 let mgr = SeccompManager::new();
1677 assert!(!mgr.is_applied());
1678 }
1679
1680 #[test]
1681 fn test_apply_idempotent() {
1682 let mgr = SeccompManager::new();
1683 assert!(!mgr.is_applied());
1687 }
1688
1689 #[test]
1690 fn test_clone_denied_flags_include_newcgroup() {
1691 assert_ne!(
1692 DENIED_CLONE_NAMESPACE_FLAGS & libc::CLONE_NEWCGROUP as u64,
1693 0
1694 );
1695 }
1696
1697 #[test]
1698 fn test_clone_denied_flags_include_newtime() {
1699 assert_ne!(
1700 DENIED_CLONE_NAMESPACE_FLAGS & libc::CLONE_NEWTIME as u64,
1701 0,
1702 "CLONE_NEWTIME must be in denied clone namespace flags"
1703 );
1704 }
1705
1706 #[test]
1707 fn test_network_none_socket_domains_are_unix_only() {
1708 let domains = SeccompManager::allowed_socket_domains(false);
1709 assert_eq!(domains, vec![libc::AF_UNIX]);
1710 }
1711
1712 #[test]
1713 fn test_network_enabled_socket_domains_exclude_netlink() {
1714 let domains = SeccompManager::allowed_socket_domains(true);
1715 assert!(domains.contains(&libc::AF_UNIX));
1716 assert!(domains.contains(&libc::AF_INET));
1717 assert!(domains.contains(&libc::AF_INET6));
1718 assert!(!domains.contains(&libc::AF_NETLINK));
1719 }
1720
1721 #[test]
1722 fn test_network_mode_syscalls_only_enabled_when_network_allowed() {
1723 let none = SeccompManager::network_mode_syscalls(false);
1724 assert!(none.is_empty());
1725
1726 let enabled = SeccompManager::network_mode_syscalls(true);
1727 assert!(enabled.contains(&libc::SYS_connect));
1728 assert!(enabled.contains(&libc::SYS_bind));
1729 assert!(enabled.contains(&libc::SYS_listen));
1730 assert!(enabled.contains(&libc::SYS_accept));
1731 assert!(enabled.contains(&libc::SYS_setsockopt));
1732 }
1733
1734 #[test]
1735 fn test_landlock_bootstrap_syscalls_present_in_base_allowlist() {
1736 let base = SeccompManager::base_allowed_syscalls();
1737 assert!(base.contains(&libc::SYS_landlock_create_ruleset));
1738 assert!(base.contains(&libc::SYS_landlock_add_rule));
1739 assert!(base.contains(&libc::SYS_landlock_restrict_self));
1740 }
1741
1742 #[cfg(any(
1743 target_arch = "x86_64",
1744 target_arch = "aarch64",
1745 target_arch = "riscv64"
1746 ))]
1747 #[test]
1748 fn test_generic_file_syscalls_present_in_base_allowlist() {
1749 let base = SeccompManager::base_allowed_syscalls();
1750 assert!(
1751 base.contains(&SYS_FADVISE64),
1752 "fadvise64 must be allowed on architectures with a generic syscall table"
1753 );
1754 assert!(
1755 base.contains(&SYS_SENDFILE),
1756 "sendfile must be allowed on architectures with a generic syscall table"
1757 );
1758 }
1759
1760 #[cfg(any(
1761 target_arch = "x86_64",
1762 target_arch = "aarch64",
1763 target_arch = "riscv64"
1764 ))]
1765 #[test]
1766 fn test_generic_file_syscall_names_resolve_for_profiles() {
1767 assert_eq!(syscall_name_to_number("fadvise64"), Some(SYS_FADVISE64));
1768 assert_eq!(syscall_name_to_number("sendfile"), Some(SYS_SENDFILE));
1769 }
1770
1771 #[test]
1772 fn test_x32_legacy_range_not_allowlisted() {
1773 let base = SeccompManager::base_allowed_syscalls();
1774 let net = SeccompManager::network_mode_syscalls(true);
1775 for nr in 512_i64..=547_i64 {
1776 assert!(
1777 !base.contains(&nr) && !net.contains(&nr),
1778 "x32 syscall number {} unexpectedly allowlisted",
1779 nr
1780 );
1781 }
1782 }
1783
1784 #[test]
1785 fn test_i386_compat_socketcall_range_not_allowlisted() {
1786 let base = SeccompManager::base_allowed_syscalls();
1787 let net = SeccompManager::network_mode_syscalls(true);
1788 for nr in 359_i64..=373_i64 {
1791 assert!(
1792 !base.contains(&nr) && !net.contains(&nr),
1793 "i386 compat syscall number {} unexpectedly allowlisted",
1794 nr
1795 );
1796 }
1797 }
1798
1799 #[test]
1800 fn test_minimal_filter_allowlist_counts_are_stable() {
1801 let base = SeccompManager::base_allowed_syscalls();
1802 let net = SeccompManager::network_mode_syscalls(true);
1803
1804 assert_eq!(base.len(), 171);
1812 assert_eq!(net.len(), 11);
1813 assert_eq!(base.len() + 9, 180);
1814 assert_eq!(base.len() + net.len() + 9, 191);
1815 }
1816
1817 #[test]
1818 fn test_arg_filtered_syscalls_list_includes_critical_syscalls() {
1819 for name in &[
1822 "clone", "execveat", "ioctl", "preadv2", "prctl", "pwritev2", "socket",
1823 ] {
1824 assert!(
1825 SeccompManager::ARG_FILTERED_SYSCALLS.contains(name),
1826 "'{}' must be in ARG_FILTERED_SYSCALLS",
1827 name
1828 );
1829 }
1830 }
1831
1832 #[test]
1833 fn test_clone3_not_allowlisted_in_minimal_filter() {
1834 let rules = SeccompManager::minimal_filter(true, &[]).unwrap();
1838 assert!(
1839 !rules.contains_key(&libc::SYS_clone3),
1840 "clone3 must not be in the seccomp allowlist"
1841 );
1842 assert!(
1843 SeccompManager::errno_denied_syscalls()
1844 .iter()
1845 .any(|(nr, errno)| *nr == libc::SYS_clone3 && *errno == libc::ENOSYS as u32),
1846 "clone3 must be denied with ENOSYS to trigger libc fallback"
1847 );
1848 }
1849
1850 #[test]
1851 fn test_clone_is_allowed_with_arg_filter() {
1852 let rules = SeccompManager::minimal_filter(true, &[]).unwrap();
1854 assert!(
1855 rules.contains_key(&libc::SYS_clone),
1856 "clone must be in the seccomp allowlist with arg filters"
1857 );
1858 }
1859
1860 #[test]
1861 fn test_high_risk_syscalls_removed_from_base_allowlist() {
1862 let base = SeccompManager::base_allowed_syscalls();
1863 let removed = [
1866 libc::SYS_sync,
1867 libc::SYS_syncfs,
1868 libc::SYS_mincore,
1869 libc::SYS_vfork,
1870 libc::SYS_tkill,
1871 libc::SYS_io_uring_setup,
1873 libc::SYS_io_uring_enter,
1874 libc::SYS_io_uring_register,
1875 ];
1876
1877 for syscall in removed {
1878 assert!(
1879 !base.contains(&syscall),
1880 "syscall {} unexpectedly present in base allowlist",
1881 syscall
1882 );
1883 }
1884 }
1885
1886 #[test]
1887 fn test_custom_profile_preserves_clone_arg_filters() {
1888 let rules = SeccompManager::minimal_filter(true, &[]).unwrap();
1893
1894 for name in SeccompManager::ARG_FILTERED_SYSCALLS {
1897 if let Some(nr) = syscall_name_to_number(name) {
1898 let entry = rules.get(&nr);
1899 assert!(
1900 entry.is_some() && !entry.unwrap().is_empty(),
1901 "built-in filter must have argument-level rules for '{}' \
1902 so apply_profile_from_file can merge them into custom profiles",
1903 name
1904 );
1905 }
1906 }
1907 }
1908
1909 #[test]
1910 fn test_memfd_create_not_in_default_allowlist() {
1911 let base = SeccompManager::base_allowed_syscalls();
1913 assert!(
1914 !base.contains(&libc::SYS_memfd_create),
1915 "memfd_create must not be in the default seccomp allowlist (fileless exec risk)"
1916 );
1917 let rules = SeccompManager::minimal_filter(true, &[]).unwrap();
1919 assert!(
1920 !rules.contains_key(&libc::SYS_memfd_create),
1921 "memfd_create must not be in the compiled seccomp filter rules"
1922 );
1923 }
1924
1925 #[test]
1926 fn test_mprotect_has_arg_filtering() {
1927 let base = SeccompManager::base_allowed_syscalls();
1932 assert!(
1933 !base.contains(&libc::SYS_mprotect),
1934 "SYS_mprotect must not be unconditionally allowed - needs arg filtering"
1935 );
1936
1937 let rules = SeccompManager::minimal_filter(true, &[]).unwrap();
1940 let mprotect_rules = rules.get(&libc::SYS_mprotect);
1941 assert!(
1942 mprotect_rules.is_some(),
1943 "mprotect must be present in the seccomp filter rules"
1944 );
1945 assert!(
1946 !mprotect_rules.unwrap().is_empty(),
1947 "mprotect must have argument-level conditions to prevent W^X violations"
1948 );
1949 }
1950
1951 #[test]
1952 fn test_preadv2_pwritev2_have_flags_arg_filtering() {
1953 let base = SeccompManager::base_allowed_syscalls();
1957 let rules = SeccompManager::minimal_filter(true, &[]).unwrap();
1958
1959 for (name, syscall) in [
1960 ("preadv2", libc::SYS_preadv2),
1961 ("pwritev2", libc::SYS_pwritev2),
1962 ] {
1963 assert!(
1964 !base.contains(&syscall),
1965 "{} must not be unconditionally allowed",
1966 name
1967 );
1968 assert!(
1969 rules.get(&syscall).is_some_and(|chain| !chain.is_empty()),
1970 "{} must have argument-level conditions",
1971 name
1972 );
1973 assert!(
1974 SeccompManager::ARG_FILTERED_SYSCALLS.contains(&name),
1975 "{} must be listed as argument-filtered for custom profiles",
1976 name
1977 );
1978 }
1979 }
1980
1981 #[test]
1982 fn test_unsafe_blocks_have_safety_comments() {
1983 let source = include_str!("seccomp.rs");
1985 let mut pos = 0;
1986 while let Some(idx) = source[pos..].find("unsafe {") {
1987 let abs_idx = pos + idx;
1988 let start = abs_idx.saturating_sub(200);
1990 let context = &source[start..abs_idx];
1991 assert!(
1992 context.contains("SAFETY:"),
1993 "unsafe block at byte {} must have a // SAFETY: comment. Context: ...{}...",
1994 abs_idx,
1995 &source[abs_idx.saturating_sub(80)..abs_idx + 10]
1996 );
1997 pos = abs_idx + 1;
1998 }
1999 }
2000
2001 fn mprotect_would_allow(prot: u64) -> bool {
2011 let mask = (libc::PROT_WRITE | libc::PROT_EXEC) as u64;
2012 let allowed_values: &[u64] = &[0, libc::PROT_WRITE as u64, libc::PROT_EXEC as u64];
2013 let masked = prot & mask;
2014 allowed_values.contains(&masked)
2015 }
2016
2017 #[test]
2018 fn test_mprotect_allows_prot_none() {
2019 assert!(mprotect_would_allow(0), "PROT_NONE must be allowed");
2020 }
2021
2022 #[test]
2023 fn test_mprotect_allows_prot_read_only() {
2024 assert!(
2025 mprotect_would_allow(libc::PROT_READ as u64),
2026 "PROT_READ must be allowed (W|X bits are 0)"
2027 );
2028 }
2029
2030 #[test]
2031 fn test_mprotect_allows_prot_read_write() {
2032 assert!(
2033 mprotect_would_allow((libc::PROT_READ | libc::PROT_WRITE) as u64),
2034 "PROT_READ|PROT_WRITE must be allowed"
2035 );
2036 }
2037
2038 #[test]
2039 fn test_mprotect_allows_prot_read_exec() {
2040 assert!(
2041 mprotect_would_allow((libc::PROT_READ | libc::PROT_EXEC) as u64),
2042 "PROT_READ|PROT_EXEC must be allowed"
2043 );
2044 }
2045
2046 #[test]
2047 fn test_mprotect_rejects_prot_write_exec() {
2048 assert!(
2049 !mprotect_would_allow((libc::PROT_WRITE | libc::PROT_EXEC) as u64),
2050 "PROT_WRITE|PROT_EXEC (W^X violation) must be REJECTED"
2051 );
2052 }
2053
2054 #[test]
2055 fn test_mprotect_rejects_prot_read_write_exec() {
2056 assert!(
2057 !mprotect_would_allow((libc::PROT_READ | libc::PROT_WRITE | libc::PROT_EXEC) as u64),
2058 "PROT_READ|PROT_WRITE|PROT_EXEC (W^X violation) must be REJECTED"
2059 );
2060 }
2061
2062 #[test]
2063 fn test_mprotect_allows_prot_write_alone() {
2064 assert!(
2065 mprotect_would_allow(libc::PROT_WRITE as u64),
2066 "PROT_WRITE alone must be allowed"
2067 );
2068 }
2069
2070 #[test]
2071 fn test_mprotect_allows_prot_exec_alone() {
2072 assert!(
2073 mprotect_would_allow(libc::PROT_EXEC as u64),
2074 "PROT_EXEC alone must be allowed"
2075 );
2076 }
2077
2078 #[test]
2081 fn test_extra_syscalls_are_merged_into_filter() {
2082 let extra = vec!["io_uring_setup".to_string(), "sysinfo".to_string()];
2083 let rules = SeccompManager::minimal_filter(true, &extra).unwrap();
2084 assert!(
2085 rules.contains_key(&libc::SYS_io_uring_setup),
2086 "io_uring_setup must be in filter when requested via extra_syscalls"
2087 );
2088 assert!(
2089 rules.contains_key(&libc::SYS_sysinfo),
2090 "sysinfo must be in filter when requested via extra_syscalls"
2091 );
2092 }
2093
2094 #[test]
2095 fn test_extra_syscalls_do_not_override_arg_filtered() {
2096 let extra = vec![
2100 "clone".to_string(),
2101 "preadv2".to_string(),
2102 "pwritev2".to_string(),
2103 ];
2104 let rules = SeccompManager::minimal_filter(true, &extra).unwrap();
2105 for (name, syscall) in [
2106 ("clone", libc::SYS_clone),
2107 ("preadv2", libc::SYS_preadv2),
2108 ("pwritev2", libc::SYS_pwritev2),
2109 ] {
2110 assert!(
2111 rules.get(&syscall).is_some_and(|chain| !chain.is_empty()),
2112 "{} must retain argument-level filtering even when in extra_syscalls",
2113 name
2114 );
2115 }
2116 }
2117
2118 #[test]
2119 fn test_extra_syscalls_unknown_name_is_warned_and_skipped() {
2120 let extra = vec!["not_a_real_syscall".to_string()];
2122 let result = SeccompManager::minimal_filter(true, &extra);
2123 assert!(
2124 result.is_ok(),
2125 "Unknown syscall name should warn and skip, not error"
2126 );
2127 }
2128
2129 #[test]
2130 fn test_extra_syscalls_empty_is_noop() {
2131 let rules_without = SeccompManager::minimal_filter(true, &[]).unwrap();
2132 let rules_with = SeccompManager::minimal_filter(true, &[]).unwrap();
2133 assert_eq!(rules_without.len(), rules_with.len());
2134 }
2135
2136 #[test]
2137 fn test_extra_syscalls_duplicate_of_default_is_harmless() {
2138 let extra = vec!["read".to_string()];
2140 let rules = SeccompManager::minimal_filter(true, &extra).unwrap();
2141 assert!(rules.contains_key(&libc::SYS_read));
2142 }
2143
2144 #[test]
2145 fn test_extra_syscalls_blocked_known_syscall_not_added() {
2146 let extra = vec!["kexec_load".to_string()];
2149 let rules = SeccompManager::minimal_filter(true, &extra).unwrap();
2150 assert!(
2151 !rules.contains_key(&libc::SYS_kexec_load),
2152 "kexec_load must be blocked even when requested via --seccomp-allow"
2153 );
2154 }
2155
2156 #[test]
2157 fn test_extra_syscalls_unshare_remains_blocked() {
2158 let extra = vec!["unshare".to_string()];
2159 let rules = SeccompManager::minimal_filter(true, &extra).unwrap();
2160 assert!(
2161 !rules.contains_key(&libc::SYS_unshare),
2162 "unshare must stay blocked even when requested via --seccomp-allow"
2163 );
2164 }
2165
2166 #[test]
2167 fn test_extra_syscalls_keyring_remain_blocked() {
2168 let extra = vec![
2169 "add_key".to_string(),
2170 "request_key".to_string(),
2171 "keyctl".to_string(),
2172 ];
2173 let rules = SeccompManager::minimal_filter(true, &extra).unwrap();
2174
2175 for (name, syscall) in [
2176 ("add_key", libc::SYS_add_key),
2177 ("request_key", libc::SYS_request_key),
2178 ("keyctl", libc::SYS_keyctl),
2179 ] {
2180 assert!(
2181 !rules.contains_key(&syscall),
2182 "{} must stay blocked even when requested via --seccomp-allow",
2183 name
2184 );
2185 assert!(
2186 !SeccompManager::OPT_IN_SYSCALLS.contains(&name),
2187 "{} must not be in the seccomp opt-in allowlist",
2188 name
2189 );
2190 }
2191 }
2192
2193 #[test]
2194 fn test_security_critical_syscalls_remain_absent_from_filter() {
2195 let extra = SeccompManager::SECURITY_CRITICAL_DENIED_SYSCALLS
2196 .iter()
2197 .map(|name| (*name).to_string())
2198 .collect::<Vec<_>>();
2199 let rules = SeccompManager::minimal_filter(true, &extra).unwrap();
2200
2201 for name in SeccompManager::SECURITY_CRITICAL_DENIED_SYSCALLS {
2202 let syscall = syscall_name_to_number(name).unwrap();
2203 assert!(
2204 !rules.contains_key(&syscall),
2205 "{} must not appear in the built-in filter even when requested via --seccomp-allow",
2206 name
2207 );
2208 assert!(
2209 !SeccompManager::OPT_IN_SYSCALLS.contains(name),
2210 "{} must not be in the seccomp opt-in allowlist",
2211 name
2212 );
2213 }
2214 }
2215
2216 #[test]
2217 fn test_extra_syscalls_clone3_remains_blocked() {
2218 let extra = vec!["clone3".to_string()];
2219 let rules = SeccompManager::minimal_filter(true, &extra).unwrap();
2220 assert!(
2221 !rules.contains_key(&libc::SYS_clone3),
2222 "clone3 must stay out of the allowlist even when requested via --seccomp-allow"
2223 );
2224 assert!(
2225 SeccompManager::errno_denied_syscalls()
2226 .iter()
2227 .any(|(nr, _)| *nr == libc::SYS_clone3),
2228 "clone3 must remain covered by the exact ENOSYS deny"
2229 );
2230 }
2231
2232 #[test]
2233 fn test_extra_syscalls_opt_in_syscall_is_added() {
2234 let extra = vec!["io_uring_setup".to_string()];
2236 let rules = SeccompManager::minimal_filter(true, &extra).unwrap();
2237 assert!(
2238 rules.contains_key(&libc::SYS_io_uring_setup),
2239 "io_uring_setup is in OPT_IN_SYSCALLS and must be added"
2240 );
2241 }
2242
2243 #[test]
2244 fn test_production_validation_rejects_security_critical_extra_syscalls() {
2245 for name in [
2246 "clone3",
2247 "unshare",
2248 "setns",
2249 "add_key",
2250 "request_key",
2251 "keyctl",
2252 ] {
2253 let extra = vec![name.to_string()];
2254 let err =
2255 SeccompManager::validate_extra_syscalls_for_production(true, &extra).unwrap_err();
2256 assert!(err.to_string().contains("security-critical"));
2257 assert!(err.to_string().contains(name));
2258 }
2259 }
2260
2261 #[test]
2262 fn test_production_validation_rejects_unsupported_extra_syscalls() {
2263 let extra = vec!["kexec_load".to_string()];
2264 let err = SeccompManager::validate_extra_syscalls_for_production(true, &extra).unwrap_err();
2265 assert!(err.to_string().contains("unsupported"));
2266 assert!(err.to_string().contains("kexec_load"));
2267 }
2268
2269 #[test]
2270 fn test_production_validation_allows_supported_extra_syscalls() {
2271 let extra = vec![
2272 "read".to_string(),
2273 "clone".to_string(),
2274 "connect".to_string(),
2275 "io_uring_setup".to_string(),
2276 ];
2277 SeccompManager::validate_extra_syscalls_for_production(true, &extra).unwrap();
2278 }
2279}