use crate::error::{NucleusError, Result};
use crate::security::policy::sha256_hex;
use seccompiler::{BpfProgram, SeccompAction, SeccompCondition, SeccompFilter, SeccompRule};
use std::collections::BTreeMap;
use std::path::Path;
use tracing::{debug, info, warn};
/// Builds and installs seccomp-BPF filters, remembering whether one has
/// already been installed through this manager.
#[derive(Debug)]
pub struct SeccompManager {
    /// `true` once one of the `apply_*` methods has successfully installed a
    /// filter; later `apply_*` calls then return early without re-installing.
    applied: bool,
}
/// Bitwise OR of every `CLONE_NEW*` namespace flag, widened to `u64`.
///
/// `SeccompManager::minimal_filter` uses this as the mask of a masked-equality
/// condition on argument 0 of `clone`: the syscall is only matched (allowed)
/// when none of these bits is set.
const DENIED_CLONE_NAMESPACE_FLAGS: u64 = (libc::CLONE_NEWUSER
| libc::CLONE_NEWNS
| libc::CLONE_NEWNET
| libc::CLONE_NEWIPC
| libc::CLONE_NEWUTS
| libc::CLONE_NEWPID
| libc::CLONE_NEWCGROUP
| libc::CLONE_NEWTIME) as u64;
impl SeccompManager {
pub fn new() -> Self {
Self { applied: false }
}
/// Returns the syscall numbers that the built-in filter allows
/// unconditionally (no argument conditions), independent of network mode.
///
/// On `x86_64` the list is extended with the legacy, non-`*at` variants that
/// only exist on that architecture. Every number appears exactly once.
fn base_allowed_syscalls() -> Vec<i64> {
    let mut syscalls = vec![
        // File descriptors and file I/O.
        libc::SYS_read,
        libc::SYS_write,
        libc::SYS_openat,
        libc::SYS_close,
        libc::SYS_fstat,
        libc::SYS_lseek,
        libc::SYS_fcntl,
        libc::SYS_readv,
        libc::SYS_writev,
        libc::SYS_preadv,
        libc::SYS_pwritev,
        libc::SYS_preadv2,
        libc::SYS_pwritev2,
        libc::SYS_pread64,
        libc::SYS_pwrite64,
        libc::SYS_readlinkat,
        libc::SYS_newfstatat,
        libc::SYS_statx,
        libc::SYS_faccessat,
        libc::SYS_faccessat2,
        libc::SYS_dup,
        libc::SYS_dup3,
        libc::SYS_pipe2,
        libc::SYS_unlinkat,
        libc::SYS_renameat,
        libc::SYS_renameat2,
        libc::SYS_linkat,
        libc::SYS_symlinkat,
        libc::SYS_fchmod,
        libc::SYS_fchmodat,
        libc::SYS_truncate,
        libc::SYS_ftruncate,
        libc::SYS_fallocate,
        #[cfg(target_arch = "x86_64")]
        libc::SYS_fadvise64,
        libc::SYS_fsync,
        libc::SYS_fdatasync,
        libc::SYS_sync_file_range,
        libc::SYS_flock,
        libc::SYS_fstatfs,
        libc::SYS_statfs,
        #[cfg(target_arch = "x86_64")]
        libc::SYS_sendfile,
        libc::SYS_copy_file_range,
        libc::SYS_splice,
        libc::SYS_tee,
        // Memory management.
        libc::SYS_mmap,
        libc::SYS_munmap,
        libc::SYS_brk,
        libc::SYS_mremap,
        libc::SYS_madvise,
        libc::SYS_msync,
        libc::SYS_mlock,
        libc::SYS_munlock,
        libc::SYS_mlock2,
        // System V shared memory and semaphores.
        libc::SYS_shmget,
        libc::SYS_shmat,
        libc::SYS_shmdt,
        libc::SYS_shmctl,
        libc::SYS_semget,
        libc::SYS_semop,
        libc::SYS_semctl,
        libc::SYS_semtimedop,
        // Process lifecycle and identity.
        libc::SYS_execve,
        libc::SYS_wait4,
        libc::SYS_waitid,
        libc::SYS_exit,
        libc::SYS_exit_group,
        libc::SYS_getpid,
        libc::SYS_gettid,
        libc::SYS_getuid,
        libc::SYS_getgid,
        libc::SYS_geteuid,
        libc::SYS_getegid,
        libc::SYS_getppid,
        libc::SYS_setsid,
        libc::SYS_getgroups,
        // Signals.
        libc::SYS_rt_sigaction,
        libc::SYS_rt_sigprocmask,
        libc::SYS_rt_sigreturn,
        libc::SYS_rt_sigsuspend,
        libc::SYS_rt_sigtimedwait,
        libc::SYS_rt_sigpending,
        libc::SYS_rt_sigqueueinfo,
        libc::SYS_sigaltstack,
        libc::SYS_restart_syscall,
        libc::SYS_kill,
        libc::SYS_tgkill,
        // Clocks and timers.
        libc::SYS_clock_gettime,
        libc::SYS_clock_getres,
        libc::SYS_clock_nanosleep,
        libc::SYS_gettimeofday,
        libc::SYS_nanosleep,
        libc::SYS_setitimer,
        libc::SYS_getitimer,
        // Directories and miscellaneous process state.
        libc::SYS_getcwd,
        libc::SYS_chdir,
        libc::SYS_fchdir,
        libc::SYS_mkdirat,
        libc::SYS_getdents64,
        libc::SYS_uname,
        libc::SYS_getrandom,
        libc::SYS_futex,
        libc::SYS_set_tid_address,
        libc::SYS_set_robust_list,
        libc::SYS_get_robust_list,
        libc::SYS_umask,
        libc::SYS_getrusage,
        libc::SYS_times,
        // Scheduling.
        libc::SYS_sched_yield,
        libc::SYS_sched_getaffinity,
        libc::SYS_sched_setaffinity,
        libc::SYS_sched_getparam,
        libc::SYS_sched_getscheduler,
        libc::SYS_getcpu,
        // Extended attributes (read-only variants).
        libc::SYS_getxattr,
        libc::SYS_lgetxattr,
        libc::SYS_fgetxattr,
        libc::SYS_listxattr,
        libc::SYS_llistxattr,
        libc::SYS_flistxattr,
        libc::SYS_rseq,
        libc::SYS_close_range,
        libc::SYS_fchown,
        libc::SYS_fchownat,
        // Kernel AIO.
        libc::SYS_io_setup,
        libc::SYS_io_destroy,
        libc::SYS_io_submit,
        libc::SYS_io_getevents,
        libc::SYS_setpgid,
        libc::SYS_getpgid,
        // Landlock.
        libc::SYS_landlock_create_ruleset,
        libc::SYS_landlock_add_rule,
        libc::SYS_landlock_restrict_self,
        // Socket introspection on existing descriptors.
        libc::SYS_getsockname,
        libc::SYS_getpeername,
        libc::SYS_socketpair,
        libc::SYS_getsockopt,
        // Readiness notification and fd-based events.
        libc::SYS_ppoll,
        libc::SYS_pselect6,
        libc::SYS_epoll_create1,
        libc::SYS_epoll_ctl,
        libc::SYS_epoll_pwait,
        libc::SYS_eventfd2,
        libc::SYS_signalfd4,
        libc::SYS_timerfd_create,
        libc::SYS_timerfd_settime,
        libc::SYS_timerfd_gettime,
    ];
    // Legacy syscalls that are only defined on x86_64. `SYS_fchown` is not
    // repeated here because the architecture-independent list above already
    // contains it.
    #[cfg(target_arch = "x86_64")]
    syscalls.extend_from_slice(&[
        libc::SYS_open,
        libc::SYS_stat,
        libc::SYS_lstat,
        libc::SYS_access,
        libc::SYS_readlink,
        libc::SYS_dup2,
        libc::SYS_pipe,
        libc::SYS_unlink,
        libc::SYS_rename,
        libc::SYS_link,
        libc::SYS_symlink,
        libc::SYS_chmod,
        libc::SYS_mkdir,
        libc::SYS_rmdir,
        libc::SYS_getdents,
        libc::SYS_getpgrp,
        libc::SYS_chown,
        libc::SYS_lchown,
        libc::SYS_arch_prctl,
        libc::SYS_getrlimit,
        libc::SYS_poll,
        libc::SYS_select,
        libc::SYS_epoll_create,
        libc::SYS_epoll_wait,
        libc::SYS_eventfd,
        libc::SYS_signalfd,
    ]);
    syscalls
}
/// Returns the address families `socket` may be called with.
///
/// `AF_UNIX` is always permitted; `AF_INET` and `AF_INET6` are appended only
/// when `allow_network` is set.
fn allowed_socket_domains(allow_network: bool) -> Vec<i32> {
    let mut domains = vec![libc::AF_UNIX];
    if allow_network {
        domains.extend([libc::AF_INET, libc::AF_INET6]);
    }
    domains
}
/// Returns the socket syscalls that are allowed only when networking is
/// enabled; the list is empty when `allow_network` is `false`.
fn network_mode_syscalls(allow_network: bool) -> Vec<i64> {
    if !allow_network {
        return Vec::new();
    }
    vec![
        libc::SYS_connect,
        libc::SYS_sendto,
        libc::SYS_recvfrom,
        libc::SYS_sendmsg,
        libc::SYS_recvmsg,
        libc::SYS_shutdown,
        libc::SYS_bind,
        libc::SYS_listen,
        libc::SYS_accept,
        libc::SYS_accept4,
        libc::SYS_setsockopt,
    ]
}
fn minimal_filter(
allow_network: bool,
extra_syscalls: &[String],
) -> Result<BTreeMap<i64, Vec<SeccompRule>>> {
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
let allowed_syscalls = Self::base_allowed_syscalls();
for syscall in allowed_syscalls {
rules.insert(syscall, Vec::new());
}
for syscall in Self::network_mode_syscalls(allow_network) {
rules.insert(syscall, Vec::new());
}
for name in extra_syscalls {
if let Some(nr) = syscall_name_to_number(name) {
if rules.contains_key(&nr) {
} else if Self::OPT_IN_SYSCALLS.contains(&name.as_str()) {
rules.insert(nr, Vec::new());
} else {
warn!(
"--seccomp-allow: syscall '{}' is not in the opt-in allowlist – blocked",
name
);
}
} else {
warn!("--seccomp-allow: unknown syscall '{}' – blocked", name);
}
}
let mut socket_rules = Vec::new();
for domain in Self::allowed_socket_domains(allow_network) {
let condition = SeccompCondition::new(
0, seccompiler::SeccompCmpArgLen::Dword,
seccompiler::SeccompCmpOp::Eq,
domain as u64,
)
.map_err(|e| {
NucleusError::SeccompError(format!(
"Failed to create socket domain condition: {}",
e
))
})?;
let rule = SeccompRule::new(vec![condition]).map_err(|e| {
NucleusError::SeccompError(format!("Failed to create socket rule: {}", e))
})?;
socket_rules.push(rule);
}
rules.insert(libc::SYS_socket, socket_rules);
let ioctl_allowed: &[u64] = &[
0x5401, 0x5402, 0x5403, 0x5404, 0x540B, 0x540F, 0x5410, 0x5413, 0x5429, 0x541B, 0x5421, 0x5451, 0x5450, ];
let mut ioctl_rules = Vec::new();
for &request in ioctl_allowed {
let condition = SeccompCondition::new(
1, seccompiler::SeccompCmpArgLen::Dword,
seccompiler::SeccompCmpOp::Eq,
request,
)
.map_err(|e| {
NucleusError::SeccompError(format!("Failed to create ioctl condition: {}", e))
})?;
let rule = SeccompRule::new(vec![condition]).map_err(|e| {
NucleusError::SeccompError(format!("Failed to create ioctl rule: {}", e))
})?;
ioctl_rules.push(rule);
}
rules.insert(libc::SYS_ioctl, ioctl_rules);
let prctl_allowed: &[u64] = &[
1, 2, 15, 16, 23, 27, 36, 37, 38, 40, 47, 39, ];
let mut prctl_rules = Vec::new();
for &option in prctl_allowed {
let condition = SeccompCondition::new(
0, seccompiler::SeccompCmpArgLen::Dword,
seccompiler::SeccompCmpOp::Eq,
option,
)
.map_err(|e| {
NucleusError::SeccompError(format!("Failed to create prctl condition: {}", e))
})?;
let rule = SeccompRule::new(vec![condition]).map_err(|e| {
NucleusError::SeccompError(format!("Failed to create prctl rule: {}", e))
})?;
prctl_rules.push(rule);
}
rules.insert(libc::SYS_prctl, prctl_rules);
let prlimit_condition = SeccompCondition::new(
2, seccompiler::SeccompCmpArgLen::Qword,
seccompiler::SeccompCmpOp::Eq,
0u64, )
.map_err(|e| {
NucleusError::SeccompError(format!("Failed to create prlimit64 condition: {}", e))
})?;
let prlimit_rule = SeccompRule::new(vec![prlimit_condition]).map_err(|e| {
NucleusError::SeccompError(format!("Failed to create prlimit64 rule: {}", e))
})?;
rules.insert(libc::SYS_prlimit64, vec![prlimit_rule]);
let mut mprotect_rules = Vec::new();
for allowed in [0, libc::PROT_WRITE as u64, libc::PROT_EXEC as u64] {
let condition = SeccompCondition::new(
2, seccompiler::SeccompCmpArgLen::Dword,
seccompiler::SeccompCmpOp::MaskedEq((libc::PROT_WRITE | libc::PROT_EXEC) as u64),
allowed,
)
.map_err(|e| {
NucleusError::SeccompError(format!("Failed to create mprotect condition: {}", e))
})?;
let rule = SeccompRule::new(vec![condition]).map_err(|e| {
NucleusError::SeccompError(format!("Failed to create mprotect rule: {}", e))
})?;
mprotect_rules.push(rule);
}
rules.insert(libc::SYS_mprotect, mprotect_rules);
if Self::has_effective_cap(21) {
return Err(NucleusError::SeccompError(
"SECURITY: CAP_SYS_ADMIN is still in the effective capability set. \
Capabilities must be dropped before installing seccomp filters \
(clone3 is allowed unconditionally)."
.to_string(),
));
}
rules.insert(libc::SYS_clone3, Vec::new());
let clone_condition = SeccompCondition::new(
0, seccompiler::SeccompCmpArgLen::Qword,
seccompiler::SeccompCmpOp::MaskedEq(DENIED_CLONE_NAMESPACE_FLAGS),
0, )
.map_err(|e| {
NucleusError::SeccompError(format!("Failed to create clone condition: {}", e))
})?;
let clone_rule = SeccompRule::new(vec![clone_condition]).map_err(|e| {
NucleusError::SeccompError(format!("Failed to create clone rule: {}", e))
})?;
rules.insert(libc::SYS_clone, vec![clone_rule]);
let execveat_condition = SeccompCondition::new(
4, seccompiler::SeccompCmpArgLen::Dword,
seccompiler::SeccompCmpOp::MaskedEq(libc::AT_EMPTY_PATH as u64),
0, )
.map_err(|e| {
NucleusError::SeccompError(format!("Failed to create execveat condition: {}", e))
})?;
let execveat_rule = SeccompRule::new(vec![execveat_condition]).map_err(|e| {
NucleusError::SeccompError(format!("Failed to create execveat rule: {}", e))
})?;
rules.insert(libc::SYS_execveat, vec![execveat_rule]);
Ok(rules)
}
/// Compiles the built-in filter (network enabled, no extra syscalls) into a
/// BPF program without installing it.
///
/// Syscalls absent from the rule map get `KillProcess`; matching ones get
/// `Allow`.
///
/// # Errors
/// Propagates rule-construction errors and reports an unsupported target
/// architecture as `NucleusError::SeccompError`.
pub fn compile_minimal_filter() -> Result<BpfProgram> {
    let rule_map = Self::minimal_filter(true, &[])?;
    let arch = std::env::consts::ARCH
        .try_into()
        .map_err(|e| NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e)))?;
    super::seccomp_bpf::compile_bitmap_bpf(
        rule_map,
        SeccompAction::KillProcess,
        SeccompAction::Allow,
        arch,
    )
}
/// Test-only access to the built-in rule map; panics if it cannot be built.
#[cfg(test)]
pub(crate) fn minimal_filter_for_test(
    allow_network: bool,
    extra_syscalls: &[String],
) -> BTreeMap<i64, Vec<SeccompRule>> {
    let built = Self::minimal_filter(allow_network, extra_syscalls);
    built.unwrap()
}
/// Installs the built-in filter in strict mode: failures are returned as
/// errors and denied syscalls are not logged.
pub fn apply_minimal_filter(&mut self) -> Result<bool> {
    let (best_effort, log_denied) = (false, false);
    self.apply_minimal_filter_with_mode(best_effort, log_denied)
}
/// Installs the built-in filter with networking enabled and no extra
/// syscalls, forwarding `best_effort` and `log_denied` unchanged.
pub fn apply_minimal_filter_with_mode(
    &mut self,
    best_effort: bool,
    log_denied: bool,
) -> Result<bool> {
    let allow_network = true;
    let no_extras: &[String] = &[];
    self.apply_filter_for_network_mode(allow_network, best_effort, log_denied, no_extras)
}
/// Builds, compiles and installs the built-in filter.
///
/// Returns `Ok(true)` when a filter is (or already was) installed. With
/// `best_effort` set, every failure is logged and turned into `Ok(false)`;
/// otherwise the failure is returned as an error.
///
/// * `allow_network` / `extra_syscalls` – forwarded to `minimal_filter`.
/// * `log_denied` – forwarded to `apply_bpf_program`.
pub fn apply_filter_for_network_mode(
    &mut self,
    allow_network: bool,
    best_effort: bool,
    log_denied: bool,
    extra_syscalls: &[String],
) -> Result<bool> {
    if self.applied {
        debug!("Seccomp filter already applied, skipping");
        return Ok(true);
    }
    info!(allow_network, "Applying seccomp filter");

    // Step 1: rule map.
    let rules = match Self::minimal_filter(allow_network, extra_syscalls) {
        Ok(built) => built,
        Err(e) if best_effort => {
            warn!(
                "Failed to create seccomp rules: {} (continuing without seccomp)",
                e
            );
            return Ok(false);
        }
        Err(e) => return Err(e),
    };

    // Step 2: target architecture.
    let target_arch = match std::env::consts::ARCH.try_into() {
        Ok(arch) => arch,
        Err(e) => {
            let msg = format!("Unsupported architecture: {:?}", e);
            if !best_effort {
                return Err(NucleusError::SeccompError(msg));
            }
            warn!("{} (continuing without seccomp)", msg);
            return Ok(false);
        }
    };

    // Step 3: BPF compilation (default action kills the process).
    let compiled = super::seccomp_bpf::compile_bitmap_bpf(
        rules,
        SeccompAction::KillProcess,
        SeccompAction::Allow,
        target_arch,
    );
    let bpf_prog: BpfProgram = match compiled {
        Ok(program) => program,
        Err(e) if best_effort => {
            warn!(
                "Failed to compile BPF program: {} (continuing without seccomp)",
                e
            );
            return Ok(false);
        }
        Err(e) => return Err(e),
    };

    // Step 4: installation.
    match Self::apply_bpf_program(&bpf_prog, log_denied) {
        Ok(()) => {
            self.applied = true;
            info!("Successfully applied seccomp filter");
            Ok(true)
        }
        Err(e) if best_effort => {
            warn!(
                "Failed to apply seccomp filter: {} (continuing without seccomp)",
                e
            );
            Ok(false)
        }
        // NOTE(review): `apply_bpf_program` may already prefix its error
        // with the same text, so the message can read doubled — confirm
        // against `NucleusError`'s Display before changing.
        Err(e) => Err(NucleusError::SeccompError(format!(
            "Failed to apply seccomp filter: {}",
            e
        ))),
    }
}
/// Loads a JSON seccomp profile from `profile_path` and installs it.
///
/// Every name in a group whose action is `SCMP_ACT_ALLOW` is allowed;
/// unknown names are skipped with a warning and all other actions are
/// ignored, so unlisted syscalls fall through to `KillProcess`. For syscalls
/// in `ARG_FILTERED_SYSCALLS` that the profile allows, the non-empty built-in
/// argument rules (built with networking enabled) replace the unconditional
/// allow. `clone3` is always allowed.
///
/// NOTE(review): per-group `args` are not translated into rule conditions
/// here; they are only inspected by `warn_missing_arg_filters`.
///
/// * `expected_sha256` – when set, the hex SHA-256 of the file must match.
///   The comparison ignores ASCII case so upper-case digests are accepted.
/// * `audit_mode` – forwarded to `apply_bpf_program` as its `log_denied` flag.
///
/// Returns `Ok(true)` once installed, or immediately if a filter was already
/// applied through this manager.
///
/// # Errors
/// `NucleusError::SeccompError` for read, hash-mismatch, parse and
/// architecture failures; errors from `minimal_filter`, BPF compilation and
/// installation are propagated unchanged.
pub fn apply_profile_from_file(
    &mut self,
    profile_path: &Path,
    expected_sha256: Option<&str>,
    audit_mode: bool,
) -> Result<bool> {
    if self.applied {
        debug!("Seccomp filter already applied, skipping");
        return Ok(true);
    }
    info!("Loading seccomp profile from {:?}", profile_path);
    let content = std::fs::read(profile_path).map_err(|e| {
        NucleusError::SeccompError(format!(
            "Failed to read seccomp profile {:?}: {}",
            profile_path, e
        ))
    })?;

    if let Some(expected) = expected_sha256 {
        let actual = sha256_hex(&content);
        // Hex digests denote the same bytes regardless of letter case.
        if !actual.eq_ignore_ascii_case(expected) {
            return Err(NucleusError::SeccompError(format!(
                "Seccomp profile hash mismatch: expected {}, got {}",
                expected, actual
            )));
        }
        info!("Seccomp profile hash verified: {}", actual);
    }

    let profile: SeccompProfile = serde_json::from_slice(&content).map_err(|e| {
        NucleusError::SeccompError(format!("Failed to parse seccomp profile: {}", e))
    })?;
    Self::warn_missing_arg_filters(&profile);

    // Start from "allow unconditionally" for every recognised allowed name.
    let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
    for syscall_group in &profile.syscalls {
        if syscall_group.action != "SCMP_ACT_ALLOW" {
            continue;
        }
        for name in &syscall_group.names {
            match syscall_name_to_number(name) {
                Some(nr) => {
                    rules.insert(nr, Vec::new());
                }
                None => warn!("Unknown syscall in profile: {} (skipping)", name),
            }
        }
    }

    // Re-impose the built-in argument filters on sensitive syscalls that the
    // profile allowed.
    let builtin_rules = Self::minimal_filter(true, &[])?;
    for syscall_name in Self::ARG_FILTERED_SYSCALLS {
        let Some(nr) = syscall_name_to_number(syscall_name) else {
            continue;
        };
        let (Some(slot), Some(builtin)) = (rules.get_mut(&nr), builtin_rules.get(&nr)) else {
            continue;
        };
        if builtin.is_empty() {
            continue;
        }
        info!(
            "Merging built-in argument filters for '{}' into custom profile",
            syscall_name
        );
        *slot = builtin.clone();
    }

    // clone3 is allowed even when the profile does not mention it.
    rules.entry(libc::SYS_clone3).or_default();

    let target_arch = std::env::consts::ARCH.try_into().map_err(|e| {
        NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
    })?;
    let bpf_prog: BpfProgram = super::seccomp_bpf::compile_bitmap_bpf(
        rules,
        SeccompAction::KillProcess,
        SeccompAction::Allow,
        target_arch,
    )?;

    Self::apply_bpf_program(&bpf_prog, audit_mode)?;
    self.applied = true;
    info!(
        "Seccomp profile applied from {:?} (log_denied={})",
        profile_path, audit_mode
    );
    Ok(true)
}
/// Installs a tracing filter that permits every syscall while having the
/// kernel log each one.
///
/// The filter has no rules, so every syscall takes the default (mismatch)
/// action. That action is `SeccompAction::Log`, which logs the syscall and
/// then lets it proceed. `Allow` cannot be used as the default here:
/// seccompiler rejects a filter whose default and match actions are
/// identical, and `SECCOMP_FILTER_FLAG_LOG` never logs allowed syscalls, so
/// an allow/allow filter would record nothing.
///
/// Returns `Ok(true)` once installed, or immediately if a filter was already
/// applied through this manager.
///
/// # Errors
/// `NucleusError::SeccompError` when the architecture is unsupported or the
/// filter cannot be created or compiled; installation errors are propagated.
pub fn apply_trace_filter(&mut self) -> Result<bool> {
    if self.applied {
        debug!("Seccomp filter already applied, skipping trace filter");
        return Ok(true);
    }
    info!("Applying seccomp trace filter (allow-all + LOG)");
    let target_arch = std::env::consts::ARCH.try_into().map_err(|e| {
        NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
    })?;
    let filter = SeccompFilter::new(
        BTreeMap::new(),
        // Default action for every syscall (no rules exist): log, then allow.
        SeccompAction::Log,
        // Match action; unused without rules, but must differ from the default.
        SeccompAction::Allow,
        target_arch,
    )
    .map_err(|e| NucleusError::SeccompError(format!("Failed to create trace filter: {}", e)))?;
    let bpf_prog: BpfProgram = filter.try_into().map_err(|e| {
        NucleusError::SeccompError(format!("Failed to compile trace BPF: {}", e))
    })?;
    Self::apply_bpf_program(&bpf_prog, true)?;
    self.applied = true;
    info!("Seccomp trace filter applied (all syscalls allowed + logged)");
    Ok(true)
}
/// Names of syscalls treated as argument-sensitive when a custom profile is
/// loaded: `warn_missing_arg_filters` warns when a profile allows one of them
/// without `args`, and `apply_profile_from_file` replaces the profile's
/// unconditional allow with the built-in rules when those are non-empty.
///
/// NOTE(review): `minimal_filter` also argument-filters `prlimit64`, which is
/// not listed here, and it allows `clone3` with no argument rule even though
/// `clone3` is listed — confirm both are intentional.
const ARG_FILTERED_SYSCALLS: &'static [&'static str] = &[
"clone", "clone3", "execveat", "ioctl", "mprotect", "prctl", "socket",
];
/// Names that may be enabled on top of the built-in filter through the
/// `extra_syscalls` argument of `minimal_filter` (`--seccomp-allow`).
///
/// A requested name that is not already allowed and is not in this list is
/// rejected with a warning; a listed name is inserted with no argument
/// conditions.
const OPT_IN_SYSCALLS: &'static [&'static str] = &[
"io_uring_setup",
"io_uring_enter",
"io_uring_register",
"msgget",
"msgsnd",
"msgrcv",
"msgctl",
"mq_open",
"mq_unlink",
"mq_timedsend",
"mq_timedreceive",
"mq_notify",
"mq_getsetattr",
"timer_create",
"timer_settime",
"timer_gettime",
"timer_getoverrun",
"timer_delete",
"inotify_init",
"inotify_init1",
"inotify_add_watch",
"inotify_rm_watch",
"fanotify_init",
"fanotify_mark",
"mincore",
"mlockall",
"munlockall",
"membarrier",
"process_madvise",
"mbind",
"set_mempolicy",
"get_mempolicy",
"set_mempolicy_home_node",
"pkey_mprotect",
"pkey_alloc",
"pkey_free",
"cachestat",
"remap_file_pages",
"sync",
"syncfs",
"sync_file_range",
"readahead",
"vmsplice",
"openat2",
"name_to_handle_at",
"open_by_handle_at",
"io_cancel",
"io_pgetevents",
"creat",
"fchmodat2",
"statmount",
"listmount",
"utimensat",
"utimes",
"utime",
"futimesat",
"setxattr",
"lsetxattr",
"fsetxattr",
"removexattr",
"lremovexattr",
"fremovexattr",
"setxattrat",
"getxattrat",
"listxattrat",
"removexattrat",
"recvmmsg",
"sendmmsg",
"sched_setparam",
"sched_setscheduler",
"sched_get_priority_max",
"sched_get_priority_min",
"sched_rr_get_interval",
"sched_setattr",
"sched_getattr",
"setrlimit",
"getpriority",
"setpriority",
"ioprio_set",
"ioprio_get",
"vfork",
"pause",
"alarm",
"tkill",
"sysinfo",
"personality",
"vhangup",
"time",
"pidfd_open",
"pidfd_send_signal",
"pidfd_getfd",
"setuid",
"setgid",
"setreuid",
"setregid",
"setresuid",
"getresuid",
"setresgid",
"getresgid",
"setfsuid",
"setfsgid",
"setgroups",
"getsid",
"capget",
"rt_tgsigqueueinfo",
"mknod",
"mknodat",
"syslog",
"clock_settime",
"clock_adjtime",
"adjtimex",
"unshare",
"kcmp",
"epoll_pwait2",
"futex_waitv",
"futex_wake",
"futex_wait",
"futex_requeue",
"seccomp",
"add_key",
"request_key",
"keyctl",
];
/// Logs a warning for every argument-sensitive syscall that `profile` allows
/// in a group without `args`.
fn warn_missing_arg_filters(profile: &SeccompProfile) {
    // Only ALLOW groups that carry no argument filters can trigger a warning.
    let unfiltered_allow_groups = profile
        .syscalls
        .iter()
        .filter(|group| group.action == "SCMP_ACT_ALLOW" && group.args.is_empty());

    for group in unfiltered_allow_groups {
        let sensitive_names = group
            .names
            .iter()
            .filter(|name| Self::ARG_FILTERED_SYSCALLS.contains(&name.as_str()));
        for name in sensitive_names {
            warn!(
                "Custom seccomp profile allows '{}' without argument filters. \
                 The built-in filter restricts this syscall at the argument level. \
                 This profile weakens security compared to the default.",
                name
            );
        }
    }
}
/// Reports whether capability number `cap` is set in the `CapEff` mask of
/// `/proc/self/status`.
///
/// Fails closed: returns `true` when the file cannot be read or when no
/// `CapEff` line with a parsable hexadecimal value is found.
fn has_effective_cap(cap: i32) -> bool {
    let Ok(status) = std::fs::read_to_string("/proc/self/status") else {
        return true;
    };
    status
        .lines()
        .filter_map(|line| line.strip_prefix("CapEff:\t"))
        .find_map(|hex| u64::from_str_radix(hex.trim(), 16).ok())
        .map_or(true, |effective| effective & (1u64 << cap) != 0)
}
/// Returns `true` once an `apply_*` method has installed a filter through
/// this manager.
pub fn is_applied(&self) -> bool {
self.applied
}
/// Installs `bpf_prog`, requesting `SECCOMP_FILTER_FLAG_LOG` when
/// `log_denied` is set.
///
/// If the first attempt fails with `EINVAL` while the log flag was requested,
/// a warning is emitted and installation is retried once without flags.
///
/// # Errors
/// A failure of the first attempt that does not qualify for the retry is
/// returned as `NucleusError::SeccompError`; a failure of the retry is
/// converted with `?` from the underlying `std::io::Error`.
fn apply_bpf_program(bpf_prog: &BpfProgram, log_denied: bool) -> Result<()> {
    let log_flag = libc::SECCOMP_FILTER_FLAG_LOG as libc::c_ulong;
    let requested: libc::c_ulong = if log_denied { log_flag } else { 0 };

    let first_err = match Self::apply_bpf_program_with_flags(bpf_prog, requested) {
        Ok(()) => return Ok(()),
        Err(err) => err,
    };

    let can_retry_without_log = log_denied
        && first_err.raw_os_error() == Some(libc::EINVAL)
        && libc::SECCOMP_FILTER_FLAG_LOG != 0;
    if !can_retry_without_log {
        return Err(NucleusError::SeccompError(format!(
            "Failed to apply seccomp filter: {}",
            first_err
        )));
    }

    warn!(
        "Kernel rejected SECCOMP_FILTER_FLAG_LOG; continuing with seccomp \
         enforcement without deny logging"
    );
    Self::apply_bpf_program_with_flags(bpf_prog, 0)?;
    Ok(())
}
/// Sets `PR_SET_NO_NEW_PRIVS` and installs `bpf_prog` through the raw
/// `seccomp(SECCOMP_SET_MODE_FILTER, flags, ..)` syscall.
///
/// # Errors
/// * `ErrorKind::InvalidInput` when the program has more instructions than
///   the `u16` length field of `sock_fprog` can express. Previously the
///   length was truncated silently, which could install a cut-off program.
/// * The OS error from `prctl` or `seccomp` when either call fails.
fn apply_bpf_program_with_flags(
    bpf_prog: &BpfProgram,
    flags: libc::c_ulong,
) -> std::io::Result<()> {
    // Validate the length before touching any process state.
    let len = u16::try_from(bpf_prog.len()).map_err(|_| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            format!(
                "seccomp BPF program too long: {} instructions",
                bpf_prog.len()
            ),
        )
    })?;

    // SAFETY: `prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)` takes only integer
    // arguments and does not read or write caller memory.
    let rc = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
    if rc != 0 {
        return Err(std::io::Error::last_os_error());
    }

    let prog = libc::sock_fprog {
        len,
        // The field is `*mut`, hence the cast from the borrowed program's
        // const pointer.
        filter: bpf_prog.as_ptr() as *mut libc::sock_filter,
    };
    // SAFETY: `prog` lives on this stack frame for the duration of the call
    // and points at `len` instructions borrowed from `bpf_prog`, which
    // outlives the call.
    let rc = unsafe {
        libc::syscall(
            libc::SYS_seccomp,
            libc::SECCOMP_SET_MODE_FILTER,
            flags,
            &prog as *const libc::sock_fprog,
        )
    };
    if rc < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}
}
use crate::security::seccomp_generate::SeccompProfile;
/// Maps a syscall name to its number on the build target, or `None` when the
/// name is not known to this table.
///
/// The table covers every syscall of the built-in allowlist and every name in
/// `SeccompManager::OPT_IN_SYSCALLS`, so that custom profiles and
/// `--seccomp-allow` can refer to them, plus further names that profiles may
/// mention. Arms guarded by `#[cfg(target_arch = "x86_64")]` are only
/// compiled on that architecture.
///
/// NOTE(review): entries written as bare integers are hard-coded syscall
/// numbers; `333` and `335` look x86_64-specific — confirm before building
/// for another architecture.
fn syscall_name_to_number(name: &str) -> Option<i64> {
    match name {
        "read" => Some(libc::SYS_read),
        "write" => Some(libc::SYS_write),
        #[cfg(target_arch = "x86_64")]
        "open" => Some(libc::SYS_open),
        "openat" => Some(libc::SYS_openat),
        "close" => Some(libc::SYS_close),
        #[cfg(target_arch = "x86_64")]
        "stat" => Some(libc::SYS_stat),
        "fstat" => Some(libc::SYS_fstat),
        #[cfg(target_arch = "x86_64")]
        "lstat" => Some(libc::SYS_lstat),
        "lseek" => Some(libc::SYS_lseek),
        #[cfg(target_arch = "x86_64")]
        "access" => Some(libc::SYS_access),
        "fcntl" => Some(libc::SYS_fcntl),
        "readv" => Some(libc::SYS_readv),
        "writev" => Some(libc::SYS_writev),
        "pread64" => Some(libc::SYS_pread64),
        "pwrite64" => Some(libc::SYS_pwrite64),
        // Vectored positional I/O: allowed by the built-in filter, so
        // profiles must be able to name them.
        "preadv" => Some(libc::SYS_preadv),
        "pwritev" => Some(libc::SYS_pwritev),
        "preadv2" => Some(libc::SYS_preadv2),
        "pwritev2" => Some(libc::SYS_pwritev2),
        #[cfg(target_arch = "x86_64")]
        "readlink" => Some(libc::SYS_readlink),
        "readlinkat" => Some(libc::SYS_readlinkat),
        "newfstatat" => Some(libc::SYS_newfstatat),
        "statx" => Some(libc::SYS_statx),
        "faccessat" => Some(libc::SYS_faccessat),
        "faccessat2" => Some(libc::SYS_faccessat2),
        "dup" => Some(libc::SYS_dup),
        #[cfg(target_arch = "x86_64")]
        "dup2" => Some(libc::SYS_dup2),
        "dup3" => Some(libc::SYS_dup3),
        #[cfg(target_arch = "x86_64")]
        "pipe" => Some(libc::SYS_pipe),
        "pipe2" => Some(libc::SYS_pipe2),
        #[cfg(target_arch = "x86_64")]
        "unlink" => Some(libc::SYS_unlink),
        "unlinkat" => Some(libc::SYS_unlinkat),
        #[cfg(target_arch = "x86_64")]
        "rename" => Some(libc::SYS_rename),
        "renameat" => Some(libc::SYS_renameat),
        "renameat2" => Some(libc::SYS_renameat2),
        #[cfg(target_arch = "x86_64")]
        "link" => Some(libc::SYS_link),
        "linkat" => Some(libc::SYS_linkat),
        #[cfg(target_arch = "x86_64")]
        "symlink" => Some(libc::SYS_symlink),
        "symlinkat" => Some(libc::SYS_symlinkat),
        #[cfg(target_arch = "x86_64")]
        "chmod" => Some(libc::SYS_chmod),
        "fchmod" => Some(libc::SYS_fchmod),
        "fchmodat" => Some(libc::SYS_fchmodat),
        "truncate" => Some(libc::SYS_truncate),
        "ftruncate" => Some(libc::SYS_ftruncate),
        "fallocate" => Some(libc::SYS_fallocate),
        #[cfg(target_arch = "x86_64")]
        "fadvise64" => Some(libc::SYS_fadvise64),
        "fsync" => Some(libc::SYS_fsync),
        "fdatasync" => Some(libc::SYS_fdatasync),
        "sync_file_range" => Some(libc::SYS_sync_file_range),
        "flock" => Some(libc::SYS_flock),
        "fstatfs" => Some(libc::SYS_fstatfs),
        "statfs" => Some(libc::SYS_statfs),
        #[cfg(target_arch = "x86_64")]
        "sendfile" => Some(libc::SYS_sendfile),
        "copy_file_range" => Some(libc::SYS_copy_file_range),
        "splice" => Some(libc::SYS_splice),
        "tee" => Some(libc::SYS_tee),
        "mmap" => Some(libc::SYS_mmap),
        "munmap" => Some(libc::SYS_munmap),
        "mprotect" => Some(libc::SYS_mprotect),
        "brk" => Some(libc::SYS_brk),
        "mremap" => Some(libc::SYS_mremap),
        "madvise" => Some(libc::SYS_madvise),
        "msync" => Some(libc::SYS_msync),
        "mlock" => Some(libc::SYS_mlock),
        "mlock2" => Some(libc::SYS_mlock2),
        "munlock" => Some(libc::SYS_munlock),
        "shmget" => Some(libc::SYS_shmget),
        "shmat" => Some(libc::SYS_shmat),
        "shmdt" => Some(libc::SYS_shmdt),
        "shmctl" => Some(libc::SYS_shmctl),
        "semget" => Some(libc::SYS_semget),
        "semop" => Some(libc::SYS_semop),
        "semctl" => Some(libc::SYS_semctl),
        "semtimedop" => Some(libc::SYS_semtimedop),
        #[cfg(target_arch = "x86_64")]
        "fork" => Some(libc::SYS_fork),
        "clone" => Some(libc::SYS_clone),
        "clone3" => Some(libc::SYS_clone3),
        "execve" => Some(libc::SYS_execve),
        "execveat" => Some(libc::SYS_execveat),
        "wait4" => Some(libc::SYS_wait4),
        "waitid" => Some(libc::SYS_waitid),
        "exit" => Some(libc::SYS_exit),
        "exit_group" => Some(libc::SYS_exit_group),
        "getpid" => Some(libc::SYS_getpid),
        "gettid" => Some(libc::SYS_gettid),
        "getuid" => Some(libc::SYS_getuid),
        "getgid" => Some(libc::SYS_getgid),
        "geteuid" => Some(libc::SYS_geteuid),
        "getegid" => Some(libc::SYS_getegid),
        "getppid" => Some(libc::SYS_getppid),
        #[cfg(target_arch = "x86_64")]
        "getpgrp" => Some(libc::SYS_getpgrp),
        "setsid" => Some(libc::SYS_setsid),
        "getgroups" => Some(libc::SYS_getgroups),
        "rt_sigaction" => Some(libc::SYS_rt_sigaction),
        "rt_sigprocmask" => Some(libc::SYS_rt_sigprocmask),
        "rt_sigreturn" => Some(libc::SYS_rt_sigreturn),
        "rt_sigsuspend" => Some(libc::SYS_rt_sigsuspend),
        "rt_sigtimedwait" => Some(libc::SYS_rt_sigtimedwait),
        "rt_sigpending" => Some(libc::SYS_rt_sigpending),
        "rt_sigqueueinfo" => Some(libc::SYS_rt_sigqueueinfo),
        "sigaltstack" => Some(libc::SYS_sigaltstack),
        "restart_syscall" => Some(libc::SYS_restart_syscall),
        "kill" => Some(libc::SYS_kill),
        "tgkill" => Some(libc::SYS_tgkill),
        "clock_gettime" => Some(libc::SYS_clock_gettime),
        "clock_getres" => Some(libc::SYS_clock_getres),
        "clock_nanosleep" => Some(libc::SYS_clock_nanosleep),
        "gettimeofday" => Some(libc::SYS_gettimeofday),
        "nanosleep" => Some(libc::SYS_nanosleep),
        "getcwd" => Some(libc::SYS_getcwd),
        "chdir" => Some(libc::SYS_chdir),
        "fchdir" => Some(libc::SYS_fchdir),
        #[cfg(target_arch = "x86_64")]
        "mkdir" => Some(libc::SYS_mkdir),
        "mkdirat" => Some(libc::SYS_mkdirat),
        #[cfg(target_arch = "x86_64")]
        "rmdir" => Some(libc::SYS_rmdir),
        #[cfg(target_arch = "x86_64")]
        "getdents" => Some(libc::SYS_getdents),
        "getdents64" => Some(libc::SYS_getdents64),
        "socket" => Some(libc::SYS_socket),
        "connect" => Some(libc::SYS_connect),
        "sendto" => Some(libc::SYS_sendto),
        "recvfrom" => Some(libc::SYS_recvfrom),
        "sendmsg" => Some(libc::SYS_sendmsg),
        "recvmsg" => Some(libc::SYS_recvmsg),
        "shutdown" => Some(libc::SYS_shutdown),
        "bind" => Some(libc::SYS_bind),
        "listen" => Some(libc::SYS_listen),
        "accept" => Some(libc::SYS_accept),
        "accept4" => Some(libc::SYS_accept4),
        "setsockopt" => Some(libc::SYS_setsockopt),
        "getsockopt" => Some(libc::SYS_getsockopt),
        "getsockname" => Some(libc::SYS_getsockname),
        "getpeername" => Some(libc::SYS_getpeername),
        "socketpair" => Some(libc::SYS_socketpair),
        #[cfg(target_arch = "x86_64")]
        "poll" => Some(libc::SYS_poll),
        "ppoll" => Some(libc::SYS_ppoll),
        #[cfg(target_arch = "x86_64")]
        "select" => Some(libc::SYS_select),
        "pselect6" => Some(libc::SYS_pselect6),
        #[cfg(target_arch = "x86_64")]
        "epoll_create" => Some(libc::SYS_epoll_create),
        "epoll_create1" => Some(libc::SYS_epoll_create1),
        "epoll_ctl" => Some(libc::SYS_epoll_ctl),
        #[cfg(target_arch = "x86_64")]
        "epoll_wait" => Some(libc::SYS_epoll_wait),
        "epoll_pwait" => Some(libc::SYS_epoll_pwait),
        #[cfg(target_arch = "x86_64")]
        "eventfd" => Some(libc::SYS_eventfd),
        "eventfd2" => Some(libc::SYS_eventfd2),
        #[cfg(target_arch = "x86_64")]
        "signalfd" => Some(libc::SYS_signalfd),
        "signalfd4" => Some(libc::SYS_signalfd4),
        "timerfd_create" => Some(libc::SYS_timerfd_create),
        "timerfd_settime" => Some(libc::SYS_timerfd_settime),
        "timerfd_gettime" => Some(libc::SYS_timerfd_gettime),
        "uname" => Some(libc::SYS_uname),
        "getrandom" => Some(libc::SYS_getrandom),
        "futex" => Some(libc::SYS_futex),
        "set_tid_address" => Some(libc::SYS_set_tid_address),
        "set_robust_list" => Some(libc::SYS_set_robust_list),
        "get_robust_list" => Some(libc::SYS_get_robust_list),
        #[cfg(target_arch = "x86_64")]
        "arch_prctl" => Some(libc::SYS_arch_prctl),
        "sysinfo" => Some(libc::SYS_sysinfo),
        "umask" => Some(libc::SYS_umask),
        #[cfg(target_arch = "x86_64")]
        "getrlimit" => Some(libc::SYS_getrlimit),
        "prlimit64" => Some(libc::SYS_prlimit64),
        "getrusage" => Some(libc::SYS_getrusage),
        "times" => Some(libc::SYS_times),
        "sched_yield" => Some(libc::SYS_sched_yield),
        "sched_getaffinity" => Some(libc::SYS_sched_getaffinity),
        "sched_getparam" => Some(libc::SYS_sched_getparam),
        "sched_getscheduler" => Some(libc::SYS_sched_getscheduler),
        "getcpu" => Some(libc::SYS_getcpu),
        "rseq" => Some(libc::SYS_rseq),
        "close_range" => Some(libc::SYS_close_range),
        "fchown" => Some(libc::SYS_fchown),
        "fchownat" => Some(libc::SYS_fchownat),
        #[cfg(target_arch = "x86_64")]
        "chown" => Some(libc::SYS_chown),
        #[cfg(target_arch = "x86_64")]
        "lchown" => Some(libc::SYS_lchown),
        "io_uring_setup" => Some(libc::SYS_io_uring_setup),
        "io_uring_enter" => Some(libc::SYS_io_uring_enter),
        "io_uring_register" => Some(libc::SYS_io_uring_register),
        "io_setup" => Some(libc::SYS_io_setup),
        "io_destroy" => Some(libc::SYS_io_destroy),
        "io_submit" => Some(libc::SYS_io_submit),
        "io_getevents" => Some(libc::SYS_io_getevents),
        // Listed in OPT_IN_SYSCALLS; without a mapping it could never be
        // opted in.
        "io_cancel" => Some(libc::SYS_io_cancel),
        "setitimer" => Some(libc::SYS_setitimer),
        "getitimer" => Some(libc::SYS_getitimer),
        "setpgid" => Some(libc::SYS_setpgid),
        "getpgid" => Some(libc::SYS_getpgid),
        "memfd_create" => Some(libc::SYS_memfd_create),
        "ioctl" => Some(libc::SYS_ioctl),
        "prctl" => Some(libc::SYS_prctl),
        "landlock_create_ruleset" => Some(libc::SYS_landlock_create_ruleset),
        "landlock_add_rule" => Some(libc::SYS_landlock_add_rule),
        "landlock_restrict_self" => Some(libc::SYS_landlock_restrict_self),
        "mincore" => Some(libc::SYS_mincore),
        "mlockall" => Some(libc::SYS_mlockall),
        "munlockall" => Some(libc::SYS_munlockall),
        "mbind" => Some(libc::SYS_mbind),
        "set_mempolicy" => Some(libc::SYS_set_mempolicy),
        "get_mempolicy" => Some(libc::SYS_get_mempolicy),
        "memfd_secret" => Some(libc::SYS_memfd_secret),
        "membarrier" => Some(libc::SYS_membarrier),
        "process_madvise" => Some(libc::SYS_process_madvise),
        "pkey_mprotect" => Some(libc::SYS_pkey_mprotect),
        "pkey_alloc" => Some(libc::SYS_pkey_alloc),
        "pkey_free" => Some(libc::SYS_pkey_free),
        "mseal" => Some(libc::SYS_mseal),
        "map_shadow_stack" => Some(453),
        "remap_file_pages" => Some(libc::SYS_remap_file_pages),
        "set_mempolicy_home_node" => Some(libc::SYS_set_mempolicy_home_node),
        "cachestat" => Some(451),
        #[cfg(target_arch = "x86_64")]
        "vfork" => Some(libc::SYS_vfork),
        #[cfg(target_arch = "x86_64")]
        "pause" => Some(libc::SYS_pause),
        #[cfg(target_arch = "x86_64")]
        "alarm" => Some(libc::SYS_alarm),
        "tkill" => Some(libc::SYS_tkill),
        "ptrace" => Some(libc::SYS_ptrace),
        "process_vm_readv" => Some(libc::SYS_process_vm_readv),
        "process_vm_writev" => Some(libc::SYS_process_vm_writev),
        "process_mrelease" => Some(libc::SYS_process_mrelease),
        "kcmp" => Some(libc::SYS_kcmp),
        "unshare" => Some(libc::SYS_unshare),
        "setns" => Some(libc::SYS_setns),
        "pidfd_open" => Some(libc::SYS_pidfd_open),
        "pidfd_send_signal" => Some(libc::SYS_pidfd_send_signal),
        "pidfd_getfd" => Some(libc::SYS_pidfd_getfd),
        "setuid" => Some(libc::SYS_setuid),
        "setgid" => Some(libc::SYS_setgid),
        "setreuid" => Some(libc::SYS_setreuid),
        "setregid" => Some(libc::SYS_setregid),
        "setresuid" => Some(libc::SYS_setresuid),
        "getresuid" => Some(libc::SYS_getresuid),
        "setresgid" => Some(libc::SYS_setresgid),
        "getresgid" => Some(libc::SYS_getresgid),
        "setfsuid" => Some(libc::SYS_setfsuid),
        "setfsgid" => Some(libc::SYS_setfsgid),
        "setgroups" => Some(libc::SYS_setgroups),
        "getsid" => Some(libc::SYS_getsid),
        "capget" => Some(libc::SYS_capget),
        "capset" => Some(libc::SYS_capset),
        "rt_tgsigqueueinfo" => Some(libc::SYS_rt_tgsigqueueinfo),
        "msgget" => Some(libc::SYS_msgget),
        "msgsnd" => Some(libc::SYS_msgsnd),
        "msgrcv" => Some(libc::SYS_msgrcv),
        "msgctl" => Some(libc::SYS_msgctl),
        "timer_create" => Some(libc::SYS_timer_create),
        "timer_settime" => Some(libc::SYS_timer_settime),
        "timer_gettime" => Some(libc::SYS_timer_gettime),
        "timer_getoverrun" => Some(libc::SYS_timer_getoverrun),
        "timer_delete" => Some(libc::SYS_timer_delete),
        "clock_settime" => Some(libc::SYS_clock_settime),
        "clock_adjtime" => Some(libc::SYS_clock_adjtime),
        #[cfg(target_arch = "x86_64")]
        "time" => Some(libc::SYS_time),
        #[cfg(target_arch = "x86_64")]
        "creat" => Some(libc::SYS_creat),
        "readahead" => Some(libc::SYS_readahead),
        "sync" => Some(libc::SYS_sync),
        "syncfs" => Some(libc::SYS_syncfs),
        "vmsplice" => Some(libc::SYS_vmsplice),
        "utimensat" => Some(libc::SYS_utimensat),
        #[cfg(target_arch = "x86_64")]
        "utimes" => Some(libc::SYS_utimes),
        #[cfg(target_arch = "x86_64")]
        "utime" => Some(libc::SYS_utime),
        #[cfg(target_arch = "x86_64")]
        "futimesat" => Some(libc::SYS_futimesat),
        "openat2" => Some(libc::SYS_openat2),
        "name_to_handle_at" => Some(libc::SYS_name_to_handle_at),
        "open_by_handle_at" => Some(libc::SYS_open_by_handle_at),
        "fchmodat2" => Some(libc::SYS_fchmodat2),
        "statmount" => Some(457),
        "listmount" => Some(458),
        // Read-only extended-attribute calls from the built-in allowlist.
        "getxattr" => Some(libc::SYS_getxattr),
        "lgetxattr" => Some(libc::SYS_lgetxattr),
        "fgetxattr" => Some(libc::SYS_fgetxattr),
        "listxattr" => Some(libc::SYS_listxattr),
        "llistxattr" => Some(libc::SYS_llistxattr),
        "flistxattr" => Some(libc::SYS_flistxattr),
        "setxattr" => Some(libc::SYS_setxattr),
        "lsetxattr" => Some(libc::SYS_lsetxattr),
        "fsetxattr" => Some(libc::SYS_fsetxattr),
        "removexattr" => Some(libc::SYS_removexattr),
        "lremovexattr" => Some(libc::SYS_lremovexattr),
        "fremovexattr" => Some(libc::SYS_fremovexattr),
        "setxattrat" => Some(463),
        "getxattrat" => Some(464),
        "listxattrat" => Some(465),
        "removexattrat" => Some(466),
        "recvmmsg" => Some(libc::SYS_recvmmsg),
        "sendmmsg" => Some(libc::SYS_sendmmsg),
        #[cfg(target_arch = "x86_64")]
        "inotify_init" => Some(libc::SYS_inotify_init),
        "inotify_init1" => Some(libc::SYS_inotify_init1),
        "inotify_add_watch" => Some(libc::SYS_inotify_add_watch),
        "inotify_rm_watch" => Some(libc::SYS_inotify_rm_watch),
        "fanotify_init" => Some(libc::SYS_fanotify_init),
        "fanotify_mark" => Some(libc::SYS_fanotify_mark),
        "epoll_pwait2" => Some(libc::SYS_epoll_pwait2),
        "sched_setparam" => Some(libc::SYS_sched_setparam),
        "sched_setscheduler" => Some(libc::SYS_sched_setscheduler),
        "sched_get_priority_max" => Some(libc::SYS_sched_get_priority_max),
        "sched_get_priority_min" => Some(libc::SYS_sched_get_priority_min),
        "sched_rr_get_interval" => Some(libc::SYS_sched_rr_get_interval),
        "sched_setattr" => Some(libc::SYS_sched_setattr),
        "sched_getattr" => Some(libc::SYS_sched_getattr),
        "sched_setaffinity" => Some(libc::SYS_sched_setaffinity),
        #[cfg(target_arch = "x86_64")]
        "setrlimit" => Some(libc::SYS_setrlimit),
        "getpriority" => Some(libc::SYS_getpriority),
        "setpriority" => Some(libc::SYS_setpriority),
        "ioprio_set" => Some(libc::SYS_ioprio_set),
        "ioprio_get" => Some(libc::SYS_ioprio_get),
        "futex_waitv" => Some(libc::SYS_futex_waitv),
        "futex_wake" => Some(454),
        "futex_wait" => Some(455),
        "futex_requeue" => Some(456),
        "init_module" => Some(libc::SYS_init_module),
        "finit_module" => Some(libc::SYS_finit_module),
        "delete_module" => Some(libc::SYS_delete_module),
        "bpf" => Some(libc::SYS_bpf),
        "perf_event_open" => Some(libc::SYS_perf_event_open),
        "seccomp" => Some(libc::SYS_seccomp),
        "userfaultfd" => Some(libc::SYS_userfaultfd),
        "mount" => Some(libc::SYS_mount),
        "umount2" => Some(libc::SYS_umount2),
        "pivot_root" => Some(libc::SYS_pivot_root),
        "mount_setattr" => Some(libc::SYS_mount_setattr),
        "open_tree" => Some(libc::SYS_open_tree),
        "open_tree_attr" => Some(467),
        "move_mount" => Some(libc::SYS_move_mount),
        "fsopen" => Some(libc::SYS_fsopen),
        "fsconfig" => Some(libc::SYS_fsconfig),
        "fsmount" => Some(libc::SYS_fsmount),
        "fspick" => Some(libc::SYS_fspick),
        "syslog" => Some(libc::SYS_syslog),
        "reboot" => Some(libc::SYS_reboot),
        "swapon" => Some(libc::SYS_swapon),
        "swapoff" => Some(libc::SYS_swapoff),
        "chroot" => Some(libc::SYS_chroot),
        "acct" => Some(libc::SYS_acct),
        "settimeofday" => Some(libc::SYS_settimeofday),
        "sethostname" => Some(libc::SYS_sethostname),
        "setdomainname" => Some(libc::SYS_setdomainname),
        "adjtimex" => Some(libc::SYS_adjtimex),
        #[cfg(target_arch = "x86_64")]
        "modify_ldt" => Some(libc::SYS_modify_ldt),
        #[cfg(target_arch = "x86_64")]
        "iopl" => Some(libc::SYS_iopl),
        #[cfg(target_arch = "x86_64")]
        "ioperm" => Some(libc::SYS_ioperm),
        "quotactl" => Some(libc::SYS_quotactl),
        "quotactl_fd" => Some(libc::SYS_quotactl_fd),
        "personality" => Some(libc::SYS_personality),
        "vhangup" => Some(libc::SYS_vhangup),
        #[cfg(target_arch = "x86_64")]
        "ustat" => Some(libc::SYS_ustat),
        #[cfg(target_arch = "x86_64")]
        "sysfs" => Some(libc::SYS_sysfs),
        "mknod" => Some(libc::SYS_mknod),
        "mknodat" => Some(libc::SYS_mknodat),
        "migrate_pages" => Some(libc::SYS_migrate_pages),
        "move_pages" => Some(libc::SYS_move_pages),
        #[cfg(target_arch = "x86_64")]
        "kexec_load" => Some(libc::SYS_kexec_load),
        "kexec_file_load" => Some(libc::SYS_kexec_file_load),
        "mq_open" => Some(libc::SYS_mq_open),
        "mq_unlink" => Some(libc::SYS_mq_unlink),
        "mq_timedsend" => Some(libc::SYS_mq_timedsend),
        "mq_timedreceive" => Some(libc::SYS_mq_timedreceive),
        "mq_notify" => Some(libc::SYS_mq_notify),
        "mq_getsetattr" => Some(libc::SYS_mq_getsetattr),
        "add_key" => Some(libc::SYS_add_key),
        "request_key" => Some(libc::SYS_request_key),
        "keyctl" => Some(libc::SYS_keyctl),
        "io_pgetevents" => Some(333),
        "lsm_get_self_attr" => Some(459),
        "lsm_set_self_attr" => Some(460),
        "lsm_list_modules" => Some(461),
        #[cfg(target_arch = "x86_64")]
        "lookup_dcookie" => Some(libc::SYS_lookup_dcookie),
        "uretprobe" => Some(335),
        _ => None,
    }
}
impl Default for SeccompManager {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
/// A freshly constructed manager must not report a filter as applied.
#[test]
fn test_seccomp_manager_initial_state() {
    let applied = SeccompManager::new().is_applied();
    assert!(!applied);
}
/// Checks that a new manager starts in the "not applied" state.
///
/// NOTE(review): despite its name, this test never calls an apply method; it only
/// re-checks the initial `is_applied()` value. Presumably a real filter cannot be
/// installed inside the test process — confirm, then rename or extend this test.
#[test]
fn test_apply_idempotent() {
    let manager = SeccompManager::new();
    let already_applied = manager.is_applied();
    assert!(!already_applied);
}
/// `CLONE_NEWCGROUP` must be one of the bits in `DENIED_CLONE_NAMESPACE_FLAGS`.
#[test]
fn test_clone_denied_flags_include_newcgroup() {
    let cgroup_bit = libc::CLONE_NEWCGROUP as u64;
    let overlap = DENIED_CLONE_NAMESPACE_FLAGS & cgroup_bit;
    assert_ne!(overlap, 0);
}
/// `CLONE_NEWTIME` must be one of the bits in `DENIED_CLONE_NAMESPACE_FLAGS`.
#[test]
fn test_clone_denied_flags_include_newtime() {
    let time_bit = libc::CLONE_NEWTIME as u64;
    let overlap = DENIED_CLONE_NAMESPACE_FLAGS & time_bit;
    assert_ne!(
        overlap, 0,
        "CLONE_NEWTIME must be in denied clone namespace flags"
    );
}
/// With networking disabled, `AF_UNIX` is the only socket domain returned.
#[test]
fn test_network_none_socket_domains_are_unix_only() {
    let expected = vec![libc::AF_UNIX];
    let actual = SeccompManager::allowed_socket_domains(false);
    assert_eq!(actual, expected);
}
/// With networking enabled, UNIX/INET/INET6 domains are returned but `AF_NETLINK` is not.
#[test]
fn test_network_enabled_socket_domains_exclude_netlink() {
    let domains = SeccompManager::allowed_socket_domains(true);
    let has = |domain| domains.contains(&domain);
    assert!(has(libc::AF_UNIX));
    assert!(has(libc::AF_INET));
    assert!(has(libc::AF_INET6));
    assert!(!has(libc::AF_NETLINK));
}
/// The network syscall list is empty when networking is off and contains the
/// core socket calls (connect/bind/listen/accept/setsockopt) when it is on.
#[test]
fn test_network_mode_syscalls_only_enabled_when_network_allowed() {
    let without_network = SeccompManager::network_mode_syscalls(false);
    assert!(without_network.is_empty());

    let with_network = SeccompManager::network_mode_syscalls(true);
    let permits = |nr| with_network.contains(&nr);
    assert!(permits(libc::SYS_connect));
    assert!(permits(libc::SYS_bind));
    assert!(permits(libc::SYS_listen));
    assert!(permits(libc::SYS_accept));
    assert!(permits(libc::SYS_setsockopt));
}
/// The three Landlock syscalls must be part of the base allowlist.
#[test]
fn test_landlock_bootstrap_syscalls_present_in_base_allowlist() {
    let allowlist = SeccompManager::base_allowed_syscalls();
    let present = |nr: i64| allowlist.contains(&nr);
    assert!(present(libc::SYS_landlock_create_ruleset));
    assert!(present(libc::SYS_landlock_add_rule));
    assert!(present(libc::SYS_landlock_restrict_self));
}
/// No syscall number in 512..=547 may appear in the base or network allowlists.
#[test]
fn test_x32_legacy_range_not_allowlisted() {
    let base = SeccompManager::base_allowed_syscalls();
    let net = SeccompManager::network_mode_syscalls(true);
    (512_i64..=547_i64).for_each(|nr| {
        let allowlisted = base.contains(&nr) || net.contains(&nr);
        assert!(
            !allowlisted,
            "x32 syscall number {} unexpectedly allowlisted",
            nr
        );
    });
}
/// No syscall number in 359..=373 may appear in the base or network allowlists.
#[test]
fn test_i386_compat_socketcall_range_not_allowlisted() {
    let base = SeccompManager::base_allowed_syscalls();
    let net = SeccompManager::network_mode_syscalls(true);
    let is_allowlisted = |nr: &i64| base.contains(nr) || net.contains(nr);
    for nr in 359_i64..=373_i64 {
        assert!(
            !is_allowlisted(&nr),
            "i386 compat syscall number {} unexpectedly allowlisted",
            nr
        );
    }
}
/// Pins the sizes of the base and network allowlists so that any addition or
/// removal has to update this test deliberately.
///
/// NOTE(review): the `+ 8` term presumably stands for syscalls that the filter adds
/// outside the base list (e.g. the argument-filtered ones) — confirm against
/// `minimal_filter`; the last two assertions follow arithmetically from the first two.
#[test]
fn test_minimal_filter_allowlist_counts_are_stable() {
    let base_count = SeccompManager::base_allowed_syscalls().len();
    let net_count = SeccompManager::network_mode_syscalls(true).len();
    assert_eq!(base_count, 173);
    assert_eq!(net_count, 11);
    assert_eq!(base_count + 8, 181);
    assert_eq!(base_count + net_count + 8, 192);
}
/// `ARG_FILTERED_SYSCALLS` must name each of the listed critical syscalls.
#[test]
fn test_arg_filtered_syscalls_list_includes_critical_syscalls() {
    const CRITICAL: [&str; 6] = ["clone", "clone3", "execveat", "ioctl", "prctl", "socket"];
    CRITICAL.iter().for_each(|name| {
        let listed = SeccompManager::ARG_FILTERED_SYSCALLS.contains(name);
        assert!(listed, "'{}' must be in ARG_FILTERED_SYSCALLS", name);
    });
}
/// The filter built with networking enabled and no extras must contain `clone3`.
#[test]
fn test_clone3_allowed_in_minimal_filter() {
    let has_clone3 = SeccompManager::minimal_filter(true, &[])
        .unwrap()
        .contains_key(&libc::SYS_clone3);
    assert!(
        has_clone3,
        "clone3 must be in the seccomp allowlist (glibc 2.34+ requires it)"
    );
}
/// The filter built with networking enabled and no extras must contain `clone`.
#[test]
fn test_clone_is_allowed_with_arg_filter() {
    let filter = SeccompManager::minimal_filter(true, &[]).unwrap();
    let has_clone = filter.contains_key(&libc::SYS_clone);
    assert!(
        has_clone,
        "clone must be in the seccomp allowlist with arg filters"
    );
}
/// None of the listed syscalls may be present in the base allowlist.
#[test]
fn test_high_risk_syscalls_removed_from_base_allowlist() {
    let allowlist = SeccompManager::base_allowed_syscalls();
    let high_risk = [
        libc::SYS_sync,
        libc::SYS_syncfs,
        libc::SYS_mincore,
        libc::SYS_vfork,
        libc::SYS_tkill,
        libc::SYS_io_uring_setup,
        libc::SYS_io_uring_enter,
        libc::SYS_io_uring_register,
    ];
    high_risk.iter().for_each(|syscall| {
        assert!(
            !allowlist.contains(syscall),
            "syscall {} unexpectedly present in base allowlist",
            syscall
        );
    });
}
/// Every name in `ARG_FILTERED_SYSCALLS` that resolves to a syscall number must have
/// a non-empty rule list in the built-in filter.
///
/// `clone3` is exempt, and names that `syscall_name_to_number` cannot resolve are
/// skipped silently. NOTE(review): the reason for the `clone3` exemption is not
/// visible here — presumably it is handled without argument conditions; confirm.
#[test]
fn test_custom_profile_preserves_clone_arg_filters() {
    let builtin = SeccompManager::minimal_filter(true, &[]).unwrap();
    for name in SeccompManager::ARG_FILTERED_SYSCALLS {
        let resolved = if *name == "clone3" {
            None
        } else {
            syscall_name_to_number(name)
        };
        let nr = match resolved {
            Some(nr) => nr,
            None => continue,
        };
        let has_arg_rules = builtin.get(&nr).map_or(false, |conds| !conds.is_empty());
        assert!(
            has_arg_rules,
            "built-in filter must have argument-level rules for '{}' \
             so apply_profile_from_file can merge them into custom profiles",
            name
        );
    }
}
/// `memfd_create` must be absent from both the base allowlist and the built filter.
#[test]
fn test_memfd_create_not_in_default_allowlist() {
    let in_base = SeccompManager::base_allowed_syscalls().contains(&libc::SYS_memfd_create);
    assert!(
        !in_base,
        "memfd_create must not be in the default seccomp allowlist (fileless exec risk)"
    );

    let in_filter = SeccompManager::minimal_filter(true, &[])
        .unwrap()
        .contains_key(&libc::SYS_memfd_create);
    assert!(
        !in_filter,
        "memfd_create must not be in the compiled seccomp filter rules"
    );
}
/// `mprotect` must not be in the unconditional base allowlist, but must appear in the
/// built filter with a non-empty rule list.
#[test]
fn test_mprotect_has_arg_filtering() {
    let unconditional = SeccompManager::base_allowed_syscalls().contains(&libc::SYS_mprotect);
    assert!(
        !unconditional,
        "SYS_mprotect must not be unconditionally allowed - needs arg filtering"
    );

    let filter = SeccompManager::minimal_filter(true, &[]).unwrap();
    match filter.get(&libc::SYS_mprotect) {
        None => panic!("mprotect must be present in the seccomp filter rules"),
        Some(conditions) => assert!(
            !conditions.is_empty(),
            "mprotect must have argument-level conditions to prevent W^X violations"
        ),
    }
}
/// Clamps `index` to `text.len()` and moves it backwards until it sits on a UTF-8
/// character boundary, so slicing `text` at the result can never panic.
fn floor_char_boundary_in(text: &str, index: usize) -> usize {
    let mut index = index.min(text.len());
    // Offset 0 is always a boundary, so this loop terminates.
    while !text.is_char_boundary(index) {
        index -= 1;
    }
    index
}

/// Clamps `index` to `text.len()` and moves it forwards until it sits on a UTF-8
/// character boundary, so slicing `text` at the result can never panic.
fn ceil_char_boundary_in(text: &str, index: usize) -> usize {
    let mut index = index.min(text.len());
    // `text.len()` is always a boundary, so this loop terminates.
    while !text.is_char_boundary(index) {
        index += 1;
    }
    index
}

/// Scans the text of `seccomp.rs` and requires that the 200 bytes preceding every
/// `unsafe` block opener contain the marker `SAFETY:`.
///
/// The search needle is assembled from two pieces so that the contiguous pattern never
/// appears in this test's own source (otherwise the scan would match itself), and all
/// slice offsets are snapped to character boundaries so non-ASCII text near a match
/// cannot cause a slicing panic.
#[test]
fn test_unsafe_blocks_have_safety_comments() {
    let needle = concat!("unsafe", " {");
    let source = include_str!("seccomp.rs");
    for (offset, _) in source.match_indices(needle) {
        let window_start = floor_char_boundary_in(source, offset.saturating_sub(200));
        let context = &source[window_start..offset];
        assert!(
            context.contains("SAFETY:"),
            "unsafe block at byte {} must have a // SAFETY: comment. Context: ...{}...",
            offset,
            &source[floor_char_boundary_in(source, offset.saturating_sub(80))
                ..ceil_char_boundary_in(source, offset + 10)]
        );
    }
}
/// Test-side model of a write-xor-execute check on an `mprotect` protection value.
///
/// Masks `prot` down to its `PROT_WRITE` / `PROT_EXEC` bits and returns `true` when the
/// result is 0, `PROT_WRITE` alone, or `PROT_EXEC` alone. All other bits are ignored.
/// NOTE(review): presumably this mirrors the argument rule the real filter installs for
/// `mprotect` — that rule is not visible here, so confirm the two stay in sync.
fn mprotect_would_allow(prot: u64) -> bool {
    const WRITE: u64 = libc::PROT_WRITE as u64;
    const EXEC: u64 = libc::PROT_EXEC as u64;
    let wx_bits = prot & (WRITE | EXEC);
    match wx_bits {
        0 | WRITE | EXEC => true,
        _ => false,
    }
}
/// A protection value of 0 passes the model check.
#[test]
fn test_mprotect_allows_prot_none() {
    let prot_none: u64 = 0;
    assert!(mprotect_would_allow(prot_none), "PROT_NONE must be allowed");
}
/// `PROT_READ` alone passes the model check.
#[test]
fn test_mprotect_allows_prot_read_only() {
    let read_only = libc::PROT_READ as u64;
    let allowed = mprotect_would_allow(read_only);
    assert!(allowed, "PROT_READ must be allowed (W|X bits are 0)");
}
/// `PROT_READ | PROT_WRITE` passes the model check.
#[test]
fn test_mprotect_allows_prot_read_write() {
    let read_write = (libc::PROT_READ | libc::PROT_WRITE) as u64;
    let allowed = mprotect_would_allow(read_write);
    assert!(allowed, "PROT_READ|PROT_WRITE must be allowed");
}
/// `PROT_READ | PROT_EXEC` passes the model check.
#[test]
fn test_mprotect_allows_prot_read_exec() {
    let read_exec = (libc::PROT_READ | libc::PROT_EXEC) as u64;
    let allowed = mprotect_would_allow(read_exec);
    assert!(allowed, "PROT_READ|PROT_EXEC must be allowed");
}
/// `PROT_WRITE | PROT_EXEC` fails the model check.
#[test]
fn test_mprotect_rejects_prot_write_exec() {
    let write_exec = (libc::PROT_WRITE | libc::PROT_EXEC) as u64;
    let allowed = mprotect_would_allow(write_exec);
    assert!(
        !allowed,
        "PROT_WRITE|PROT_EXEC (W^X violation) must be REJECTED"
    );
}
/// `PROT_READ | PROT_WRITE | PROT_EXEC` fails the model check.
#[test]
fn test_mprotect_rejects_prot_read_write_exec() {
    let read_write_exec = (libc::PROT_READ | libc::PROT_WRITE | libc::PROT_EXEC) as u64;
    let allowed = mprotect_would_allow(read_write_exec);
    assert!(
        !allowed,
        "PROT_READ|PROT_WRITE|PROT_EXEC (W^X violation) must be REJECTED"
    );
}
/// `PROT_WRITE` alone passes the model check.
#[test]
fn test_mprotect_allows_prot_write_alone() {
    let write_only = libc::PROT_WRITE as u64;
    let allowed = mprotect_would_allow(write_only);
    assert!(allowed, "PROT_WRITE alone must be allowed");
}
/// `PROT_EXEC` alone passes the model check.
#[test]
fn test_mprotect_allows_prot_exec_alone() {
    let exec_only = libc::PROT_EXEC as u64;
    let allowed = mprotect_would_allow(exec_only);
    assert!(allowed, "PROT_EXEC alone must be allowed");
}
/// Syscalls requested through the extra list (`io_uring_setup`, `sysinfo`) must show
/// up as keys in the built filter.
#[test]
fn test_extra_syscalls_are_merged_into_filter() {
    let requested = [String::from("io_uring_setup"), String::from("sysinfo")];
    let filter = SeccompManager::minimal_filter(true, &requested).unwrap();

    let has_io_uring_setup = filter.contains_key(&libc::SYS_io_uring_setup);
    assert!(
        has_io_uring_setup,
        "io_uring_setup must be in filter when requested via extra_syscalls"
    );

    let has_sysinfo = filter.contains_key(&libc::SYS_sysinfo);
    assert!(
        has_sysinfo,
        "sysinfo must be in filter when requested via extra_syscalls"
    );
}
/// Requesting `clone` through the extra list must still leave it with a non-empty
/// rule list in the built filter.
#[test]
fn test_extra_syscalls_do_not_override_arg_filtered() {
    let requested = [String::from("clone")];
    let filter = SeccompManager::minimal_filter(true, &requested).unwrap();
    let still_filtered = filter
        .get(&libc::SYS_clone)
        .map_or(false, |conds| !conds.is_empty());
    assert!(
        still_filtered,
        "clone must retain argument-level filtering even when in extra_syscalls"
    );
}
/// An unrecognised syscall name in the extra list must not make filter construction fail.
#[test]
fn test_extra_syscalls_unknown_name_is_warned_and_skipped() {
    let requested = [String::from("not_a_real_syscall")];
    let outcome = SeccompManager::minimal_filter(true, &requested);
    assert!(
        matches!(outcome, Ok(_)),
        "Unknown syscall name should warn and skip, not error"
    );
}
/// Building the filter twice with an empty extra list yields the same number of entries.
///
/// NOTE(review): both calls pass identical arguments, so this only checks that the
/// entry count is deterministic; it does not compare against a distinct "no extras"
/// code path. Confirm whether a stronger comparison was intended.
#[test]
fn test_extra_syscalls_empty_is_noop() {
    let baseline_len = SeccompManager::minimal_filter(true, &[]).unwrap().len();
    let empty_extra_len = SeccompManager::minimal_filter(true, &[]).unwrap().len();
    assert_eq!(baseline_len, empty_extra_len);
}
/// Requesting `read` through the extra list still leaves `read` present in the filter.
#[test]
fn test_extra_syscalls_duplicate_of_default_is_harmless() {
    let requested = [String::from("read")];
    let has_read = SeccompManager::minimal_filter(true, &requested)
        .unwrap()
        .contains_key(&libc::SYS_read);
    assert!(has_read);
}
/// Requesting `kexec_load` through the extra list must not add it to the filter.
#[test]
fn test_extra_syscalls_blocked_known_syscall_not_added() {
    let requested = [String::from("kexec_load")];
    let filter = SeccompManager::minimal_filter(true, &requested).unwrap();
    let has_kexec_load = filter.contains_key(&libc::SYS_kexec_load);
    assert!(
        !has_kexec_load,
        "kexec_load must be blocked even when requested via --seccomp-allow"
    );
}
/// Requesting `io_uring_setup` through the extra list must add it to the filter.
#[test]
fn test_extra_syscalls_opt_in_syscall_is_added() {
    let requested = [String::from("io_uring_setup")];
    let has_io_uring_setup = SeccompManager::minimal_filter(true, &requested)
        .unwrap()
        .contains_key(&libc::SYS_io_uring_setup);
    assert!(
        has_io_uring_setup,
        "io_uring_setup is in OPT_IN_SYSCALLS and must be added"
    );
}
}