use crate::error::{NucleusError, Result};
use crate::security::policy::sha256_hex;
use seccompiler::{BpfProgram, SeccompAction, SeccompCondition, SeccompFilter, SeccompRule};
use std::collections::BTreeMap;
use std::path::Path;
use tracing::{debug, info, warn};
/// Builds and installs seccomp-BPF filters, remembering whether one has
/// already been installed through this instance.
pub struct SeccompManager {
/// `true` once one of the `apply_*` methods has installed a filter; later
/// `apply_*` calls then return early without installing another one.
applied: bool,
}
/// Union of the `CLONE_NEW*` namespace-creation flags.
///
/// The `clone` rule built in `minimal_filter` only matches (and therefore only
/// allows the call) when none of these bits are set in the flags argument.
const DENIED_CLONE_NAMESPACE_FLAGS: u64 = (libc::CLONE_NEWUSER
| libc::CLONE_NEWNS
| libc::CLONE_NEWNET
| libc::CLONE_NEWIPC
| libc::CLONE_NEWUTS
| libc::CLONE_NEWPID
| libc::CLONE_NEWCGROUP
| libc::CLONE_NEWTIME) as u64;
impl SeccompManager {
pub fn new() -> Self {
Self { applied: false }
}
/// Returns the syscall numbers that the built-in filter allows without any
/// argument conditions, independent of the network mode.
///
/// Syscalls that need argument-level rules (`socket`, `ioctl`, `prctl`,
/// `mprotect`, `clone`, `execveat`) and `clone3` are not listed here; they
/// are added separately by `minimal_filter`.
fn base_allowed_syscalls() -> Vec<i64> {
let mut syscalls = vec![
// File and descriptor I/O.
libc::SYS_read,
libc::SYS_write,
libc::SYS_openat,
libc::SYS_close,
libc::SYS_fstat,
libc::SYS_lseek,
libc::SYS_fcntl,
libc::SYS_readv,
libc::SYS_writev,
libc::SYS_pread64,
libc::SYS_pwrite64,
libc::SYS_readlinkat,
libc::SYS_newfstatat,
libc::SYS_statx,
libc::SYS_faccessat,
libc::SYS_faccessat2,
libc::SYS_dup,
libc::SYS_dup3,
libc::SYS_pipe2,
libc::SYS_unlinkat,
libc::SYS_renameat,
libc::SYS_renameat2,
libc::SYS_linkat,
libc::SYS_symlinkat,
libc::SYS_fchmod,
libc::SYS_fchmodat,
libc::SYS_truncate,
libc::SYS_ftruncate,
libc::SYS_fallocate,
#[cfg(target_arch = "x86_64")]
libc::SYS_fadvise64,
libc::SYS_fsync,
libc::SYS_fdatasync,
libc::SYS_flock,
#[cfg(target_arch = "x86_64")]
libc::SYS_sendfile,
libc::SYS_copy_file_range,
libc::SYS_splice,
libc::SYS_tee,
// Memory management (mprotect is deliberately absent; it gets argument rules).
libc::SYS_mmap,
libc::SYS_munmap,
libc::SYS_brk,
libc::SYS_mremap,
libc::SYS_madvise,
libc::SYS_msync,
// Process lifecycle and identity.
libc::SYS_execve,
libc::SYS_wait4,
libc::SYS_waitid,
libc::SYS_exit,
libc::SYS_exit_group,
libc::SYS_getpid,
libc::SYS_gettid,
libc::SYS_getuid,
libc::SYS_getgid,
libc::SYS_geteuid,
libc::SYS_getegid,
libc::SYS_getppid,
libc::SYS_setsid,
libc::SYS_getgroups,
// Signals.
libc::SYS_rt_sigaction,
libc::SYS_rt_sigprocmask,
libc::SYS_rt_sigreturn,
libc::SYS_rt_sigsuspend,
libc::SYS_sigaltstack,
libc::SYS_kill,
libc::SYS_tgkill,
// Clocks and sleeping.
libc::SYS_clock_gettime,
libc::SYS_clock_getres,
libc::SYS_clock_nanosleep,
libc::SYS_gettimeofday,
libc::SYS_nanosleep,
// Working directory and directory listing.
libc::SYS_getcwd,
libc::SYS_chdir,
libc::SYS_fchdir,
libc::SYS_mkdirat,
libc::SYS_getdents64,
// Miscellaneous runtime support.
libc::SYS_uname,
libc::SYS_getrandom,
libc::SYS_futex,
libc::SYS_set_tid_address,
libc::SYS_set_robust_list,
libc::SYS_get_robust_list,
libc::SYS_sysinfo,
libc::SYS_umask,
libc::SYS_prlimit64,
libc::SYS_getrusage,
libc::SYS_times,
libc::SYS_sched_yield,
libc::SYS_sched_getaffinity,
libc::SYS_getcpu,
libc::SYS_rseq,
libc::SYS_close_range,
// Landlock.
libc::SYS_landlock_create_ruleset,
libc::SYS_landlock_add_rule,
libc::SYS_landlock_restrict_self,
// Socket introspection and socketpair (allowed in every network mode).
libc::SYS_getsockname,
libc::SYS_getpeername,
libc::SYS_socketpair,
libc::SYS_getsockopt,
// Polling and event descriptors.
libc::SYS_ppoll,
libc::SYS_pselect6,
libc::SYS_epoll_create1,
libc::SYS_epoll_ctl,
libc::SYS_epoll_pwait,
libc::SYS_eventfd2,
libc::SYS_signalfd4,
libc::SYS_timerfd_create,
libc::SYS_timerfd_settime,
libc::SYS_timerfd_gettime,
];
// Older syscall variants; this extension is only compiled in on x86_64.
#[cfg(target_arch = "x86_64")]
syscalls.extend_from_slice(&[
libc::SYS_open,
libc::SYS_stat,
libc::SYS_lstat,
libc::SYS_access,
libc::SYS_readlink,
libc::SYS_dup2,
libc::SYS_pipe,
libc::SYS_unlink,
libc::SYS_rename,
libc::SYS_link,
libc::SYS_symlink,
libc::SYS_chmod,
libc::SYS_mkdir,
libc::SYS_rmdir,
libc::SYS_getdents,
libc::SYS_getpgrp,
libc::SYS_arch_prctl,
libc::SYS_getrlimit,
libc::SYS_poll,
libc::SYS_select,
libc::SYS_epoll_create,
libc::SYS_epoll_wait,
libc::SYS_eventfd,
libc::SYS_signalfd,
]);
syscalls
}
/// Address families accepted as the first argument of `socket`.
///
/// `AF_UNIX` is always present; `AF_INET` and `AF_INET6` are appended, in
/// that order, only when `allow_network` is set.
fn allowed_socket_domains(allow_network: bool) -> Vec<i32> {
    let mut domains = vec![libc::AF_UNIX];
    if allow_network {
        domains.extend([libc::AF_INET, libc::AF_INET6]);
    }
    domains
}
/// Extra syscalls allowed without argument conditions when networking is
/// enabled; empty when `allow_network` is `false`.
fn network_mode_syscalls(allow_network: bool) -> Vec<i64> {
    if !allow_network {
        return Vec::new();
    }
    vec![
        libc::SYS_connect,
        libc::SYS_sendto,
        libc::SYS_recvfrom,
        libc::SYS_sendmsg,
        libc::SYS_recvmsg,
        libc::SYS_shutdown,
        libc::SYS_bind,
        libc::SYS_listen,
        libc::SYS_accept,
        libc::SYS_accept4,
        libc::SYS_setsockopt,
    ]
}
fn minimal_filter(allow_network: bool) -> Result<BTreeMap<i64, Vec<SeccompRule>>> {
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
let allowed_syscalls = Self::base_allowed_syscalls();
for syscall in allowed_syscalls {
rules.insert(syscall, Vec::new());
}
for syscall in Self::network_mode_syscalls(allow_network) {
rules.insert(syscall, Vec::new());
}
let mut socket_rules = Vec::new();
for domain in Self::allowed_socket_domains(allow_network) {
let condition = SeccompCondition::new(
0, seccompiler::SeccompCmpArgLen::Dword,
seccompiler::SeccompCmpOp::Eq,
domain as u64,
)
.map_err(|e| {
NucleusError::SeccompError(format!(
"Failed to create socket domain condition: {}",
e
))
})?;
let rule = SeccompRule::new(vec![condition]).map_err(|e| {
NucleusError::SeccompError(format!("Failed to create socket rule: {}", e))
})?;
socket_rules.push(rule);
}
rules.insert(libc::SYS_socket, socket_rules);
let ioctl_allowed: &[u64] = &[
0x5401, 0x5402, 0x5403, 0x5404, 0x540B, 0x540F, 0x5410, 0x5413, 0x5429, 0x541B, 0x5451, 0x5450, ];
let mut ioctl_rules = Vec::new();
for &request in ioctl_allowed {
let condition = SeccompCondition::new(
1, seccompiler::SeccompCmpArgLen::Dword,
seccompiler::SeccompCmpOp::Eq,
request,
)
.map_err(|e| {
NucleusError::SeccompError(format!("Failed to create ioctl condition: {}", e))
})?;
let rule = SeccompRule::new(vec![condition]).map_err(|e| {
NucleusError::SeccompError(format!("Failed to create ioctl rule: {}", e))
})?;
ioctl_rules.push(rule);
}
rules.insert(libc::SYS_ioctl, ioctl_rules);
let prctl_allowed: &[u64] = &[
1, 2, 15, 16, 38, 39, ];
let mut prctl_rules = Vec::new();
for &option in prctl_allowed {
let condition = SeccompCondition::new(
0, seccompiler::SeccompCmpArgLen::Dword,
seccompiler::SeccompCmpOp::Eq,
option,
)
.map_err(|e| {
NucleusError::SeccompError(format!("Failed to create prctl condition: {}", e))
})?;
let rule = SeccompRule::new(vec![condition]).map_err(|e| {
NucleusError::SeccompError(format!("Failed to create prctl rule: {}", e))
})?;
prctl_rules.push(rule);
}
rules.insert(libc::SYS_prctl, prctl_rules);
let mut mprotect_rules = Vec::new();
for allowed in [0, libc::PROT_WRITE as u64, libc::PROT_EXEC as u64] {
let condition = SeccompCondition::new(
2, seccompiler::SeccompCmpArgLen::Dword,
seccompiler::SeccompCmpOp::MaskedEq((libc::PROT_WRITE | libc::PROT_EXEC) as u64),
allowed,
)
.map_err(|e| {
NucleusError::SeccompError(format!("Failed to create mprotect condition: {}", e))
})?;
let rule = SeccompRule::new(vec![condition]).map_err(|e| {
NucleusError::SeccompError(format!("Failed to create mprotect rule: {}", e))
})?;
mprotect_rules.push(rule);
}
rules.insert(libc::SYS_mprotect, mprotect_rules);
rules.insert(libc::SYS_clone3, Vec::new());
let clone_condition = SeccompCondition::new(
0, seccompiler::SeccompCmpArgLen::Qword,
seccompiler::SeccompCmpOp::MaskedEq(DENIED_CLONE_NAMESPACE_FLAGS),
0, )
.map_err(|e| {
NucleusError::SeccompError(format!("Failed to create clone condition: {}", e))
})?;
let clone_rule = SeccompRule::new(vec![clone_condition]).map_err(|e| {
NucleusError::SeccompError(format!("Failed to create clone rule: {}", e))
})?;
rules.insert(libc::SYS_clone, vec![clone_rule]);
let execveat_condition = SeccompCondition::new(
4, seccompiler::SeccompCmpArgLen::Dword,
seccompiler::SeccompCmpOp::MaskedEq(libc::AT_EMPTY_PATH as u64),
0, )
.map_err(|e| {
NucleusError::SeccompError(format!("Failed to create execveat condition: {}", e))
})?;
let execveat_rule = SeccompRule::new(vec![execveat_condition]).map_err(|e| {
NucleusError::SeccompError(format!("Failed to create execveat rule: {}", e))
})?;
rules.insert(libc::SYS_execveat, vec![execveat_rule]);
Ok(rules)
}
/// Compiles the network-enabled built-in filter to a BPF program without
/// installing it.
///
/// Syscalls outside the rule map get `EPERM`; matching syscalls are allowed.
///
/// # Errors
/// Returns `NucleusError::SeccompError` if rule construction fails, the build
/// architecture is not supported by seccompiler, or compilation fails.
pub fn compile_minimal_filter() -> Result<BpfProgram> {
    let filter = SeccompFilter::new(
        Self::minimal_filter(true)?,
        SeccompAction::Errno(libc::EPERM as u32),
        SeccompAction::Allow,
        std::env::consts::ARCH.try_into().map_err(|e| {
            NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
        })?,
    )
    .map_err(|e| NucleusError::SeccompError(format!("Failed to create seccomp filter: {}", e)))?;

    let compiled: std::result::Result<BpfProgram, _> = filter.try_into();
    compiled
        .map_err(|e| NucleusError::SeccompError(format!("Failed to compile BPF program: {}", e)))
}
/// Installs the built-in filter in strict mode: failures are returned as
/// errors and deny logging is not requested.
pub fn apply_minimal_filter(&mut self) -> Result<bool> {
    let (best_effort, log_denied) = (false, false);
    self.apply_minimal_filter_with_mode(best_effort, log_denied)
}
/// Installs the built-in filter with networking syscalls enabled.
///
/// `best_effort` and `log_denied` are forwarded unchanged to
/// `apply_filter_for_network_mode`.
pub fn apply_minimal_filter_with_mode(
    &mut self,
    best_effort: bool,
    log_denied: bool,
) -> Result<bool> {
    let allow_network = true;
    self.apply_filter_for_network_mode(allow_network, best_effort, log_denied)
}
/// Builds, compiles and installs the built-in filter for the given network
/// mode.
///
/// Returns `Ok(true)` when a filter is in place (either freshly installed or
/// already applied earlier) and `Ok(false)` when `best_effort` is set and a
/// step failed. Without `best_effort`, a failing step is returned as an error.
/// An unsupported build architecture is always returned as an error.
pub fn apply_filter_for_network_mode(
    &mut self,
    allow_network: bool,
    best_effort: bool,
    log_denied: bool,
) -> Result<bool> {
    if self.applied {
        debug!("Seccomp filter already applied, skipping");
        return Ok(true);
    }

    info!(allow_network, "Applying seccomp filter");

    // Stage 1: rule map.
    let rules = match Self::minimal_filter(allow_network) {
        Ok(rules) => rules,
        Err(e) if best_effort => {
            warn!(
                "Failed to create seccomp rules: {} (continuing without seccomp)",
                e
            );
            return Ok(false);
        }
        Err(e) => return Err(e),
    };

    // Stage 2: filter object (EPERM for anything not matched by the rules).
    let built = SeccompFilter::new(
        rules,
        SeccompAction::Errno(libc::EPERM as u32),
        SeccompAction::Allow,
        std::env::consts::ARCH.try_into().map_err(|e| {
            NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
        })?,
    );
    let filter = match built {
        Ok(filter) => filter,
        Err(e) if best_effort => {
            warn!(
                "Failed to create seccomp filter: {} (continuing without seccomp)",
                e
            );
            return Ok(false);
        }
        Err(e) => {
            return Err(NucleusError::SeccompError(format!(
                "Failed to create seccomp filter: {}",
                e
            )));
        }
    };

    // Stage 3: BPF compilation.
    let bpf_prog: BpfProgram = match filter.try_into() {
        Ok(program) => program,
        Err(e) if best_effort => {
            warn!(
                "Failed to compile BPF program: {} (continuing without seccomp)",
                e
            );
            return Ok(false);
        }
        Err(e) => {
            return Err(NucleusError::SeccompError(format!(
                "Failed to compile BPF program: {}",
                e
            )));
        }
    };

    // Stage 4: installation.
    match Self::apply_bpf_program(&bpf_prog, log_denied) {
        Ok(()) => {
            self.applied = true;
            info!("Successfully applied seccomp filter");
            Ok(true)
        }
        Err(e) if best_effort => {
            warn!(
                "Failed to apply seccomp filter: {} (continuing without seccomp)",
                e
            );
            Ok(false)
        }
        Err(e) => Err(NucleusError::SeccompError(format!(
            "Failed to apply seccomp filter: {}",
            e
        ))),
    }
}
/// Loads a JSON seccomp profile from `profile_path` and installs it.
///
/// Steps, in order: read the file, optionally compare its SHA-256 hex digest
/// with `expected_sha256`, parse it, allow every known syscall named in a
/// `SCMP_ACT_ALLOW` group, overlay the built-in argument rules for syscalls in
/// `ARG_FILTERED_SYSCALLS`, drop `clone3`, then compile and install the filter.
/// `audit_mode` is passed to `apply_bpf_program` as its `log_denied` flag.
///
/// Returns `Ok(true)` once the filter is installed, or immediately if a filter
/// was already applied through this manager.
///
/// # Errors
/// Returns `NucleusError::SeccompError` when the file cannot be read, the hash
/// does not match, the JSON cannot be parsed, or the filter cannot be built,
/// compiled or installed.
pub fn apply_profile_from_file(
&mut self,
profile_path: &Path,
expected_sha256: Option<&str>,
audit_mode: bool,
) -> Result<bool> {
if self.applied {
debug!("Seccomp filter already applied, skipping");
return Ok(true);
}
info!("Loading seccomp profile from {:?}", profile_path);
let content = std::fs::read(profile_path).map_err(|e| {
NucleusError::SeccompError(format!(
"Failed to read seccomp profile {:?}: {}",
profile_path, e
))
})?;
// Integrity check runs on the raw bytes, before any parsing.
// The comparison is an exact string match against the computed digest.
if let Some(expected) = expected_sha256 {
let actual = sha256_hex(&content);
if actual != expected {
return Err(NucleusError::SeccompError(format!(
"Seccomp profile hash mismatch: expected {}, got {}",
expected, actual
)));
}
info!("Seccomp profile hash verified: {}", actual);
}
let profile: SeccompProfile = serde_json::from_slice(&content).map_err(|e| {
NucleusError::SeccompError(format!("Failed to parse seccomp profile: {}", e))
})?;
Self::warn_missing_arg_filters(&profile);
// Only the syscall names of ALLOW groups are used here; each is inserted with
// an empty rule list. Names this build does not know are skipped with a warning.
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
for syscall_group in &profile.syscalls {
if syscall_group.action == "SCMP_ACT_ALLOW" {
for name in &syscall_group.names {
if let Some(nr) = syscall_name_to_number(name) {
rules.insert(nr, Vec::new());
} else {
warn!("Unknown syscall in profile: {} (skipping)", name);
}
}
}
}
// For argument-filtered syscalls the profile allowed, replace the empty rule
// list with the built-in (network-enabled) argument rules. Syscalls the
// profile did not allow are left out: only occupied entries are touched.
let builtin_rules = Self::minimal_filter(true)?;
for syscall_name in Self::ARG_FILTERED_SYSCALLS {
if let Some(nr) = syscall_name_to_number(syscall_name) {
if let std::collections::btree_map::Entry::Occupied(mut entry) = rules.entry(nr) {
if let Some(builtin) = builtin_rules.get(&nr) {
if !builtin.is_empty() {
info!(
"Merging built-in argument filters for '{}' into custom profile",
syscall_name
);
entry.insert(builtin.clone());
}
}
}
}
}
// NOTE(review): clone3 is removed unconditionally, so a custom profile can
// never allow it, while the built-in filter does allow clone3 — confirm this
// asymmetry is intended.
rules.remove(&libc::SYS_clone3);
let filter = SeccompFilter::new(
rules,
SeccompAction::Errno(libc::EPERM as u32),
SeccompAction::Allow,
std::env::consts::ARCH.try_into().map_err(|e| {
NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
})?,
)
.map_err(|e| {
NucleusError::SeccompError(format!(
"Failed to create seccomp filter from profile: {}",
e
))
})?;
let bpf_prog: BpfProgram = filter.try_into().map_err(|e| {
NucleusError::SeccompError(format!("Failed to compile BPF program from profile: {}", e))
})?;
match Self::apply_bpf_program(&bpf_prog, audit_mode) {
Ok(_) => {
self.applied = true;
info!(
"Seccomp profile applied from {:?} (log_denied={})",
profile_path, audit_mode
);
Ok(true)
}
Err(e) => Err(e),
}
}
/// Installs a trace filter: no syscall is blocked, and every syscall takes the
/// `Log` action so it can be recorded.
///
/// Returns `Ok(true)` once the filter is installed, or immediately if a filter
/// was already applied through this manager.
///
/// # Errors
/// Returns `NucleusError::SeccompError` if the build architecture is not
/// supported, or if the filter cannot be created, compiled or installed.
pub fn apply_trace_filter(&mut self) -> Result<bool> {
    if self.applied {
        debug!("Seccomp filter already applied, skipping trace filter");
        return Ok(true);
    }

    info!("Applying seccomp trace filter (allow-all + LOG)");

    let filter = SeccompFilter::new(
        // No per-syscall rules: every syscall falls through to the mismatch action.
        BTreeMap::new(),
        // Mismatch action: log the syscall and let it proceed. It must differ
        // from the match action — seccompiler rejects a filter whose two
        // actions are identical, and `Allow` here would log nothing.
        SeccompAction::Log,
        // Match action: never taken because the rule map is empty.
        SeccompAction::Allow,
        std::env::consts::ARCH.try_into().map_err(|e| {
            NucleusError::SeccompError(format!("Unsupported architecture: {:?}", e))
        })?,
    )
    .map_err(|e| NucleusError::SeccompError(format!("Failed to create trace filter: {}", e)))?;

    let bpf_prog: BpfProgram = filter.try_into().map_err(|e| {
        NucleusError::SeccompError(format!("Failed to compile trace BPF: {}", e))
    })?;

    Self::apply_bpf_program(&bpf_prog, true)?;
    self.applied = true;
    info!("Seccomp trace filter applied (all syscalls allowed + logged)");
    Ok(true)
}
/// Names of the syscalls that `minimal_filter` inserts separately from the
/// base allow-list (with argument conditions, except `clone3`).
///
/// Custom profiles that allow one of these without `args` trigger a warning,
/// and `apply_profile_from_file` overlays the built-in rules for them.
const ARG_FILTERED_SYSCALLS: &'static [&'static str] =
&["clone", "clone3", "execveat", "ioctl", "mprotect", "prctl", "socket"];
/// Logs a warning for each syscall in `ARG_FILTERED_SYSCALLS` that `profile`
/// allows in a `SCMP_ACT_ALLOW` group carrying no `args`.
fn warn_missing_arg_filters(profile: &SeccompProfile) {
    let unfiltered_allow_groups = profile
        .syscalls
        .iter()
        .filter(|group| group.action == "SCMP_ACT_ALLOW" && group.args.is_empty());

    for group in unfiltered_allow_groups {
        let sensitive_names = group
            .names
            .iter()
            .filter(|name| Self::ARG_FILTERED_SYSCALLS.contains(&name.as_str()));
        for name in sensitive_names {
            warn!(
                "Custom seccomp profile allows '{}' without argument filters. \
                 The built-in filter restricts this syscall at the argument level. \
                 This profile weakens security compared to the default.",
                name
            );
        }
    }
}
/// Reports whether a filter has been installed through this manager.
pub fn is_applied(&self) -> bool {
self.applied
}
/// Installs `bpf_prog`, requesting `SECCOMP_FILTER_FLAG_LOG` when `log_denied`
/// is set.
///
/// If the first attempt fails with `EINVAL` while the log flag was requested,
/// the install is retried once with no flags so enforcement still happens
/// without deny logging. Any other failure is returned as
/// `NucleusError::SeccompError`.
fn apply_bpf_program(bpf_prog: &BpfProgram, log_denied: bool) -> Result<()> {
    let flags: libc::c_ulong = if log_denied {
        libc::SECCOMP_FILTER_FLAG_LOG as libc::c_ulong
    } else {
        0
    };

    let err = match Self::apply_bpf_program_with_flags(bpf_prog, flags) {
        Ok(()) => return Ok(()),
        Err(err) => err,
    };

    let log_flag_rejected = log_denied
        && err.raw_os_error() == Some(libc::EINVAL)
        && libc::SECCOMP_FILTER_FLAG_LOG != 0;
    if !log_flag_rejected {
        return Err(NucleusError::SeccompError(format!(
            "Failed to apply seccomp filter: {}",
            err
        )));
    }

    warn!(
        "Kernel rejected SECCOMP_FILTER_FLAG_LOG; continuing with seccomp \
         enforcement without deny logging"
    );
    Self::apply_bpf_program_with_flags(bpf_prog, 0)?;
    Ok(())
}
/// Sets `PR_SET_NO_NEW_PRIVS` and then installs `bpf_prog` through the raw
/// `seccomp` syscall in `SECCOMP_SET_MODE_FILTER` mode with the given `flags`.
///
/// # Errors
/// Returns `ErrorKind::InvalidInput` if the program has more instructions than
/// fit in `sock_fprog::len` (a `u16`); this is checked before any syscall is
/// made. Otherwise returns the OS error from `prctl` or `seccomp` on failure.
fn apply_bpf_program_with_flags(
    bpf_prog: &BpfProgram,
    flags: libc::c_ulong,
) -> std::io::Result<()> {
    // Refuse oversized programs instead of silently truncating the instruction
    // count with an `as` cast.
    let len = u16::try_from(bpf_prog.len()).map_err(|_| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            format!(
                "BPF program has {} instructions, which does not fit in sock_fprog.len",
                bpf_prog.len()
            ),
        )
    })?;

    // SAFETY: this prctl call passes only integer arguments; no pointers are
    // handed to the kernel.
    let rc = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
    if rc != 0 {
        return Err(std::io::Error::last_os_error());
    }

    // The `*mut` cast only satisfies the field type of `sock_fprog`.
    let prog = libc::sock_fprog {
        len,
        filter: bpf_prog.as_ptr() as *mut libc::sock_filter,
    };

    // SAFETY: `prog` points into `bpf_prog`, which stays borrowed (and thus
    // alive) until the syscall returns.
    let rc = unsafe {
        libc::syscall(
            libc::SYS_seccomp,
            libc::SECCOMP_SET_MODE_FILTER,
            flags,
            &prog as *const libc::sock_fprog,
        )
    };
    if rc < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}
}
use crate::security::seccomp_generate::SeccompProfile;
/// Translates a syscall name, as written in a custom profile, into its number
/// using the `libc::SYS_*` constants.
///
/// Returns `None` for any name not listed here. Arms marked
/// `#[cfg(target_arch = "x86_64")]` are only compiled in on x86_64, so those
/// names resolve to `None` on other architectures.
fn syscall_name_to_number(name: &str) -> Option<i64> {
match name {
"read" => Some(libc::SYS_read),
"write" => Some(libc::SYS_write),
#[cfg(target_arch = "x86_64")]
"open" => Some(libc::SYS_open),
"openat" => Some(libc::SYS_openat),
"close" => Some(libc::SYS_close),
#[cfg(target_arch = "x86_64")]
"stat" => Some(libc::SYS_stat),
"fstat" => Some(libc::SYS_fstat),
#[cfg(target_arch = "x86_64")]
"lstat" => Some(libc::SYS_lstat),
"lseek" => Some(libc::SYS_lseek),
#[cfg(target_arch = "x86_64")]
"access" => Some(libc::SYS_access),
"fcntl" => Some(libc::SYS_fcntl),
"readv" => Some(libc::SYS_readv),
"writev" => Some(libc::SYS_writev),
"pread64" => Some(libc::SYS_pread64),
"pwrite64" => Some(libc::SYS_pwrite64),
#[cfg(target_arch = "x86_64")]
"readlink" => Some(libc::SYS_readlink),
"readlinkat" => Some(libc::SYS_readlinkat),
"newfstatat" => Some(libc::SYS_newfstatat),
"statx" => Some(libc::SYS_statx),
"faccessat" => Some(libc::SYS_faccessat),
"faccessat2" => Some(libc::SYS_faccessat2),
"dup" => Some(libc::SYS_dup),
#[cfg(target_arch = "x86_64")]
"dup2" => Some(libc::SYS_dup2),
"dup3" => Some(libc::SYS_dup3),
#[cfg(target_arch = "x86_64")]
"pipe" => Some(libc::SYS_pipe),
"pipe2" => Some(libc::SYS_pipe2),
#[cfg(target_arch = "x86_64")]
"unlink" => Some(libc::SYS_unlink),
"unlinkat" => Some(libc::SYS_unlinkat),
#[cfg(target_arch = "x86_64")]
"rename" => Some(libc::SYS_rename),
"renameat" => Some(libc::SYS_renameat),
"renameat2" => Some(libc::SYS_renameat2),
#[cfg(target_arch = "x86_64")]
"link" => Some(libc::SYS_link),
"linkat" => Some(libc::SYS_linkat),
#[cfg(target_arch = "x86_64")]
"symlink" => Some(libc::SYS_symlink),
"symlinkat" => Some(libc::SYS_symlinkat),
#[cfg(target_arch = "x86_64")]
"chmod" => Some(libc::SYS_chmod),
"fchmod" => Some(libc::SYS_fchmod),
"fchmodat" => Some(libc::SYS_fchmodat),
"truncate" => Some(libc::SYS_truncate),
"ftruncate" => Some(libc::SYS_ftruncate),
"fallocate" => Some(libc::SYS_fallocate),
#[cfg(target_arch = "x86_64")]
"fadvise64" => Some(libc::SYS_fadvise64),
"fsync" => Some(libc::SYS_fsync),
"fdatasync" => Some(libc::SYS_fdatasync),
"flock" => Some(libc::SYS_flock),
#[cfg(target_arch = "x86_64")]
"sendfile" => Some(libc::SYS_sendfile),
"copy_file_range" => Some(libc::SYS_copy_file_range),
"splice" => Some(libc::SYS_splice),
"tee" => Some(libc::SYS_tee),
"mmap" => Some(libc::SYS_mmap),
"munmap" => Some(libc::SYS_munmap),
"mprotect" => Some(libc::SYS_mprotect),
"brk" => Some(libc::SYS_brk),
"mremap" => Some(libc::SYS_mremap),
"madvise" => Some(libc::SYS_madvise),
"msync" => Some(libc::SYS_msync),
// mlock, munlock, fork and memfd_create (further down) are resolvable here even
// though the built-in allow-list does not include them.
"mlock" => Some(libc::SYS_mlock),
"munlock" => Some(libc::SYS_munlock),
#[cfg(target_arch = "x86_64")]
"fork" => Some(libc::SYS_fork),
"clone" => Some(libc::SYS_clone),
"clone3" => Some(libc::SYS_clone3),
"execve" => Some(libc::SYS_execve),
"execveat" => Some(libc::SYS_execveat),
"wait4" => Some(libc::SYS_wait4),
"waitid" => Some(libc::SYS_waitid),
"exit" => Some(libc::SYS_exit),
"exit_group" => Some(libc::SYS_exit_group),
"getpid" => Some(libc::SYS_getpid),
"gettid" => Some(libc::SYS_gettid),
"getuid" => Some(libc::SYS_getuid),
"getgid" => Some(libc::SYS_getgid),
"geteuid" => Some(libc::SYS_geteuid),
"getegid" => Some(libc::SYS_getegid),
"getppid" => Some(libc::SYS_getppid),
#[cfg(target_arch = "x86_64")]
"getpgrp" => Some(libc::SYS_getpgrp),
"setsid" => Some(libc::SYS_setsid),
"getgroups" => Some(libc::SYS_getgroups),
"rt_sigaction" => Some(libc::SYS_rt_sigaction),
"rt_sigprocmask" => Some(libc::SYS_rt_sigprocmask),
"rt_sigreturn" => Some(libc::SYS_rt_sigreturn),
"rt_sigsuspend" => Some(libc::SYS_rt_sigsuspend),
"sigaltstack" => Some(libc::SYS_sigaltstack),
"kill" => Some(libc::SYS_kill),
"tgkill" => Some(libc::SYS_tgkill),
"clock_gettime" => Some(libc::SYS_clock_gettime),
"clock_getres" => Some(libc::SYS_clock_getres),
"clock_nanosleep" => Some(libc::SYS_clock_nanosleep),
"gettimeofday" => Some(libc::SYS_gettimeofday),
"nanosleep" => Some(libc::SYS_nanosleep),
"getcwd" => Some(libc::SYS_getcwd),
"chdir" => Some(libc::SYS_chdir),
"fchdir" => Some(libc::SYS_fchdir),
#[cfg(target_arch = "x86_64")]
"mkdir" => Some(libc::SYS_mkdir),
"mkdirat" => Some(libc::SYS_mkdirat),
#[cfg(target_arch = "x86_64")]
"rmdir" => Some(libc::SYS_rmdir),
#[cfg(target_arch = "x86_64")]
"getdents" => Some(libc::SYS_getdents),
"getdents64" => Some(libc::SYS_getdents64),
"socket" => Some(libc::SYS_socket),
"connect" => Some(libc::SYS_connect),
"sendto" => Some(libc::SYS_sendto),
"recvfrom" => Some(libc::SYS_recvfrom),
"sendmsg" => Some(libc::SYS_sendmsg),
"recvmsg" => Some(libc::SYS_recvmsg),
"shutdown" => Some(libc::SYS_shutdown),
"bind" => Some(libc::SYS_bind),
"listen" => Some(libc::SYS_listen),
"accept" => Some(libc::SYS_accept),
"accept4" => Some(libc::SYS_accept4),
"setsockopt" => Some(libc::SYS_setsockopt),
"getsockopt" => Some(libc::SYS_getsockopt),
"getsockname" => Some(libc::SYS_getsockname),
"getpeername" => Some(libc::SYS_getpeername),
"socketpair" => Some(libc::SYS_socketpair),
#[cfg(target_arch = "x86_64")]
"poll" => Some(libc::SYS_poll),
"ppoll" => Some(libc::SYS_ppoll),
#[cfg(target_arch = "x86_64")]
"select" => Some(libc::SYS_select),
"pselect6" => Some(libc::SYS_pselect6),
#[cfg(target_arch = "x86_64")]
"epoll_create" => Some(libc::SYS_epoll_create),
"epoll_create1" => Some(libc::SYS_epoll_create1),
"epoll_ctl" => Some(libc::SYS_epoll_ctl),
#[cfg(target_arch = "x86_64")]
"epoll_wait" => Some(libc::SYS_epoll_wait),
"epoll_pwait" => Some(libc::SYS_epoll_pwait),
#[cfg(target_arch = "x86_64")]
"eventfd" => Some(libc::SYS_eventfd),
"eventfd2" => Some(libc::SYS_eventfd2),
#[cfg(target_arch = "x86_64")]
"signalfd" => Some(libc::SYS_signalfd),
"signalfd4" => Some(libc::SYS_signalfd4),
"timerfd_create" => Some(libc::SYS_timerfd_create),
"timerfd_settime" => Some(libc::SYS_timerfd_settime),
"timerfd_gettime" => Some(libc::SYS_timerfd_gettime),
"uname" => Some(libc::SYS_uname),
"getrandom" => Some(libc::SYS_getrandom),
"futex" => Some(libc::SYS_futex),
"set_tid_address" => Some(libc::SYS_set_tid_address),
"set_robust_list" => Some(libc::SYS_set_robust_list),
"get_robust_list" => Some(libc::SYS_get_robust_list),
#[cfg(target_arch = "x86_64")]
"arch_prctl" => Some(libc::SYS_arch_prctl),
"sysinfo" => Some(libc::SYS_sysinfo),
"umask" => Some(libc::SYS_umask),
#[cfg(target_arch = "x86_64")]
"getrlimit" => Some(libc::SYS_getrlimit),
"prlimit64" => Some(libc::SYS_prlimit64),
"getrusage" => Some(libc::SYS_getrusage),
"times" => Some(libc::SYS_times),
"sched_yield" => Some(libc::SYS_sched_yield),
"sched_getaffinity" => Some(libc::SYS_sched_getaffinity),
"getcpu" => Some(libc::SYS_getcpu),
"rseq" => Some(libc::SYS_rseq),
"close_range" => Some(libc::SYS_close_range),
"memfd_create" => Some(libc::SYS_memfd_create),
"ioctl" => Some(libc::SYS_ioctl),
"prctl" => Some(libc::SYS_prctl),
"landlock_create_ruleset" => Some(libc::SYS_landlock_create_ruleset),
"landlock_add_rule" => Some(libc::SYS_landlock_add_rule),
"landlock_restrict_self" => Some(libc::SYS_landlock_restrict_self),
// Anything else is unknown to this table.
_ => None,
}
}
impl Default for SeccompManager {
fn default() -> Self {
Self::new()
}
}
// Unit tests for the allow-list contents and rule-map shape. None of them
// installs a filter; they only inspect the data the filter is built from.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_seccomp_manager_initial_state() {
let mgr = SeccompManager::new();
assert!(!mgr.is_applied());
}
// NOTE(review): despite its name this only checks the initial state; it does
// not call any `apply_*` method.
#[test]
fn test_apply_idempotent() {
let mgr = SeccompManager::new();
assert!(!mgr.is_applied());
}
#[test]
fn test_clone_denied_flags_include_newcgroup() {
assert_ne!(
DENIED_CLONE_NAMESPACE_FLAGS & libc::CLONE_NEWCGROUP as u64,
0
);
}
#[test]
fn test_clone_denied_flags_include_newtime() {
assert_ne!(
DENIED_CLONE_NAMESPACE_FLAGS & libc::CLONE_NEWTIME as u64,
0,
"CLONE_NEWTIME must be in denied clone namespace flags"
);
}
#[test]
fn test_network_none_socket_domains_are_unix_only() {
let domains = SeccompManager::allowed_socket_domains(false);
assert_eq!(domains, vec![libc::AF_UNIX]);
}
#[test]
fn test_network_enabled_socket_domains_exclude_netlink() {
let domains = SeccompManager::allowed_socket_domains(true);
assert!(domains.contains(&libc::AF_UNIX));
assert!(domains.contains(&libc::AF_INET));
assert!(domains.contains(&libc::AF_INET6));
assert!(!domains.contains(&libc::AF_NETLINK));
}
#[test]
fn test_network_mode_syscalls_only_enabled_when_network_allowed() {
let none = SeccompManager::network_mode_syscalls(false);
assert!(none.is_empty());
let enabled = SeccompManager::network_mode_syscalls(true);
assert!(enabled.contains(&libc::SYS_connect));
assert!(enabled.contains(&libc::SYS_bind));
assert!(enabled.contains(&libc::SYS_listen));
assert!(enabled.contains(&libc::SYS_accept));
assert!(enabled.contains(&libc::SYS_setsockopt));
}
#[test]
fn test_landlock_bootstrap_syscalls_present_in_base_allowlist() {
let base = SeccompManager::base_allowed_syscalls();
assert!(base.contains(&libc::SYS_landlock_create_ruleset));
assert!(base.contains(&libc::SYS_landlock_add_rule));
assert!(base.contains(&libc::SYS_landlock_restrict_self));
}
// Guards a fixed numeric range (512..=547) rather than named constants.
#[test]
fn test_x32_legacy_range_not_allowlisted() {
let base = SeccompManager::base_allowed_syscalls();
let net = SeccompManager::network_mode_syscalls(true);
for nr in 512_i64..=547_i64 {
assert!(
!base.contains(&nr) && !net.contains(&nr),
"x32 syscall number {} unexpectedly allowlisted",
nr
);
}
}
// Guards a fixed numeric range (359..=373) rather than named constants.
#[test]
fn test_i386_compat_socketcall_range_not_allowlisted() {
let base = SeccompManager::base_allowed_syscalls();
let net = SeccompManager::network_mode_syscalls(true);
for nr in 359_i64..=373_i64 {
assert!(
!base.contains(&nr) && !net.contains(&nr),
"i386 compat syscall number {} unexpectedly allowlisted",
nr
);
}
}
// Pins the list sizes so additions to the allow-lists must be deliberate.
// The `+ 7` is the number of syscalls `minimal_filter` inserts on top of the
// two lists. NOTE(review): 131 includes the x86_64-only entries.
#[test]
fn test_minimal_filter_allowlist_counts_are_stable() {
let base = SeccompManager::base_allowed_syscalls();
let net = SeccompManager::network_mode_syscalls(true);
assert_eq!(base.len(), 131);
assert_eq!(net.len(), 11);
assert_eq!(base.len() + 7, 138);
assert_eq!(base.len() + net.len() + 7, 149);
}
#[test]
fn test_arg_filtered_syscalls_list_includes_critical_syscalls() {
for name in &["clone", "clone3", "execveat", "ioctl", "prctl", "socket"] {
assert!(
SeccompManager::ARG_FILTERED_SYSCALLS.contains(name),
"'{}' must be in ARG_FILTERED_SYSCALLS",
name
);
}
}
#[test]
fn test_clone3_allowed_in_minimal_filter() {
let rules = SeccompManager::minimal_filter(true).unwrap();
assert!(
rules.contains_key(&libc::SYS_clone3),
"clone3 must be in the seccomp allowlist (glibc 2.34+ requires it)"
);
}
#[test]
fn test_clone_is_allowed_with_arg_filter() {
let rules = SeccompManager::minimal_filter(true).unwrap();
assert!(
rules.contains_key(&libc::SYS_clone),
"clone must be in the seccomp allowlist with arg filters"
);
}
#[test]
fn test_high_risk_syscalls_removed_from_base_allowlist() {
let base = SeccompManager::base_allowed_syscalls();
let removed = [
libc::SYS_chown,
libc::SYS_fchown,
libc::SYS_lchown,
libc::SYS_fchownat,
libc::SYS_sync,
libc::SYS_syncfs,
libc::SYS_mlock,
libc::SYS_munlock,
libc::SYS_mincore,
libc::SYS_vfork,
libc::SYS_tkill,
];
for syscall in removed {
assert!(
!base.contains(&syscall),
"syscall {} unexpectedly present in base allowlist",
syscall
);
}
}
// Every ARG_FILTERED_SYSCALLS entry except clone3 must carry at least one rule
// in the built-in map; clone3 is skipped because it is inserted with none.
#[test]
fn test_custom_profile_preserves_clone_arg_filters() {
let rules = SeccompManager::minimal_filter(true).unwrap();
for name in SeccompManager::ARG_FILTERED_SYSCALLS {
if *name == "clone3" {
continue;
}
if let Some(nr) = syscall_name_to_number(name) {
let entry = rules.get(&nr);
assert!(
entry.is_some() && !entry.unwrap().is_empty(),
"built-in filter must have argument-level rules for '{}' \
so apply_profile_from_file can merge them into custom profiles",
name
);
}
}
}
#[test]
fn test_memfd_create_not_in_default_allowlist() {
let base = SeccompManager::base_allowed_syscalls();
assert!(
!base.contains(&libc::SYS_memfd_create),
"memfd_create must not be in the default seccomp allowlist (fileless exec risk)"
);
let rules = SeccompManager::minimal_filter(true).unwrap();
assert!(
!rules.contains_key(&libc::SYS_memfd_create),
"memfd_create must not be in the compiled seccomp filter rules"
);
}
#[test]
fn test_mprotect_has_arg_filtering() {
let base = SeccompManager::base_allowed_syscalls();
assert!(
!base.contains(&libc::SYS_mprotect),
"SYS_mprotect must not be unconditionally allowed - needs arg filtering"
);
let rules = SeccompManager::minimal_filter(true).unwrap();
let mprotect_rules = rules.get(&libc::SYS_mprotect);
assert!(
mprotect_rules.is_some(),
"mprotect must be present in the seccomp filter rules"
);
assert!(
!mprotect_rules.unwrap().is_empty(),
"mprotect must have argument-level conditions to prevent W^X violations"
);
}
// Scans the text of seccomp.rs (presumably this very file — confirm) and
// requires the marker text within the 200 bytes preceding each block opener.
#[test]
fn test_unsafe_blocks_have_safety_comments() {
let source = include_str!("seccomp.rs");
let mut pos = 0;
// The search literal on the next line is itself a hit when this file is the
// one being scanned; this "// SAFETY:" mention keeps that hit inside the window.
while let Some(idx) = source[pos..].find("unsafe {") {
let abs_idx = pos + idx;
let start = abs_idx.saturating_sub(200);
let context = &source[start..abs_idx];
assert!(
context.contains("SAFETY:"),
"unsafe block at byte {} must have a // SAFETY: comment. Context: ...{}...",
abs_idx,
&source[abs_idx.saturating_sub(80)..abs_idx + 10]
);
pos = abs_idx + 1;
}
}
// Pure-Rust model of the mprotect rules in `minimal_filter`: mask the prot
// value to WRITE|EXEC and accept none, WRITE only, or EXEC only. The tests
// below exercise this model, not the compiled BPF program.
fn mprotect_would_allow(prot: u64) -> bool {
let mask = (libc::PROT_WRITE | libc::PROT_EXEC) as u64;
let allowed_values: &[u64] = &[0, libc::PROT_WRITE as u64, libc::PROT_EXEC as u64];
let masked = prot & mask;
allowed_values.contains(&masked)
}
#[test]
fn test_mprotect_allows_prot_none() {
assert!(mprotect_would_allow(0), "PROT_NONE must be allowed");
}
#[test]
fn test_mprotect_allows_prot_read_only() {
assert!(
mprotect_would_allow(libc::PROT_READ as u64),
"PROT_READ must be allowed (W|X bits are 0)"
);
}
#[test]
fn test_mprotect_allows_prot_read_write() {
assert!(
mprotect_would_allow((libc::PROT_READ | libc::PROT_WRITE) as u64),
"PROT_READ|PROT_WRITE must be allowed"
);
}
#[test]
fn test_mprotect_allows_prot_read_exec() {
assert!(
mprotect_would_allow((libc::PROT_READ | libc::PROT_EXEC) as u64),
"PROT_READ|PROT_EXEC must be allowed"
);
}
#[test]
fn test_mprotect_rejects_prot_write_exec() {
assert!(
!mprotect_would_allow((libc::PROT_WRITE | libc::PROT_EXEC) as u64),
"PROT_WRITE|PROT_EXEC (W^X violation) must be REJECTED"
);
}
#[test]
fn test_mprotect_rejects_prot_read_write_exec() {
assert!(
!mprotect_would_allow((libc::PROT_READ | libc::PROT_WRITE | libc::PROT_EXEC) as u64),
"PROT_READ|PROT_WRITE|PROT_EXEC (W^X violation) must be REJECTED"
);
}
#[test]
fn test_mprotect_allows_prot_write_alone() {
assert!(
mprotect_would_allow(libc::PROT_WRITE as u64),
"PROT_WRITE alone must be allowed"
);
}
#[test]
fn test_mprotect_allows_prot_exec_alone() {
assert!(
mprotect_would_allow(libc::PROT_EXEC as u64),
"PROT_EXEC alone must be allowed"
);
}
}