use std::path::{Path, PathBuf};
// Upper bound, in bytes, on a program accepted by `validate_seccomp_bpf_bytes`.
// NOTE(review): presumably the kernel's 4096-instruction limit (BPF_MAXINSNS)
// times 8 bytes per `sock_filter` — confirm.
const MAX_FILTER_LEN: usize = 4096 * 8;
// Size in bytes of one classic-BPF instruction as laid out by `libc`.
const SOCK_FILTER_SIZE: usize = std::mem::size_of::<libc::sock_filter>();
// Byte offsets of the `nr` and `arch` fields that the baseline filter loads
// from the kernel's `struct seccomp_data`.
const SECCOMP_DATA_NR_OFFSET: u32 = 0;
const SECCOMP_DATA_ARCH_OFFSET: u32 = 4;
// Filter return actions. The errno to report is OR-ed into the low bits of
// `SECCOMP_RET_ERRNO` (see `baseline_seccomp_program`).
const SECCOMP_RET_ALLOW: u32 = 0x7fff_0000;
const SECCOMP_RET_ERRNO: u32 = 0x0005_0000;
const SECCOMP_RET_KILL_PROCESS: u32 = 0x8000_0000;
// Classic-BPF opcode components; an instruction's `code` is a combination of
// an instruction class (LD / JMP / RET) with size, mode, condition and source
// bits.
const BPF_LD: u16 = 0x00;
const BPF_W: u16 = 0x00;
const BPF_ABS: u16 = 0x20;
const BPF_JMP: u16 = 0x05;
const BPF_JEQ: u16 = 0x10;
const BPF_K: u16 = 0x00;
const BPF_RET: u16 = 0x06;
// `prctl` option passed by `apply_seccomp_filter` before installing a filter.
const PR_SET_NO_NEW_PRIVS: libc::c_int = 38;
/// Seccomp program source selected by the `CELLOS_SUBPROCESS_SECCOMP`
/// environment variable (parsed in `seccomp_selection_from_env`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum SeccompSelection {
/// Use the bundled program built by `baseline_seccomp_program`.
Baseline,
/// Use the precompiled program stored in the file named by
/// `CELLOS_SECCOMP_BPF_PATH`.
ExternalBpf,
}
// Architecture identifier the baseline filter compares against the `arch`
// field of the seccomp data; selected at compile time for the build target.
#[cfg(target_arch = "x86_64")]
const SECCOMP_AUDIT_ARCH: u32 = 0xc000_003e;
#[cfg(target_arch = "aarch64")]
const SECCOMP_AUDIT_ARCH: u32 = 0xc000_00b7;
// Sentinel for every other target: `baseline_seccomp_program` treats 0 as
// "no bundled profile available" and returns an error.
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
const SECCOMP_AUDIT_ARCH: u32 = 0;
/// Returns the path stored in `CELLOS_SECCOMP_BPF_PATH`, if any.
///
/// Yields `None` when the variable is unset or set to the empty string. The
/// value is taken as an `OsString`, so non-Unicode paths are passed through
/// unchanged.
pub(crate) fn seccomp_bpf_path_from_env() -> Option<PathBuf> {
    std::env::var_os("CELLOS_SECCOMP_BPF_PATH")
        .filter(|configured| !configured.is_empty())
        .map(PathBuf::from)
}
/// Reports whether the environment asks for any seccomp filtering at all.
///
/// Returns `true` when `CELLOS_SECCOMP_BPF_PATH` names a file, or when
/// `CELLOS_SUBPROCESS_SECCOMP` holds anything other than an empty string,
/// `0`, `off` or `none` (the two words compared ASCII case-insensitively,
/// after trimming whitespace). Values are not validated here, so an
/// unrecognised profile name still counts as a request. An unset or
/// non-Unicode `CELLOS_SUBPROCESS_SECCOMP` counts as "not requested".
// NOTE(review): `dead_code` is presumably allowed because not every build
// configuration calls this function — confirm.
#[allow(dead_code)]
pub(crate) fn seccomp_requested_from_env() -> bool {
    seccomp_bpf_path_from_env().is_some()
        || match std::env::var("CELLOS_SUBPROCESS_SECCOMP") {
            Ok(raw) => {
                let setting = raw.trim();
                let disabled = setting.is_empty()
                    || setting == "0"
                    || setting.eq_ignore_ascii_case("off")
                    || setting.eq_ignore_ascii_case("none");
                !disabled
            }
            Err(_) => false,
        }
}
/// Resolves the seccomp program requested through the environment.
///
/// Combines `CELLOS_SUBPROCESS_SECCOMP` (the profile selection) with
/// `CELLOS_SECCOMP_BPF_PATH` (an external program file):
///
/// * no selection, no path: `Ok(None)`;
/// * no selection, path set: the file is loaded;
/// * baseline selection: the bundled program, unless a path is also set;
/// * external selection: the file is loaded and the path is mandatory.
///
/// # Errors
///
/// Returns a message when the selection value is invalid, when the selection
/// and the path contradict each other, or when building or loading the
/// program fails.
pub(crate) fn load_seccomp_program_from_env() -> Result<Option<Vec<u8>>, String> {
    // The selection is parsed first so an invalid value is reported even when
    // a path is configured.
    let selection = seccomp_selection_from_env()?;
    let bpf_path = seccomp_bpf_path_from_env();

    match selection {
        None => bpf_path
            .map(|path| load_seccomp_program_from_file(&path))
            .transpose(),
        Some(SeccompSelection::Baseline) => match bpf_path {
            Some(path) => Err(format!(
                "CELLOS_SUBPROCESS_SECCOMP=baseline cannot be combined with CELLOS_SECCOMP_BPF_PATH ({})",
                path.display()
            )),
            None => baseline_seccomp_program().map(Some),
        },
        Some(SeccompSelection::ExternalBpf) => {
            let Some(path) = bpf_path else {
                return Err("CELLOS_SUBPROCESS_SECCOMP=bpf requires CELLOS_SECCOMP_BPF_PATH".into());
            };
            load_seccomp_program_from_file(&path).map(Some)
        }
    }
}
/// Parses `CELLOS_SUBPROCESS_SECCOMP` into a profile selection.
///
/// The value is trimmed and matched ASCII case-insensitively:
///
/// * unset, empty, `0`, `off`, `none`: `Ok(None)`;
/// * `1`, `default`, `baseline`: [`SeccompSelection::Baseline`];
/// * `bpf`, `path`, `file`: [`SeccompSelection::ExternalBpf`].
///
/// # Errors
///
/// Returns a message for any other value, and for a value that is not valid
/// Unicode. The latter is rejected rather than treated as "unset" so that a
/// garbled setting cannot silently disable the filter.
fn seccomp_selection_from_env() -> Result<Option<SeccompSelection>, String> {
    let raw = match std::env::var("CELLOS_SUBPROCESS_SECCOMP") {
        Ok(raw) => raw,
        Err(std::env::VarError::NotPresent) => return Ok(None),
        Err(std::env::VarError::NotUnicode(raw)) => {
            return Err(format!(
                "CELLOS_SUBPROCESS_SECCOMP is not valid Unicode (got {raw:?})"
            ));
        }
    };
    let value = raw.trim();
    let is_any_of = |candidates: &[&str]| {
        candidates
            .iter()
            .any(|candidate| value.eq_ignore_ascii_case(candidate))
    };

    if value.is_empty() || is_any_of(&["0", "off", "none"]) {
        Ok(None)
    } else if is_any_of(&["1", "default", "baseline"]) {
        Ok(Some(SeccompSelection::Baseline))
    } else if is_any_of(&["bpf", "path", "file"]) {
        Ok(Some(SeccompSelection::ExternalBpf))
    } else {
        // List every accepted spelling, including the `path`/`file` aliases.
        Err(format!(
            "CELLOS_SUBPROCESS_SECCOMP must be one of: 1, default, baseline, bpf, path, file, 0, off, none (got {value:?})"
        ))
    }
}
/// Reads a precompiled seccomp BPF program from `path` and validates it.
///
/// # Errors
///
/// Returns a message naming the path when the file cannot be read, or the
/// validation message when the contents fail `validate_seccomp_bpf_bytes`.
fn load_seccomp_program_from_file(path: &Path) -> Result<Vec<u8>, String> {
    match std::fs::read(path) {
        Ok(program) => validate_seccomp_bpf_bytes(&program).map(|()| program),
        Err(e) => Err(format!("CELLOS_SECCOMP_BPF_PATH {}: {e}", path.display())),
    }
}
/// Performs structural checks on a raw seccomp BPF program.
///
/// Only the byte length is inspected: it must be non-zero, a whole number of
/// `sock_filter` instructions, and at most `MAX_FILTER_LEN` bytes. The
/// instructions themselves are not interpreted.
///
/// # Errors
///
/// Returns a message describing the first check that failed.
pub(crate) fn validate_seccomp_bpf_bytes(bytes: &[u8]) -> Result<(), String> {
    let len = bytes.len();
    match len {
        0 => Err("seccomp BPF program is empty".into()),
        _ if len % SOCK_FILTER_SIZE != 0 => Err(format!(
            "seccomp BPF length {} is not a multiple of {} (sock_filter size)",
            len, SOCK_FILTER_SIZE
        )),
        _ if len > MAX_FILTER_LEN => Err(format!(
            "seccomp BPF program too large (max {MAX_FILTER_LEN} bytes)"
        )),
        _ => Ok(()),
    }
}
/// Builds the bundled baseline seccomp program as raw `sock_filter` bytes.
///
/// The program is a deny-list:
///
/// 1. load the architecture and kill the process if it is not
///    `SECCOMP_AUDIT_ARCH`;
/// 2. load the syscall number;
/// 3. on x86_64 only, fail any syscall number at or above the x32 ABI bit
///    with `ENOSYS`;
/// 4. fail every syscall in `baseline_blocked_syscalls` with `EPERM`;
/// 5. allow everything else.
///
/// # Errors
///
/// Returns a message when the target architecture has no bundled profile,
/// when a blocked syscall number does not fit a BPF immediate, or when the
/// assembled program fails `validate_seccomp_bpf_bytes`.
fn baseline_seccomp_program() -> Result<Vec<u8>, String> {
    // "Jump if greater than" condition bits for a classic-BPF jump.
    const BPF_JGT: u16 = 0x20;
    // Bit the x86_64 kernel sets in the syscall number for x32-ABI calls.
    const X32_SYSCALL_BIT: u32 = 0x4000_0000;

    if SECCOMP_AUDIT_ARCH == 0 {
        return Err(
            "bundled baseline seccomp profile currently supports x86_64 and aarch64 only".into(),
        );
    }

    let blocked = baseline_blocked_syscalls();
    let mut filters = Vec::with_capacity(2 * blocked.len() + 7);

    // Foreign-architecture syscalls use different numbering, so anything that
    // is not the native architecture is killed outright.
    filters.push(stmt(BPF_LD | BPF_W | BPF_ABS, SECCOMP_DATA_ARCH_OFFSET));
    filters.push(jump(BPF_JMP | BPF_JEQ | BPF_K, SECCOMP_AUDIT_ARCH, 1, 0));
    filters.push(stmt(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS));

    filters.push(stmt(BPF_LD | BPF_W | BPF_ABS, SECCOMP_DATA_NR_OFFSET));

    if cfg!(target_arch = "x86_64") {
        // x32 syscalls report the same architecture value as native x86_64
        // but carry `X32_SYSCALL_BIT` in the number, so none of the equality
        // checks below would match them. Without this range check the whole
        // deny-list could be bypassed through the x32 entry points. ENOSYS
        // mirrors what a kernel without x32 support reports.
        filters.push(jump(BPF_JMP | BPF_JGT | BPF_K, X32_SYSCALL_BIT - 1, 0, 1));
        filters.push(stmt(
            BPF_RET | BPF_K,
            SECCOMP_RET_ERRNO | (libc::ENOSYS as u32),
        ));
    }

    for &nr in blocked {
        let nr = u32::try_from(nr)
            .map_err(|_| format!("syscall number {nr} does not fit in a BPF immediate"))?;
        // Match: fall through to the errno return. No match: skip over it.
        filters.push(jump(BPF_JMP | BPF_JEQ | BPF_K, nr, 0, 1));
        filters.push(stmt(
            BPF_RET | BPF_K,
            SECCOMP_RET_ERRNO | (libc::EPERM as u32),
        ));
    }

    filters.push(stmt(BPF_RET | BPF_K, SECCOMP_RET_ALLOW));
    sock_filter_bytes(&filters)
}
/// Syscall numbers that the bundled baseline program fails with `EPERM`.
///
/// The order is the order in which `baseline_seccomp_program` emits its
/// checks, so it determines the generated bytes.
fn baseline_blocked_syscalls() -> &'static [libc::c_long] {
    const DENIED: &[libc::c_long] = &[
        // Tracing and cross-process memory access.
        libc::SYS_ptrace,
        libc::SYS_process_vm_readv,
        libc::SYS_process_vm_writev,
        // BPF and perf events.
        libc::SYS_bpf,
        libc::SYS_perf_event_open,
        // Kernel keyring.
        libc::SYS_keyctl,
        libc::SYS_add_key,
        libc::SYS_request_key,
        // Mounts and namespaces.
        libc::SYS_mount,
        libc::SYS_umount2,
        libc::SYS_pivot_root,
        libc::SYS_setns,
        libc::SYS_unshare,
        // Kernel modules and kexec.
        libc::SYS_init_module,
        libc::SYS_finit_module,
        libc::SYS_delete_module,
        libc::SYS_kexec_load,
    ];
    DENIED
}
/// Builds a BPF instruction with opcode `code` and operand `k` whose two jump
/// offsets are both zero.
fn stmt(code: u16, k: u32) -> libc::sock_filter {
    let (jt, jf) = (0, 0);
    libc::sock_filter { code, jt, jf, k }
}
/// Builds a BPF instruction with opcode `code`, operand `k`, and the given
/// jump offsets: `jt` is stored in the instruction's `jt` field and `jf` in
/// its `jf` field.
fn jump(code: u16, k: u32, jt: u8, jf: u8) -> libc::sock_filter {
    libc::sock_filter {
        code,
        jt,
        jf,
        k,
    }
}
/// Serialises BPF instructions into their in-memory byte representation and
/// validates the result.
///
/// Each instruction is written field by field in native byte order: `code`
/// (2 bytes), `jt`, `jf` (1 byte each), then `k` (4 bytes). This matches the
/// layout of `libc::sock_filter`, which has no padding between those fields.
///
/// # Errors
///
/// Returns the message from `validate_seccomp_bpf_bytes` when the encoded
/// program is rejected (for example, when `filters` is empty).
fn sock_filter_bytes(filters: &[libc::sock_filter]) -> Result<Vec<u8>, String> {
    let mut encoded = Vec::with_capacity(std::mem::size_of_val(filters));
    for insn in filters {
        encoded.extend_from_slice(&insn.code.to_ne_bytes());
        encoded.push(insn.jt);
        encoded.push(insn.jf);
        encoded.extend_from_slice(&insn.k.to_ne_bytes());
    }
    validate_seccomp_bpf_bytes(&encoded)?;
    Ok(encoded)
}
/// Installs `program` as a seccomp filter for the caller.
///
/// The program is validated first, then `PR_SET_NO_NEW_PRIVS` is set via
/// `prctl`, and finally the filter is installed with the `seccomp` syscall in
/// filter mode with no flags. If the last step fails, the no-new-privs
/// setting made by the second step stays in effect.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidInput` when `program` fails
/// `validate_seccomp_bpf_bytes`, or the OS error reported by `prctl` or
/// `seccomp`.
pub(crate) fn apply_seccomp_filter(program: &[u8]) -> Result<(), std::io::Error> {
    const SECCOMP_SET_MODE_FILTER: libc::c_long = 1;
    const SECCOMP_NO_FLAGS: libc::c_long = 0;
    // `prctl` is variadic and the kernel reads every argument as an
    // `unsigned long`, rejecting PR_SET_NO_NEW_PRIVS unless arg3..arg5 are
    // exactly zero. Typing the arguments makes them full-width by
    // construction instead of relying on how 32-bit ints are promoted.
    const ENABLE: libc::c_ulong = 1;
    const UNUSED: libc::c_ulong = 0;

    validate_seccomp_bpf_bytes(program)
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e))?;

    // Validation caps the program size, so this conversion is not expected to
    // fail; it is checked rather than truncated with `as`.
    let n_insn = libc::c_ushort::try_from(program.len() / SOCK_FILTER_SIZE).map_err(|_| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "seccomp BPF program has too many instructions",
        )
    })?;
    let fprog = libc::sock_fprog {
        len: n_insn,
        // The field is declared `*mut`, but nothing in this function writes
        // through the pointer.
        filter: program.as_ptr().cast_mut().cast(),
    };

    // SAFETY: `prctl` is called with a valid option and plain integer
    // arguments; no memory is passed to the kernel.
    let rc = unsafe { libc::prctl(PR_SET_NO_NEW_PRIVS, ENABLE, UNUSED, UNUSED, UNUSED) };
    if rc != 0 {
        return Err(std::io::Error::last_os_error());
    }

    // SAFETY: `fprog` lives until after the call returns, and `fprog.filter`
    // points at `program`, which holds exactly `fprog.len` instructions
    // because its length was validated as a multiple of `SOCK_FILTER_SIZE`.
    let rc = unsafe {
        libc::syscall(
            libc::SYS_seccomp,
            SECCOMP_SET_MODE_FILTER,
            SECCOMP_NO_FLAGS,
            &fprog as *const libc::sock_fprog,
        )
    };
    if rc != 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::{
        baseline_seccomp_program, load_seccomp_program_from_env, seccomp_requested_from_env,
        validate_seccomp_bpf_bytes,
    };
    use std::sync::{Mutex, MutexGuard, PoisonError};

    /// Serialises tests that mutate the process-wide environment.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Takes `ENV_LOCK`, recovering from poisoning.
    ///
    /// The mutex guards no data, so a panic in one test leaves nothing
    /// inconsistent behind; recovering keeps one failing test from making
    /// every later environment test fail as well.
    fn lock_env() -> MutexGuard<'static, ()> {
        ENV_LOCK.lock().unwrap_or_else(PoisonError::into_inner)
    }

    /// Sets or removes an environment variable and restores the previous
    /// state when dropped.
    struct EnvGuard {
        key: &'static str,
        value: Option<std::ffi::OsString>,
    }

    impl EnvGuard {
        /// Sets `key` to `value`, remembering what was there before.
        fn set(key: &'static str, value: &str) -> Self {
            let original = std::env::var_os(key);
            std::env::set_var(key, value);
            Self {
                key,
                value: original,
            }
        }

        /// Removes `key`, remembering what was there before.
        fn remove(key: &'static str) -> Self {
            let original = std::env::var_os(key);
            std::env::remove_var(key);
            Self {
                key,
                value: original,
            }
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            if let Some(value) = self.value.take() {
                std::env::set_var(self.key, value);
            } else {
                std::env::remove_var(self.key);
            }
        }
    }

    #[test]
    fn validate_rejects_empty() {
        assert!(validate_seccomp_bpf_bytes(&[]).is_err());
    }

    #[test]
    fn validate_rejects_bad_length() {
        assert!(validate_seccomp_bpf_bytes(&[0u8; 7]).is_err());
    }

    #[test]
    fn validate_accepts_minimal_non_empty_multiple_of_8() {
        assert!(validate_seccomp_bpf_bytes(&[0u8; 8]).is_ok());
    }

    #[test]
    fn validate_rejects_oversize_program() {
        let too_big = vec![0u8; 4096 * 8 + 8];
        assert!(validate_seccomp_bpf_bytes(&too_big).is_err());
    }

    #[test]
    fn baseline_program_is_valid_bpf() {
        let bytes = baseline_seccomp_program().expect("baseline program");
        validate_seccomp_bpf_bytes(&bytes).expect("valid baseline bytes");
    }

    // In the tests below the lock is bound first so that it is dropped last:
    // the guards restore the environment while the lock is still held.

    #[test]
    fn requested_detects_baseline_profile() {
        let _lock = lock_env();
        let _profile = EnvGuard::set("CELLOS_SUBPROCESS_SECCOMP", "baseline");
        let _path = EnvGuard::remove("CELLOS_SECCOMP_BPF_PATH");
        assert!(seccomp_requested_from_env());
    }

    #[test]
    fn requested_is_false_when_disabled() {
        let _lock = lock_env();
        let _profile = EnvGuard::set("CELLOS_SUBPROCESS_SECCOMP", " OFF ");
        let _path = EnvGuard::remove("CELLOS_SECCOMP_BPF_PATH");
        assert!(!seccomp_requested_from_env());
    }

    #[test]
    fn load_prefers_baseline_profile() {
        let _lock = lock_env();
        let _profile = EnvGuard::set("CELLOS_SUBPROCESS_SECCOMP", "baseline");
        let _path = EnvGuard::remove("CELLOS_SECCOMP_BPF_PATH");
        assert!(load_seccomp_program_from_env().unwrap().is_some());
    }

    #[test]
    fn load_returns_none_when_disabled() {
        let _lock = lock_env();
        let _profile = EnvGuard::set("CELLOS_SUBPROCESS_SECCOMP", "none");
        let _path = EnvGuard::remove("CELLOS_SECCOMP_BPF_PATH");
        assert_eq!(load_seccomp_program_from_env(), Ok(None));
    }

    #[test]
    fn load_rejects_unknown_profile() {
        let _lock = lock_env();
        let _profile = EnvGuard::set("CELLOS_SUBPROCESS_SECCOMP", "nope");
        let _path = EnvGuard::remove("CELLOS_SECCOMP_BPF_PATH");
        assert!(load_seccomp_program_from_env().is_err());
    }

    #[test]
    fn load_rejects_external_profile_without_path() {
        let _lock = lock_env();
        let _profile = EnvGuard::set("CELLOS_SUBPROCESS_SECCOMP", "bpf");
        let _path = EnvGuard::remove("CELLOS_SECCOMP_BPF_PATH");
        assert!(load_seccomp_program_from_env().is_err());
    }

    #[test]
    fn load_rejects_baseline_combined_with_path() {
        let _lock = lock_env();
        let _profile = EnvGuard::set("CELLOS_SUBPROCESS_SECCOMP", "baseline");
        // The conflict is reported before the file is touched, so the path
        // does not need to exist.
        let _path = EnvGuard::set("CELLOS_SECCOMP_BPF_PATH", "/nonexistent/filter.bpf");
        assert!(load_seccomp_program_from_env().is_err());
    }
}