use crate::error::{NucleusError, Result};
use seccompiler::{sock_filter, BpfProgram, SeccompAction, SeccompRule, TargetArch};
use std::collections::BTreeMap;
// Classic BPF opcode fragments. An instruction's `code` field is formed by
// OR-ing one instruction class with the mode / operation / source bits below.

// Instruction classes.
const BPF_LD: u16 = 0x00;
const BPF_LDX: u16 = 0x01;
const BPF_ST: u16 = 0x02;
const BPF_ALU: u16 = 0x04;
const BPF_JMP: u16 = 0x05;
const BPF_RET: u16 = 0x06;

// Load width and addressing modes.
const BPF_W: u16 = 0x00;
const BPF_IMM: u16 = 0x00;
const BPF_ABS: u16 = 0x20;
const BPF_MEM: u16 = 0x60;

// ALU operations.
const BPF_AND: u16 = 0x50;
const BPF_RSH: u16 = 0x70;

// Jump kinds.
const BPF_JA: u16 = 0x00;
const BPF_JEQ: u16 = 0x10;

// Operand source: the immediate `k` or the index register X.
const BPF_K: u16 = 0x00;
const BPF_X: u16 = 0x08;

// Byte offsets of the fields read from the `seccomp_data` buffer.
const SECCOMP_DATA_NR_OFFSET: u32 = 0;
const SECCOMP_DATA_ARCH_OFFSET: u32 = 4;

/// Instruction-count limit enforced on the compiled program.
const BPF_MAX_LEN: usize = 4096;

// Audit architecture identifiers: a machine number combined with the two
// high flag bits shared by every architecture supported here.
const AUDIT_ARCH_FLAG_64BIT: u32 = 0x8000_0000;
const AUDIT_ARCH_FLAG_LE: u32 = 0x4000_0000;
const AUDIT_ARCH_X86_64: u32 = 62 | AUDIT_ARCH_FLAG_64BIT | AUDIT_ARCH_FLAG_LE;
const AUDIT_ARCH_AARCH64: u32 = 183 | AUDIT_ARCH_FLAG_64BIT | AUDIT_ARCH_FLAG_LE;
const AUDIT_ARCH_RISCV64: u32 = 243 | AUDIT_ARCH_FLAG_64BIT | AUDIT_ARCH_FLAG_LE;
/// Returns the `AUDIT_ARCH_*` constant that the compiled filter compares
/// against the `arch` field of `seccomp_data` for the given target.
fn arch_audit_value(arch: TargetArch) -> u32 {
    // Kept exhaustive on purpose: a new `TargetArch` variant must be mapped
    // explicitly before the code compiles again.
    match arch {
        TargetArch::riscv64 => AUDIT_ARCH_RISCV64,
        TargetArch::aarch64 => AUDIT_ARCH_AARCH64,
        TargetArch::x86_64 => AUDIT_ARCH_X86_64,
    }
}
/// Number of 32-bit words in the allow bitmap. Each word covers 32 syscall
/// numbers, so the bitmap spans syscalls `0..480`.
const NUM_BITMAP_WORDS: usize = 480 / 32;
/// Scratch-memory slot in which the program keeps the bit position
/// (`nr & 31`) between the prologue and the bit test.
const M_BIT_POS: u32 = 0;
/// Builds a BPF instruction that carries only an opcode and the operand `k`;
/// both branch offsets are set to zero.
#[inline(always)]
fn bpf_stmt(code: u16, k: u32) -> sock_filter {
    let (jt, jf) = (0, 0);
    sock_filter { code, jt, jf, k }
}
/// Builds a conditional-jump BPF instruction.
///
/// `k` is the value compared against; `jt` and `jf` are the branch offsets
/// stored for the true and false outcomes respectively.
#[inline(always)]
fn bpf_jump(code: u16, k: u32, jt: u8, jf: u8) -> sock_filter {
    sock_filter {
        code,
        jt,
        jf,
        k,
    }
}
pub fn compile_bitmap_bpf(
rules: BTreeMap<i64, Vec<SeccompRule>>,
mismatch_action: SeccompAction,
match_action: SeccompAction,
target_arch: TargetArch,
) -> Result<BpfProgram> {
let mismatch_val: u32 = mismatch_action.into();
let match_val: u32 = match_action.into();
let audit_arch = arch_audit_value(target_arch);
let mut bitmap: [u32; NUM_BITMAP_WORDS] = [0; NUM_BITMAP_WORDS];
let mut arg_filtered: Vec<(i64, Vec<SeccompRule>)> = Vec::new();
for (nr, chain) in rules {
if nr < 0 {
continue;
}
if chain.is_empty() {
let word_idx = (nr >> 5) as usize;
if word_idx < NUM_BITMAP_WORDS {
bitmap[word_idx] |= 1u32 << (nr & 31);
}
} else {
arg_filtered.push((nr, chain));
}
}
let arg_section = build_arg_section(arg_filtered, mismatch_val, match_val);
let active_words: Vec<(usize, u32)> = bitmap
.iter()
.enumerate()
.filter(|(_, &w)| w != 0)
.map(|(i, &w)| (i, w))
.collect();
let dispatch_entry_count = active_words.len();
let dispatch_len = dispatch_entry_count * 3 + 1;
let test_len: usize = 5;
let mut prog: BpfProgram = Vec::with_capacity(64 + arg_section.len());
prog.push(bpf_stmt(BPF_LD | BPF_W | BPF_ABS, SECCOMP_DATA_ARCH_OFFSET));
prog.push(bpf_jump(BPF_JMP | BPF_JEQ | BPF_K, audit_arch, 1, 0));
prog.push(bpf_stmt(
BPF_RET | BPF_K,
libc::SECCOMP_RET_KILL_PROCESS as u32,
));
prog.push(bpf_stmt(BPF_LD | BPF_W | BPF_ABS, SECCOMP_DATA_NR_OFFSET)); prog.push(bpf_stmt(BPF_ALU | BPF_AND | BPF_K, 31)); prog.push(bpf_stmt(BPF_ST, M_BIT_POS)); prog.push(bpf_stmt(BPF_LD | BPF_W | BPF_ABS, SECCOMP_DATA_NR_OFFSET)); prog.push(bpf_stmt(BPF_ALU | BPF_RSH | BPF_K, 5));
let test_start = dispatch_len; for (entry_idx, &(word_idx, word_val)) in active_words.iter().enumerate() {
let insn_pos = entry_idx * 3; prog.push(bpf_jump(BPF_JMP | BPF_JEQ | BPF_K, word_idx as u32, 0, 2));
prog.push(bpf_stmt(BPF_LD | BPF_IMM, word_val));
let ja_offset = test_start - insn_pos - 3; prog.push(bpf_stmt(BPF_JMP | BPF_JA, ja_offset as u32));
}
prog.push(bpf_stmt(BPF_JMP | BPF_JA, test_len as u32));
prog.push(bpf_stmt(BPF_LDX | BPF_MEM, M_BIT_POS));
prog.push(bpf_stmt(BPF_ALU | BPF_RSH | BPF_X, 0));
prog.push(bpf_stmt(BPF_ALU | BPF_AND | BPF_K, 1));
prog.push(bpf_jump(BPF_JMP | BPF_JEQ | BPF_K, 1, 0, 1)); prog.push(bpf_stmt(BPF_RET | BPF_K, match_val));
prog.extend(arg_section);
prog.push(bpf_stmt(BPF_RET | BPF_K, mismatch_val));
if prog.len() >= BPF_MAX_LEN {
return Err(NucleusError::SeccompError(format!(
"BPF program too large: {} instructions (max {})",
prog.len(),
BPF_MAX_LEN
)));
}
Ok(prog)
}
/// Builds the linear section of the program: one load of the syscall number
/// followed by a chain per entry of `arg_filtered`, in the given order.
///
/// Returns an empty program when there is nothing to emit, so no load
/// instruction is produced in that case.
fn build_arg_section(
    arg_filtered: Vec<(i64, Vec<SeccompRule>)>,
    mismatch_val: u32,
    match_val: u32,
) -> BpfProgram {
    let mut section = Vec::new();
    if !arg_filtered.is_empty() {
        // Every chain starts by comparing the accumulator with its syscall
        // number, so the number is loaded once up front.
        section.push(bpf_stmt(BPF_LD | BPF_W | BPF_ABS, SECCOMP_DATA_NR_OFFSET));
        arg_filtered.into_iter().for_each(|(syscall_nr, chain)| {
            build_syscall_chain(syscall_nr, chain, mismatch_val, match_val, &mut section)
        });
    }
    section
}
/// Appends the instruction chain for a single syscall to `out`.
///
/// The chain opens with a comparison against `syscall_nr`, followed by each
/// rule's translated instructions with a `match_val` return after it, and
/// closes with a `mismatch_val` return. An empty `chain` produces a fixed
/// three-instruction body that returns `match_val` when the number matches.
fn build_syscall_chain(
    syscall_nr: i64,
    chain: Vec<SeccompRule>,
    mismatch_val: u32,
    match_val: u32,
    out: &mut BpfProgram,
) {
    // On equality fall into the next instruction, otherwise skip exactly one.
    out.push(bpf_jump(BPF_JMP | BPF_JEQ | BPF_K, syscall_nr as u32, 0, 1));

    if chain.is_empty() {
        // Same "enter / skip" jump pair a translated rule begins with,
        // guarding a bare match return.
        out.push(bpf_stmt(BPF_JMP | BPF_JA, 1));
        out.push(bpf_stmt(BPF_JMP | BPF_JA, 2));
        out.push(bpf_stmt(BPF_RET | BPF_K, match_val));
    } else {
        for rule in chain {
            let translated: BpfProgram = rule.into();
            out.extend(translated);
            // Reached only when every condition of this rule held.
            out.push(bpf_stmt(BPF_RET | BPF_K, match_val));
        }
    }

    // The syscall number matched but no rule did.
    out.push(bpf_stmt(BPF_RET | BPF_K, mismatch_val));
}
// Unit tests. Compiled programs are executed by a small classic-BPF
// interpreter (`bpf_eval`) against synthetic `seccomp_data` buffers.
#[cfg(test)]
mod tests {
use super::*;
use seccompiler::{SeccompCmpArgLen, SeccompCmpOp, SeccompCondition};
// Audit architecture value written into the synthetic `arch` field.
const AUDIT_ARCH_X86_64: u32 = 62 | 0x8000_0000 | 0x4000_0000;
// Minimal classic-BPF interpreter: runs `prog` over the raw bytes in `data`
// and returns the `k` operand of the first return instruction reached.
// Panics on an unknown opcode, when execution runs past the end of the
// program, or after 10,000 steps.
fn bpf_eval(prog: &[sock_filter], data: &[u8]) -> u32 {
// `a` is the accumulator, `x` the index register, `mem` the scratch slots.
let mut a: u32 = 0;
let mut x: u32 = 0;
let mut mem: [u32; 16] = [0; 16];
let mut pc: usize = 0;
for _ in 0..10_000 {
if pc >= prog.len() {
panic!("BPF: fell off end of program at pc={}", pc);
}
let insn = &prog[pc];
match insn.code {
// BPF_LD | BPF_IMM: A = k.
0x00 => {
a = insn.k;
pc += 1;
}
// BPF_LD | BPF_W | BPF_ABS: A = native-endian u32 at byte offset k.
0x20 => {
let off = insn.k as usize;
a = u32::from_ne_bytes(data[off..off + 4].try_into().unwrap());
pc += 1;
}
// BPF_LD | BPF_MEM: A = M[k].
0x60 => {
a = mem[insn.k as usize];
pc += 1;
}
// BPF_LDX | BPF_MEM: X = M[k].
0x61 => {
x = mem[insn.k as usize];
pc += 1;
}
// BPF_ST: M[k] = A.
0x02 => {
mem[insn.k as usize] = a;
pc += 1;
}
// BPF_ALU | BPF_AND | BPF_K: A &= k.
0x54 => {
a &= insn.k;
pc += 1;
}
// BPF_ALU | BPF_RSH | BPF_K: A >>= k.
0x74 => {
a >>= insn.k;
pc += 1;
}
// BPF_ALU | BPF_RSH | BPF_X: A >>= X; a shift of 32 or more yields 0.
0x7c => {
a = a.checked_shr(x).unwrap_or(0);
pc += 1;
}
// BPF_JMP | BPF_JA: unconditional jump, relative to the next instruction.
0x05 => {
pc += 1 + insn.k as usize;
}
// BPF_JMP | BPF_JEQ | BPF_K: branch on A == k.
0x15 => {
if a == insn.k {
pc += 1 + insn.jt as usize;
} else {
pc += 1 + insn.jf as usize;
}
}
// Conditional jump: branch on A > k.
0x25 => {
if a > insn.k {
pc += 1 + insn.jt as usize;
} else {
pc += 1 + insn.jf as usize;
}
}
// Conditional jump: branch on A >= k.
0x35 => {
if a >= insn.k {
pc += 1 + insn.jt as usize;
} else {
pc += 1 + insn.jf as usize;
}
}
// BPF_RET | BPF_K: stop and return k.
0x06 => {
return insn.k;
}
// Copy A into X.
0x07 => {
x = a;
pc += 1;
}
other => panic!("BPF: unknown opcode 0x{:04x} at pc={}", other, pc),
}
}
panic!("BPF: execution limit exceeded");
}
// Builds a 64-byte buffer with `nr` at bytes 0..4, `arch` at bytes 4..8 and
// the six arguments as consecutive u64 values starting at byte 16, all in
// native byte order. Bytes 8..16 are left zero.
fn make_seccomp_data(nr: u32, arch: u32, args: [u64; 6]) -> Vec<u8> {
let mut data = vec![0u8; 64];
data[0..4].copy_from_slice(&nr.to_ne_bytes());
data[4..8].copy_from_slice(&arch.to_ne_bytes());
for (i, arg) in args.iter().enumerate() {
let offset = 16 + i * 8;
data[offset..offset + 8].copy_from_slice(&arg.to_ne_bytes());
}
data
}
// Return values the tests expect for `SeccompAction::Allow` and
// `SeccompAction::KillProcess` respectively.
const RET_ALLOW: u32 = 0x7fff_0000; const RET_KILL: u32 = 0x8000_0000;
// Syscalls registered with an empty rule chain are allowed; others are killed.
#[test]
fn test_unconditional_allows() {
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
rules.insert(0, Vec::new()); rules.insert(1, Vec::new()); rules.insert(2, Vec::new()); rules.insert(60, Vec::new()); rules.insert(231, Vec::new());
let prog = compile_bitmap_bpf(
rules,
SeccompAction::KillProcess,
SeccompAction::Allow,
TargetArch::x86_64,
)
.unwrap();
for nr in [0, 1, 2, 60, 231] {
let data = make_seccomp_data(nr, AUDIT_ARCH_X86_64, [0; 6]);
assert_eq!(
bpf_eval(&prog, &data),
RET_ALLOW,
"syscall {} should be allowed",
nr
);
}
for nr in [3, 4, 59, 100, 300] {
let data = make_seccomp_data(nr, AUDIT_ARCH_X86_64, [0; 6]);
assert_eq!(
bpf_eval(&prog, &data),
RET_KILL,
"syscall {} should be killed",
nr
);
}
}
// A mismatching `arch` value is killed even for an allowed syscall number.
#[test]
fn test_wrong_arch_is_killed() {
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
rules.insert(0, Vec::new());
let prog = compile_bitmap_bpf(
rules,
SeccompAction::KillProcess,
SeccompAction::Allow,
TargetArch::x86_64,
)
.unwrap();
let data = make_seccomp_data(0, 0xDEADBEEF, [0; 6]);
assert_eq!(
bpf_eval(&prog, &data),
libc::SECCOMP_RET_KILL_PROCESS as u32
);
}
// Mixes one bitmap entry (syscall 0) with one argument-filtered syscall
// (16, allowed only when argument 1 equals 0x5401).
#[test]
fn test_arg_filtered_syscalls() {
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
rules.insert(0, Vec::new());
let cond =
SeccompCondition::new(1, SeccompCmpArgLen::Dword, SeccompCmpOp::Eq, 0x5401).unwrap();
rules.insert(16, vec![SeccompRule::new(vec![cond]).unwrap()]);
let prog = compile_bitmap_bpf(
rules,
SeccompAction::KillProcess,
SeccompAction::Allow,
TargetArch::x86_64,
)
.unwrap();
let data = make_seccomp_data(0, AUDIT_ARCH_X86_64, [0; 6]);
assert_eq!(bpf_eval(&prog, &data), RET_ALLOW);
let data = make_seccomp_data(16, AUDIT_ARCH_X86_64, [0, 0x5401, 0, 0, 0, 0]);
assert_eq!(bpf_eval(&prog, &data), RET_ALLOW);
let data = make_seccomp_data(16, AUDIT_ARCH_X86_64, [0, 0x1234, 0, 0, 0, 0]);
assert_eq!(bpf_eval(&prog, &data), RET_KILL);
// A syscall present in neither the bitmap nor the linear section.
let data = make_seccomp_data(999, AUDIT_ARCH_X86_64, [0; 6]);
assert_eq!(bpf_eval(&prog, &data), RET_KILL);
}
// Two rules on one syscall: a match on either rule allows the call.
#[test]
fn test_multiple_rules_per_syscall() {
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
let cond_unix =
SeccompCondition::new(0, SeccompCmpArgLen::Dword, SeccompCmpOp::Eq, 1).unwrap();
let cond_inet =
SeccompCondition::new(0, SeccompCmpArgLen::Dword, SeccompCmpOp::Eq, 2).unwrap();
rules.insert(
41,
vec![
SeccompRule::new(vec![cond_unix]).unwrap(),
SeccompRule::new(vec![cond_inet]).unwrap(),
],
);
let prog = compile_bitmap_bpf(
rules,
SeccompAction::KillProcess,
SeccompAction::Allow,
TargetArch::x86_64,
)
.unwrap();
let data = make_seccomp_data(41, AUDIT_ARCH_X86_64, [1, 0, 0, 0, 0, 0]);
assert_eq!(bpf_eval(&prog, &data), RET_ALLOW);
let data = make_seccomp_data(41, AUDIT_ARCH_X86_64, [2, 0, 0, 0, 0, 0]);
assert_eq!(bpf_eval(&prog, &data), RET_ALLOW);
let data = make_seccomp_data(41, AUDIT_ARCH_X86_64, [16, 0, 0, 0, 0, 0]);
assert_eq!(bpf_eval(&prog, &data), RET_KILL);
}
// Compiles the same rule set with seccompiler's `SeccompFilter` and with
// `compile_bitmap_bpf`, then checks that both programs return the same
// value for a range of syscall numbers and selected argument values.
#[test]
fn test_equivalence_with_linear_scan() {
use seccompiler::SeccompFilter;
let rules = crate::security::SeccompManager::minimal_filter_for_test(true, &[]);
let rules2 = rules.clone();
let linear_prog: BpfProgram = SeccompFilter::new(
rules,
SeccompAction::KillProcess,
SeccompAction::Allow,
TargetArch::x86_64,
)
.unwrap()
.try_into()
.unwrap();
let bitmap_prog = compile_bitmap_bpf(
rules2,
SeccompAction::KillProcess,
SeccompAction::Allow,
TargetArch::x86_64,
)
.unwrap();
// Sweep syscall numbers 0..500 with all-zero arguments.
for nr in 0..500u32 {
let data = make_seccomp_data(nr, AUDIT_ARCH_X86_64, [0; 6]);
let linear_result = bpf_eval(&linear_prog, &data);
let bitmap_result = bpf_eval(&bitmap_prog, &data);
assert_eq!(
linear_result, bitmap_result,
"syscall {} (args=[0;6]): linear=0x{:08x}, bitmap=0x{:08x}",
nr, linear_result, bitmap_result
);
}
// Spot checks on individual syscalls with specific argument values.
let clone_nr = libc::SYS_clone as u32;
let data = make_seccomp_data(clone_nr, AUDIT_ARCH_X86_64, [0, 0, 0, 0, 0, 0]); assert_eq!(bpf_eval(&linear_prog, &data), bpf_eval(&bitmap_prog, &data));
let ioctl_nr = libc::SYS_ioctl as u32;
for req in [0x5401u64, 0x5413, 0x1234] {
let data = make_seccomp_data(ioctl_nr, AUDIT_ARCH_X86_64, [0, req, 0, 0, 0, 0]);
assert_eq!(
bpf_eval(&linear_prog, &data),
bpf_eval(&bitmap_prog, &data),
"ioctl with req=0x{:x}",
req
);
}
let socket_nr = libc::SYS_socket as u32;
for domain in [1u64, 2, 10, 16] {
let data = make_seccomp_data(socket_nr, AUDIT_ARCH_X86_64, [domain, 0, 0, 0, 0, 0]);
assert_eq!(
bpf_eval(&linear_prog, &data),
bpf_eval(&bitmap_prog, &data),
"socket with domain={}",
domain
);
}
}
// The program compiled from the minimal test filter stays below 400
// instructions and below `BPF_MAX_LEN`.
#[test]
fn test_program_size_is_compact() {
let rules = crate::security::SeccompManager::minimal_filter_for_test(true, &[]);
let prog = compile_bitmap_bpf(
rules,
SeccompAction::KillProcess,
SeccompAction::Allow,
TargetArch::x86_64,
)
.unwrap();
assert!(
prog.len() < 400,
"BPF program should be compact, got {} instructions",
prog.len()
);
assert!(
prog.len() < BPF_MAX_LEN,
"BPF program must fit in {} instructions",
BPF_MAX_LEN
);
}
// With no rules at all, every syscall gets the mismatch action.
#[test]
fn test_empty_rules() {
let rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
let prog = compile_bitmap_bpf(
rules,
SeccompAction::KillProcess,
SeccompAction::Allow,
TargetArch::x86_64,
)
.unwrap();
let data = make_seccomp_data(0, AUDIT_ARCH_X86_64, [0; 6]);
assert_eq!(bpf_eval(&prog, &data), RET_KILL);
}
// Syscall 450 is allowed when registered; its neighbour 451 is not.
#[test]
fn test_high_syscall_numbers() {
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
rules.insert(450, Vec::new());
let prog = compile_bitmap_bpf(
rules,
SeccompAction::KillProcess,
SeccompAction::Allow,
TargetArch::x86_64,
)
.unwrap();
let data = make_seccomp_data(450, AUDIT_ARCH_X86_64, [0; 6]);
assert_eq!(bpf_eval(&prog, &data), RET_ALLOW);
let data = make_seccomp_data(451, AUDIT_ARCH_X86_64, [0; 6]);
assert_eq!(bpf_eval(&prog, &data), RET_KILL);
}
// Static check: every jump target in the compiled program lies inside it.
#[test]
fn test_all_jump_offsets_valid() {
let rules = crate::security::SeccompManager::minimal_filter_for_test(true, &[]);
let prog = compile_bitmap_bpf(
rules,
SeccompAction::KillProcess,
SeccompAction::Allow,
TargetArch::x86_64,
)
.unwrap();
for (pc, insn) in prog.iter().enumerate() {
match insn.code {
// Unconditional jump: the target offset is carried in `k`.
0x05 => {
let target = pc + 1 + insn.k as usize;
assert!(
target < prog.len(),
"JA at pc={} jumps to {} (prog len={})",
pc,
target,
prog.len()
);
}
// Conditional jumps: both the true and the false target are checked.
0x15 | 0x25 | 0x35 => {
let target_t = pc + 1 + insn.jt as usize;
let target_f = pc + 1 + insn.jf as usize;
assert!(
target_t < prog.len(),
"JEQ/JGT/JGE jt at pc={} jumps to {} (prog len={})",
pc,
target_t,
prog.len()
);
assert!(
target_f < prog.len(),
"JEQ/JGT/JGE jf at pc={} jumps to {} (prog len={})",
pc,
target_f,
prog.len()
);
}
_ => {}
}
}
}
}