use std::io;
use std::sync::Arc;
/// One intercepted syscall, decoded from the raw seccomp notification record.
///
/// `run_supervisor_loop` fills this in from the record it receives on the
/// notification fd and passes it by reference to `SyscallHandler::handle`.
/// Equality is derived so handlers and their tests can compare notifications.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SyscallNotif {
    /// Notification id from the raw record; the reply is sent with the same id.
    pub id: u64,
    /// `pid` field of the raw record.
    pub pid: u32,
    /// Syscall number (`data.nr` of the raw record).
    pub nr: i32,
    /// Architecture identifier (`data.arch` of the raw record).
    pub arch: u32,
    /// Instruction pointer reported with the syscall.
    pub instruction_pointer: u64,
    /// The six raw syscall argument words.
    pub args: [u64; 6],
}
/// Decision a `SyscallHandler` returns for one intercepted syscall.
///
/// Equality is derived so a handler's decision can be checked with
/// `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyscallResponse {
    /// Reply with `SECCOMP_USER_NOTIF_FLAG_CONTINUE` (error and value both zero).
    Allow,
    /// Reply with an error. The sign of the payload is ignored: the supervisor
    /// loop sends the negated absolute value in the reply's `error` field.
    Deny(i32),
    /// Reply with this number in the `val` field and no error.
    Return(i64),
}
/// Policy callback consulted by `run_supervisor_loop` for every notification.
///
/// Implementations must be `Send + Sync + 'static` because the loop receives
/// the handler as a shared `Arc<dyn SyscallHandler>`.
pub trait SyscallHandler: Send + Sync + 'static {
    /// Inspects one intercepted syscall and returns the reply to send for it.
    fn handle(&self, notif: &SyscallNotif) -> SyscallResponse;
}
/// Operation code passed as the first argument of the `seccomp` syscall.
const SECCOMP_SET_MODE_FILTER: libc::c_int = 1;
/// Flag passed to the `seccomp` syscall when installing the filter (bit 3).
const SECCOMP_FILTER_FLAG_NEW_LISTENER: libc::c_ulong = 0x8;
/// BPF return value used for syscalls that are routed to the supervisor.
const SECCOMP_RET_USER_NOTIF: u32 = 0x7fc0_0000;
/// BPF return value used for syscalls that are not intercepted.
const SECCOMP_RET_ALLOW: u32 = 0x7fff_0000;
/// Reply flag set for `SyscallResponse::Allow` (bit 0).
const SECCOMP_USER_NOTIF_FLAG_CONTINUE: u32 = 1 << 0;
/// Syscall description embedded in a raw notification record.
///
/// `#[repr(C)]` and the field order are load-bearing: the struct is filled in
/// by the notification `ioctl` (as part of `SeccompNotifRaw`) and its size
/// feeds into the ioctl request number.
#[repr(C)]
struct SeccompData {
    /// Syscall number.
    nr: i32,
    /// Architecture identifier.
    arch: u32,
    /// Instruction pointer at the time of the syscall.
    instruction_pointer: u64,
    /// Raw syscall argument words.
    args: [u64; 6],
}
/// Raw notification record filled in by `SECCOMP_IOCTL_NOTIF_RECV`.
///
/// `#[repr(C)]` and the field order must not change: the size of this struct
/// is encoded into the ioctl request number.
#[repr(C)]
struct SeccompNotifRaw {
    /// Notification id; echoed back in the reply.
    id: u64,
    /// `pid` reported with the notification.
    pid: u32,
    /// Flags word of the record (not read anywhere in this file).
    flags: u32,
    /// The intercepted syscall itself.
    data: SeccompData,
}
/// Reply record handed to `SECCOMP_IOCTL_NOTIF_SEND`.
///
/// `#[repr(C)]` and the field order must not change: the size of this struct
/// is encoded into the ioctl request number.
#[repr(C)]
struct SeccompNotifResp {
    /// Id of the notification being answered.
    id: u64,
    /// Value carried by a `SyscallResponse::Return` reply; zero otherwise.
    val: i64,
    /// Negated errno carried by a `SyscallResponse::Deny` reply; zero otherwise.
    error: i32,
    /// `SECCOMP_USER_NOTIF_FLAG_CONTINUE` for `SyscallResponse::Allow`; zero otherwise.
    flags: u32,
}
/// Packs an ioctl request number: direction value 3 in bits 30-31, `size`
/// from bit 16, `type_` from bit 8 and `nr` in the low bits.
///
/// The fields are OR-ed together in `u32` without masking, so callers must
/// pass values that fit their slot.
const fn iowr(type_: u32, nr: u32, size: u32) -> libc::c_ulong {
    let direction = 3u32 << 30;
    let payload_size = size << 16;
    let magic = type_ << 8;
    (direction | payload_size | magic | nr) as libc::c_ulong
}
/// Request number for receiving a notification: type `'!'`, command 0,
/// payload size of `SeccompNotifRaw`.
const SECCOMP_IOCTL_NOTIF_RECV: libc::c_ulong = {
    let payload_size = std::mem::size_of::<SeccompNotifRaw>() as u32;
    iowr(b'!' as u32, 0, payload_size)
};
/// Request number for sending a reply: type `'!'`, command 1, payload size
/// of `SeccompNotifResp`.
const SECCOMP_IOCTL_NOTIF_SEND: libc::c_ulong = {
    let payload_size = std::mem::size_of::<SeccompNotifResp>() as u32;
    iowr(b'!' as u32, 1, payload_size)
};
/// Builds a classic-BPF seccomp program that returns
/// `SECCOMP_RET_USER_NOTIF` for every syscall number in `syscalls` and
/// `SECCOMP_RET_ALLOW` for everything else.
///
/// Layout:
///
/// ```text
/// LD  [0]                   ; load the word at offset 0 of the seccomp data
/// JEQ nr_0, -> notify       ; one comparison per listed syscall
/// JEQ nr_1, -> notify
/// ...
/// RET SECCOMP_RET_ALLOW
/// RET SECCOMP_RET_USER_NOTIF  ; notify
/// ```
///
/// BPF jump offsets are 8 bits wide, so a comparison that sits more than 255
/// instructions away from the shared final `RET` cannot reach it. Such a
/// comparison is instead followed by its own `RET SECCOMP_RET_USER_NOTIF`
/// and skips over it on a mismatch. Lists of up to 255 syscalls therefore
/// produce exactly `syscalls.len() + 3` instructions; longer lists add one
/// instruction per out-of-range entry.
///
/// NOTE(review): the program never inspects the architecture word of the
/// seccomp data, so it assumes a single syscall ABI — confirm that this is
/// acceptable for the deployment.
pub fn build_user_notif_bpf(syscalls: &[i64]) -> Vec<libc::sock_filter> {
    const BPF_LD_W_ABS: u16 = 0x20;
    const BPF_JEQ_K: u16 = 0x15;
    const BPF_RET_K: u16 = 0x06;
    // Largest forward jump a `sock_filter` can encode in `jt` / `jf`.
    const MAX_JUMP: usize = u8::MAX as usize;

    let stmt = |code: u16, k: u32| libc::sock_filter {
        code,
        jt: 0,
        jf: 0,
        k,
    };

    let n = syscalls.len();
    let far_entries = n.saturating_sub(MAX_JUMP);
    let mut insns: Vec<libc::sock_filter> = Vec::with_capacity(n + far_entries + 3);

    insns.push(stmt(BPF_LD_W_ABS, 0));

    for (i, &syscall_nr) in syscalls.iter().enumerate() {
        // The comparison operates on a 32-bit word, so the number is
        // deliberately reduced to its low 32 bits.
        let k = syscall_nr as u32;
        // Instructions to skip to land on the shared trailing RET USER_NOTIF:
        // the remaining comparisons plus the RET ALLOW. Only the last 255
        // entries are within reach, and all of those are single instructions,
        // so this distance is exact for them.
        let distance = n - i;
        if distance <= MAX_JUMP {
            insns.push(libc::sock_filter {
                code: BPF_JEQ_K,
                jt: distance as u8,
                jf: 0,
                k,
            });
        } else {
            // Too far for an 8-bit offset: on a match fall through into a
            // local RET USER_NOTIF, on a mismatch hop over it.
            insns.push(libc::sock_filter {
                code: BPF_JEQ_K,
                jt: 0,
                jf: 1,
                k,
            });
            insns.push(stmt(BPF_RET_K, SECCOMP_RET_USER_NOTIF));
        }
    }

    insns.push(stmt(BPF_RET_K, SECCOMP_RET_ALLOW));
    insns.push(stmt(BPF_RET_K, SECCOMP_RET_USER_NOTIF));
    insns
}
/// Installs `bpf` as a seccomp filter on the calling thread via the `seccomp`
/// syscall, passing `SECCOMP_FILTER_FLAG_NEW_LISTENER`, and returns the
/// syscall's non-negative result (the notification fd) as an `i32`.
///
/// # Errors
///
/// * An error is returned without touching the kernel when `bpf` is empty or
///   has more instructions than the 16-bit `sock_fprog::len` field can
///   express (previously the length was silently truncated, which would have
///   installed only a prefix of the program).
/// * Otherwise the OS error of a failed `seccomp` call is returned.
pub fn install_user_notif_filter(bpf: &[libc::sock_filter]) -> io::Result<i32> {
    if bpf.is_empty() {
        return Err(io::Error::other("user_notif BPF filter is empty"));
    }
    if bpf.len() > u16::MAX as usize {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "user_notif BPF filter is too long",
        ));
    }

    let fprog = libc::sock_fprog {
        // Checked above: the length fits in 16 bits.
        len: bpf.len() as u16,
        // The field is `*mut`; this code never writes through it.
        filter: bpf.as_ptr() as *mut libc::sock_filter,
    };

    // SAFETY: `fprog` points at `bpf`, which is borrowed for the whole call,
    // and `fprog` itself lives on this stack frame until the syscall returns.
    let ret = unsafe {
        libc::syscall(
            libc::SYS_seccomp,
            SECCOMP_SET_MODE_FILTER as libc::c_long,
            SECCOMP_FILTER_FLAG_NEW_LISTENER as libc::c_long,
            &fprog as *const libc::sock_fprog as libc::c_long,
        )
    };

    if ret < 0 {
        Err(io::Error::last_os_error())
    } else {
        Ok(ret as i32)
    }
}
/// Sends `notif_fd` over the socket `sock` as an `SCM_RIGHTS` control
/// message, accompanied by a single zero data byte.
///
/// # Errors
///
/// Returns the OS error if `sendmsg` fails, or a generic error if no control
/// message header could be placed in the buffer.
pub fn send_notif_fd(sock: i32, notif_fd: i32) -> io::Result<()> {
    let fd_size = std::mem::size_of::<i32>() as libc::c_uint;
    // SAFETY: CMSG_SPACE only does arithmetic on its argument.
    let cmsg_space = unsafe { libc::CMSG_SPACE(fd_size) as usize };

    // Back the control buffer with u64 words: a `cmsghdr` is written through
    // a pointer into it below, and a byte buffer carries no alignment
    // guarantee for that.
    let word = std::mem::size_of::<u64>();
    let mut cmsg_buf = vec![0u64; cmsg_space.div_ceil(word)];

    let mut iov_buf = [0u8; 1];
    let mut iov = libc::iovec {
        iov_base: iov_buf.as_mut_ptr() as *mut libc::c_void,
        iov_len: 1,
    };

    // SAFETY: `msghdr` consists of integers and raw pointers, for which the
    // all-zero bit pattern is valid.
    let mut msg: libc::msghdr = unsafe { std::mem::zeroed() };
    msg.msg_iov = &mut iov;
    msg.msg_iovlen = 1;
    msg.msg_control = cmsg_buf.as_mut_ptr() as *mut libc::c_void;
    msg.msg_controllen = cmsg_space as _;

    // SAFETY: `msg` describes `cmsg_buf`, which holds at least `cmsg_space` bytes.
    let cmsg = unsafe { libc::CMSG_FIRSTHDR(&msg) };
    if cmsg.is_null() {
        return Err(io::Error::other("CMSG_FIRSTHDR returned null"));
    }

    // SAFETY: `cmsg` is non-null and points into `cmsg_buf`, which is aligned
    // for `cmsghdr` and large enough for one header plus an `i32` payload.
    unsafe {
        (*cmsg).cmsg_level = libc::SOL_SOCKET;
        (*cmsg).cmsg_type = libc::SCM_RIGHTS;
        (*cmsg).cmsg_len = libc::CMSG_LEN(fd_size) as _;
        std::ptr::write_unaligned(libc::CMSG_DATA(cmsg) as *mut i32, notif_fd);
    }

    // SAFETY: every pointer stored in `msg` refers to a local that is still alive.
    let ret = unsafe { libc::sendmsg(sock, &msg, 0) };
    if ret < 0 {
        Err(io::Error::last_os_error())
    } else {
        Ok(())
    }
}
/// Receives one file descriptor sent over `sock` as an `SCM_RIGHTS` control
/// message (the counterpart of `send_notif_fd`) and returns it.
///
/// NOTE(review): `recvmsg` is called with flags `0`, so `MSG_CMSG_CLOEXEC` is
/// not requested for the received descriptor — confirm that this is intended.
///
/// # Errors
///
/// Returns the OS error if `recvmsg` fails, and an error if the message
/// carries no control data or its first control message is not an
/// `SOL_SOCKET` / `SCM_RIGHTS` message large enough to hold a descriptor.
pub fn recv_notif_fd(sock: i32) -> io::Result<i32> {
    let fd_size = std::mem::size_of::<i32>() as libc::c_uint;
    // SAFETY: CMSG_SPACE only does arithmetic on its argument.
    let cmsg_space = unsafe { libc::CMSG_SPACE(fd_size) as usize };

    // Back the control buffer with u64 words: a `cmsghdr` is read through a
    // pointer into it below, and a byte buffer carries no alignment
    // guarantee for that.
    let word = std::mem::size_of::<u64>();
    let mut cmsg_buf = vec![0u64; cmsg_space.div_ceil(word)];

    let mut iov_buf = [0u8; 1];
    let mut iov = libc::iovec {
        iov_base: iov_buf.as_mut_ptr() as *mut libc::c_void,
        iov_len: 1,
    };

    // SAFETY: `msghdr` consists of integers and raw pointers, for which the
    // all-zero bit pattern is valid.
    let mut msg: libc::msghdr = unsafe { std::mem::zeroed() };
    msg.msg_iov = &mut iov;
    msg.msg_iovlen = 1;
    msg.msg_control = cmsg_buf.as_mut_ptr() as *mut libc::c_void;
    msg.msg_controllen = cmsg_space as _;

    // SAFETY: every pointer stored in `msg` refers to a local that is still alive.
    let ret = unsafe { libc::recvmsg(sock, &mut msg, 0) };
    if ret < 0 {
        return Err(io::Error::last_os_error());
    }

    // SAFETY: `msg` still describes `cmsg_buf`; the kernel has updated
    // `msg_controllen` to the amount of control data it wrote.
    let cmsg = unsafe { libc::CMSG_FIRSTHDR(&msg) };
    if cmsg.is_null() {
        return Err(io::Error::other("recvmsg: no control message received"));
    }

    // Only treat the payload as a descriptor when the header says it is one.
    // SAFETY: `cmsg` is non-null and points at a header inside `cmsg_buf`,
    // which is aligned for `cmsghdr`.
    let (level, kind, len) = unsafe {
        (
            (*cmsg).cmsg_level,
            (*cmsg).cmsg_type,
            (*cmsg).cmsg_len as usize,
        )
    };
    // SAFETY: CMSG_LEN only does arithmetic on its argument.
    let min_len = unsafe { libc::CMSG_LEN(fd_size) } as usize;
    if level != libc::SOL_SOCKET || kind != libc::SCM_RIGHTS || len < min_len {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "recvmsg: control message does not carry a file descriptor",
        ));
    }

    // SAFETY: the header was validated above, so an `i32` payload follows it
    // inside `cmsg_buf`.
    Ok(unsafe { std::ptr::read_unaligned(libc::CMSG_DATA(cmsg) as *const i32) })
}
pub fn run_supervisor_loop(notif_fd: i32, handler: Arc<dyn SyscallHandler>) {
loop {
let mut notif: SeccompNotifRaw = unsafe { std::mem::zeroed() };
let ret = unsafe {
libc::ioctl(
notif_fd,
SECCOMP_IOCTL_NOTIF_RECV,
&mut notif as *mut SeccompNotifRaw,
)
};
if ret < 0 {
let err = io::Error::last_os_error();
match err.raw_os_error() {
Some(libc::ENOENT) => break,
Some(libc::EINTR) => continue,
Some(libc::EBADF) => break,
_ => {
log::warn!("seccomp notif recv error: {}", err);
break;
}
}
}
let public_notif = SyscallNotif {
id: notif.id,
pid: notif.pid,
nr: notif.data.nr,
arch: notif.data.arch,
instruction_pointer: notif.data.instruction_pointer,
args: notif.data.args,
};
let response = handler.handle(&public_notif);
let mut resp: SeccompNotifResp = unsafe { std::mem::zeroed() };
resp.id = notif.id;
match response {
SyscallResponse::Allow => {
resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
resp.error = 0;
resp.val = 0;
}
SyscallResponse::Deny(errno) => {
resp.flags = 0;
resp.error = -errno.abs(); resp.val = 0;
}
SyscallResponse::Return(val) => {
resp.flags = 0;
resp.error = 0;
resp.val = val;
}
}
let ret = unsafe {
libc::ioctl(
notif_fd,
SECCOMP_IOCTL_NOTIF_SEND,
&resp as *const SeccompNotifResp,
)
};
if ret < 0 {
let err = io::Error::last_os_error();
match err.raw_os_error() {
Some(libc::ENOENT) => continue,
Some(libc::EINTR) => continue,
Some(libc::EBADF) => break,
_ => {
log::warn!("seccomp notif send error: {}", err);
break;
}
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty syscall list still yields the load and both return instructions.
    #[test]
    fn test_bpf_filter_empty_syscalls() {
        let program = build_user_notif_bpf(&[]);
        assert_eq!(program.len(), 3);
    }

    /// One syscall: load, one comparison that skips the allow, then the two returns.
    #[test]
    fn test_bpf_filter_single_syscall() {
        let program = build_user_notif_bpf(&[42]);
        assert_eq!(program.len(), 4);

        let load = &program[0];
        assert_eq!(load.code, 0x20);
        assert_eq!(load.k, 0);

        let compare = &program[1];
        assert_eq!(compare.k, 42);
        assert_eq!(compare.jt, 1);
        assert_eq!(compare.jf, 0);

        assert_eq!(program[2].k, SECCOMP_RET_ALLOW);
        assert_eq!(program[3].k, SECCOMP_RET_USER_NOTIF);
    }

    /// Jump offsets shrink by one per comparison so each lands on the final return.
    #[test]
    fn test_bpf_filter_multiple_syscalls() {
        let program = build_user_notif_bpf(&[42i64, 165, 228]);
        assert_eq!(program.len(), 6);

        let jumps: Vec<u8> = program[1..4].iter().map(|insn| insn.jt).collect();
        assert_eq!(jumps, [3, 2, 1]);

        assert_eq!(program[4].k, SECCOMP_RET_ALLOW);
        assert_eq!(program[5].k, SECCOMP_RET_USER_NOTIF);
    }

    /// Both request numbers carry direction 3, type `'!'` and the expected command.
    #[test]
    fn test_ioctl_numbers_reasonable() {
        let cases = [
            (SECCOMP_IOCTL_NOTIF_RECV, 0),
            (SECCOMP_IOCTL_NOTIF_SEND, 1),
        ];
        for (request, command) in cases {
            assert_eq!(request >> 30, 3);
            assert_eq!((request >> 8) & 0xff, 0x21);
            assert_eq!(request & 0xff, command);
        }
    }
}