use boxlite_shared::errors::BoxliteError;
use std::collections::HashMap;
use std::io::Read;
use std::sync::Arc;
use bincode::config;
use bincode::config::{Configuration, Fixint, Limit, LittleEndian};
/// Maximum number of bytes bincode may consume while decoding a filter map.
/// Plugged into `BINCODE_CONFIG` as its `Limit` parameter.
const DESERIALIZATION_BYTES_LIMIT: usize = 100_000;
/// bincode configuration used by `deserialize_binary` (and by the tests to
/// encode fixtures): the standard configuration, switched to fixed-width
/// integer encoding, little-endian byte order, and capped at
/// `DESERIALIZATION_BYTES_LIMIT` bytes.
const BINCODE_CONFIG: Configuration<LittleEndian, Fixint, Limit<DESERIALIZATION_BYTES_LIMIT>> =
config::standard()
.with_fixed_int_encoding()
.with_limit::<DESERIALIZATION_BYTES_LIMIT>()
.with_little_endian();
/// A single BPF instruction, stored as a raw 64-bit word.
pub type BpfInstruction = u64;
/// An owned BPF program: a sequence of instructions.
pub type BpfProgram = Vec<BpfInstruction>;
/// A borrowed view of a BPF program; this is what the `apply_*` functions accept.
pub type BpfProgramRef<'a> = &'a [BpfInstruction];
/// Filters keyed by role name (the strings returned by `SeccompRole::as_str`),
/// each program shared behind an `Arc`.
pub type BpfThreadMap = HashMap<String, Arc<BpfProgram>>;
/// Roles for which a seccomp filter can be looked up in a filter map.
///
/// Each role corresponds to one string key, available through
/// [`SeccompRole::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SeccompRole {
    /// Looked up under the key `"vmm"`.
    Vmm,
    /// Looked up under the key `"vcpu"`.
    Vcpu,
    /// Looked up under the key `"api"`.
    Api,
}

impl SeccompRole {
    /// Returns the lowercase map key associated with this role.
    pub fn as_str(&self) -> &'static str {
        match *self {
            SeccompRole::Vmm => "vmm",
            SeccompRole::Vcpu => "vcpu",
            SeccompRole::Api => "api",
        }
    }
}
/// Looks up the filter registered for `role` in `filters`.
///
/// The lookup key is `role.as_str()`; returns `None` when the map holds no
/// entry under that key.
pub fn get_filter(filters: &BpfThreadMap, role: SeccompRole) -> Option<&Arc<BpfProgram>> {
    let key = role.as_str();
    filters.get(key)
}
/// Error returned by `deserialize_binary`; an alias for bincode's decode error.
pub type DeserializationError = bincode::error::DecodeError;
pub fn get_empty_filters() -> BpfThreadMap {
let mut map = BpfThreadMap::new();
map.insert("vmm".to_string(), Arc::new(vec![]));
map.insert("api".to_string(), Arc::new(vec![]));
map.insert("vcpu".to_string(), Arc::new(vec![]));
map
}
pub fn deserialize_binary<R: Read>(mut reader: R) -> Result<BpfThreadMap, DeserializationError> {
let result: HashMap<String, _> = bincode::decode_from_std_read(&mut reader, BINCODE_CONFIG)?;
Ok(result
.into_iter()
.map(|(k, v)| (k.to_lowercase(), Arc::new(v)))
.collect())
}
/// Errors that can occur while installing a seccomp BPF filter.
///
/// NOTE: `displaydoc::Display` generates each variant's `Display` text from the
/// variant's doc comment. Those comments are therefore runtime behavior: each
/// must stay on a single line, and `{0}` interpolates the variant's payload.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum InstallationError {
    /// Filter exceeds the maximum number of instructions that a BPF program can have.
    FilterTooLarge,
    /// Filter synchronization across threads (TSYNC) failed, seccomp returned: {0}
    TsyncFailed(i64),
    /// Error returned by `prctl`: {0}
    Prctl(std::io::Error),
}
/// Largest number of instructions `install_filter` accepts; longer programs are
/// rejected with `InstallationError::FilterTooLarge`.
pub const BPF_MAX_LEN: usize = 4096;
/// Descriptor of a BPF program, passed by pointer to the `seccomp` syscall in
/// `install_filter`.
///
/// `#[repr(C)]` gives the struct a C-compatible field layout.
/// NOTE(review): presumably mirrors the kernel's `struct sock_fprog` — confirm.
#[repr(C)]
#[derive(Debug)]
struct SockFprog {
/// Number of instructions in the program.
len: u16,
/// Pointer to the first instruction of the program.
filter: *const BpfInstruction,
}
pub fn apply_filter(bpf_filter: BpfProgramRef) -> Result<(), InstallationError> {
install_filter(bpf_filter, 0)
}
/// Installs `bpf_filter` with the `SECCOMP_FILTER_FLAG_TSYNC` flag set.
///
/// An empty filter is a no-op. See `install_filter` for the error conditions.
pub fn apply_filter_all_threads(bpf_filter: BpfProgramRef) -> Result<(), InstallationError> {
    let tsync_flags = libc::SECCOMP_FILTER_FLAG_TSYNC;
    install_filter(bpf_filter, tsync_flags)
}
fn install_filter(
bpf_filter: BpfProgramRef,
flags: libc::c_ulong,
) -> Result<(), InstallationError> {
if bpf_filter.is_empty() {
return Ok(());
}
if BPF_MAX_LEN < bpf_filter.len() {
return Err(InstallationError::FilterTooLarge);
}
let bpf_filter_len =
u16::try_from(bpf_filter.len()).map_err(|_| InstallationError::FilterTooLarge)?;
unsafe {
{
let rc = libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
if rc != 0 {
return Err(InstallationError::Prctl(std::io::Error::last_os_error()));
}
}
let bpf_prog = SockFprog {
len: bpf_filter_len,
filter: bpf_filter.as_ptr(),
};
let bpf_prog_ptr = &bpf_prog as *const SockFprog;
{
let rc = libc::syscall(
libc::SYS_seccomp,
libc::SECCOMP_SET_MODE_FILTER,
flags,
bpf_prog_ptr,
);
if rc > 0 {
return Err(InstallationError::TsyncFailed(rc));
}
if rc != 0 {
return Err(InstallationError::Prctl(std::io::Error::last_os_error()));
}
}
}
Ok(())
}
/// Loads the embedded seccomp filters and applies the VMM filter with TSYNC.
///
/// `box_id` is only used to tag the emitted log events.
///
/// After the VMM filter has been applied, the presence of a vCPU filter is
/// logged at debug level; this function does not install it.
///
/// # Errors
///
/// Returns a `BoxliteError` built from
/// `JailerError::Isolation(IsolationError::Seccomp(..))` when the embedded
/// filters cannot be loaded, when no `vmm` filter is present, or when applying
/// the filter fails.
#[cfg(target_os = "linux")]
pub fn apply_vmm_filter(box_id: &str) -> crate::BoxliteResult<()> {
    use crate::jailer::error::{IsolationError, JailerError};

    // Both failure paths below wrap their message in the same error shape.
    let seccomp_error = |message: String| {
        BoxliteError::from(JailerError::Isolation(IsolationError::Seccomp(message)))
    };

    let filters = load_filters(box_id)?;

    let Some(vmm_filter) = get_filter(&filters, SeccompRole::Vmm) else {
        tracing::error!(box_id = %box_id, "VMM filter not found in compiled filters");
        return Err(seccomp_error("Missing vmm filter".to_string()));
    };

    tracing::debug!(
        box_id = %box_id,
        bpf_instructions = vmm_filter.len(),
        "Applying VMM seccomp filter to all threads (TSYNC)"
    );

    if let Err(e) = apply_filter_all_threads(vmm_filter) {
        tracing::error!(
            box_id = %box_id,
            error = %e,
            "Failed to apply VMM seccomp filter (TSYNC)"
        );
        return Err(seccomp_error(e.to_string()));
    }

    tracing::info!(
        box_id = %box_id,
        vmm_filter_instructions = vmm_filter.len(),
        "VMM seccomp filter applied to all threads (TSYNC)"
    );

    // The vCPU filter is only reported here, not installed.
    if let Some(vcpu_filter) = get_filter(&filters, SeccompRole::Vcpu) {
        tracing::debug!(
            box_id = %box_id,
            vcpu_filter_instructions = vcpu_filter.len(),
            "vCPU filter available (vCPU threads inherit from main thread)"
        );
    }

    Ok(())
}
#[cfg(target_os = "linux")]
fn load_filters(box_id: &str) -> crate::BoxliteResult<BpfThreadMap> {
use crate::jailer::error::{IsolationError, JailerError};
let filter_bytes = include_bytes!(concat!(env!("OUT_DIR"), "/seccomp_filter.bpf"));
deserialize_binary(&filter_bytes[..]).map_err(|e| {
tracing::error!(
box_id = %box_id,
error = %e,
"Failed to deserialize seccomp filters"
);
BoxliteError::from(JailerError::Isolation(IsolationError::Seccomp(
e.to_string(),
)))
})
}
#[cfg(test)]
mod tests {
#![allow(clippy::undocumented_unsafe_blocks)]
use std::collections::HashMap;
use std::sync::Arc;
use std::thread;
use super::*;
/// `deserialize_binary` rejects garbage, lowercases keys, and enforces the
/// configured byte limit.
#[test]
fn test_deserialize_binary() {
    // Malformed input must fail to decode.
    let garbage = "invalid data".to_string();
    deserialize_binary(garbage.as_bytes()).unwrap_err();

    // A valid map round-trips, with its mixed-case key lowercased.
    let program = vec![0; 2];
    let input: HashMap<String, BpfProgram> =
        HashMap::from([("VcpU".to_string(), program.clone())]);
    let encoded = bincode::encode_to_vec(&input, BINCODE_CONFIG).unwrap();
    let expected: BpfThreadMap = HashMap::from([("vcpu".to_string(), Arc::new(program))]);
    assert_eq!(deserialize_binary(&encoded[..]).unwrap(), expected);

    // A payload larger than the configured limit is refused.
    let oversized = vec![0; DESERIALIZATION_BYTES_LIMIT + 1];
    let input: HashMap<String, BpfProgram> = HashMap::from([("VcpU".to_string(), oversized)]);
    let encoded = bincode::encode_to_vec(&input, BINCODE_CONFIG).unwrap();
    let err = deserialize_binary(&encoded[..]).unwrap_err();
    assert!(matches!(err, bincode::error::DecodeError::LimitExceeded));
}
/// `apply_filter` rejects oversized filters, ignores empty ones, and reports an
/// error for an invalid program, leaving the seccomp mode at 0 in every case.
#[test]
fn test_filter_apply() {
    /// Seccomp mode of the calling thread as reported by `prctl(PR_GET_SECCOMP)`.
    fn current_seccomp_mode() -> libc::c_int {
        unsafe { libc::prctl(libc::PR_GET_SECCOMP) }
    }

    // Each scenario runs on its own thread; presumably so per-thread state
    // touched by one scenario cannot affect the next - confirm.

    // Scenario 1: more instructions than BPF_MAX_LEN.
    let oversized = thread::spawn(|| {
        let filter: BpfProgram = vec![0; 5000];
        let err = apply_filter(&filter).unwrap_err();
        assert!(matches!(err, InstallationError::FilterTooLarge));
    });
    oversized.join().unwrap();

    // Scenario 2: an empty filter succeeds and installs nothing.
    let empty = thread::spawn(|| {
        let filter: BpfProgram = vec![];
        assert_eq!(filter.len(), 0);

        assert_eq!(current_seccomp_mode(), 0);
        apply_filter(&filter).unwrap();
        assert_eq!(current_seccomp_mode(), 0);
    });
    empty.join().unwrap();

    // Scenario 3: an invalid single-instruction program is refused.
    let invalid = thread::spawn(|| {
        let filter: BpfProgram = vec![0xFF; 1];

        assert_eq!(current_seccomp_mode(), 0);
        let err = apply_filter(&filter).unwrap_err();
        assert!(matches!(err, InstallationError::Prctl(_)));
        assert_eq!(current_seccomp_mode(), 0);
    });
    invalid.join().unwrap();
}
/// `get_empty_filters` yields exactly three entries, each an empty program.
#[test]
fn test_get_empty_filters() {
    let filters = get_empty_filters();
    assert_eq!(filters.len(), 3);

    for key in ["vmm", "vcpu", "api"] {
        assert!(filters.get(key).unwrap().is_empty());
    }
}
/// Every `SeccompRole` maps to its expected lowercase key.
#[test]
fn test_seccomp_role() {
    let expected = [
        (SeccompRole::Vmm, "vmm"),
        (SeccompRole::Vcpu, "vcpu"),
        (SeccompRole::Api, "api"),
    ];
    for (role, name) in expected {
        assert_eq!(role.as_str(), name);
    }
}
/// `get_filter` finds the roles present in the map and returns `None` for a
/// role that has no entry.
#[test]
fn test_get_filter() {
    let map: BpfThreadMap = HashMap::from([
        ("vmm".to_string(), Arc::new(vec![1, 2, 3])),
        ("vcpu".to_string(), Arc::new(vec![4, 5])),
    ]);

    let vmm = get_filter(&map, SeccompRole::Vmm);
    assert!(vmm.is_some());
    assert_eq!(vmm.unwrap().len(), 3);

    assert!(get_filter(&map, SeccompRole::Vcpu).is_some());
    assert!(get_filter(&map, SeccompRole::Api).is_none());
}
/// The `Display` text of `TsyncFailed` includes the value it carries.
#[test]
fn test_tsync_failed_display() {
    let rendered = InstallationError::TsyncFailed(12345).to_string();
    assert!(rendered.contains("12345"));
}
/// Applying an empty filter with TSYNC succeeds and leaves the seccomp mode at 0.
#[test]
fn test_apply_filter_all_threads_empty() {
    let worker = thread::spawn(|| {
        let empty: BpfProgram = Vec::new();
        apply_filter_all_threads(&empty).unwrap();

        let mode = unsafe { libc::prctl(libc::PR_GET_SECCOMP) };
        assert_eq!(mode, 0);
    });
    worker.join().unwrap();
}
/// The filter blob embedded at compile time decodes, contains all three roles,
/// and its `vmm` and `vcpu` programs are non-empty.
#[cfg(target_os = "linux")]
#[test]
fn test_deserialize_embedded_filter() {
    let embedded = include_bytes!(concat!(env!("OUT_DIR"), "/seccomp_filter.bpf"));
    let filters = deserialize_binary(&embedded[..]).expect("Failed to deserialize filter");

    // All three roles must be present.
    assert!(filters.contains_key("vmm"), "Missing vmm filter");
    assert!(filters.contains_key("vcpu"), "Missing vcpu filter");
    assert!(filters.contains_key("api"), "Missing api filter");

    // The vmm and vcpu programs must contain instructions.
    let vmm = filters.get("vmm").unwrap();
    assert!(!vmm.is_empty(), "vmm filter is empty");
    let vcpu = filters.get("vcpu").unwrap();
    assert!(!vcpu.is_empty(), "vcpu filter is empty");
}
}