use super::BootError;
use crate::sys::{self, ObservedShape};
use bvisor::linux::protocol::{DescriptorKind, DescriptorSlotV1, LauncherState, LinuxLaunchBodyV1};
use std::fs::File;
use std::io::{Read, Write};
use std::os::fd::RawFd;
/// Compiles the seccomp program described by `body.target.seccomp`.
///
/// The deny list is assembled from the request flags: `deny_new_tasks` adds
/// `SeccompPolicy::task_creation_syscalls()` and `deny_inet_sockets` adds
/// `SeccompPolicy::socket_syscall()`. The policy is then compiled for the
/// architecture reported by `current_seccomp_arch()`.
///
/// # Errors
///
/// Returns `Err(())` when the body carries no seccomp request, when the
/// request's `denies_anything()` is false, or when policy compilation fails.
pub(super) fn build_seccomp_filter(body: &LinuxLaunchBodyV1) -> Result<sys::BpfProgram, ()> {
    use bvisor::linux::seccomp::{DefaultAction, SeccompPolicy};

    // A missing request and a request that denies nothing are rejected alike.
    let request = match body.target.seccomp {
        Some(request) if request.denies_anything() => request,
        _ => return Err(()),
    };

    let mut denied_syscalls = Vec::new();
    if request.deny_new_tasks {
        denied_syscalls.extend(SeccompPolicy::task_creation_syscalls());
    }
    if request.deny_inet_sockets {
        denied_syscalls.push(SeccompPolicy::socket_syscall());
    }

    let action = DefaultAction::Errno(eperm());
    SeccompPolicy::denylist(action, denied_syscalls)
        .compile(current_seccomp_arch())
        .map(|compiled| compiled.program().clone())
        .map_err(|_| ())
}
/// Returns `libc::EPERM` converted to `u32`, or `1` if the conversion fails.
fn eperm() -> u32 {
    // Value used when `libc::EPERM` cannot be represented as a `u32`.
    const FALLBACK: u32 = 1;
    u32::try_from(libc::EPERM).unwrap_or(FALLBACK)
}
fn current_seccomp_arch() -> bvisor::SeccompArch {
use bvisor::SeccompArch;
#[cfg(target_arch = "x86_64")]
{
SeccompArch::X86_64
}
#[cfg(target_arch = "aarch64")]
{
SeccompArch::Aarch64
}
#[cfg(target_arch = "riscv64")]
{
SeccompArch::Riscv64
}
#[cfg(not(any(
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "riscv64"
)))]
{
SeccompArch::X86_64
}
}
/// Checks every slot in `body.descriptor_table` against the shape observed
/// via `sys::fstat_shape` on the slot's descriptor.
///
/// The nested result separates the two failure modes:
/// * outer `Err(BootError)` — `sys::fstat_shape` itself failed;
/// * inner `Err(())` — a slot was inspected but did not satisfy
///   `shape_matches`; checking stops at the first such slot.
///
/// Returns `Ok(Ok(()))` when every slot matches (including an empty table).
pub(super) fn verify_handles(body: &LinuxLaunchBodyV1) -> Result<Result<(), ()>, BootError> {
    for slot in &body.descriptor_table {
        let fd = super::raw(slot.slot_index);
        let observed = sys::fstat_shape(fd)?;
        if shape_matches(slot, &observed) {
            continue;
        }
        return Ok(Err(()));
    }
    Ok(Ok(()))
}
/// Reports whether `observed` is consistent with what `slot` expects.
///
/// The expected descriptor kind is translated to a `libc::S_IF*` file type,
/// which must equal `observed.file_type`. For directories and regular files
/// the observed writability must also equal `slot.expected.writable`; for
/// sockets and pipes writability is not compared. Any other descriptor kind
/// never matches.
fn shape_matches(slot: &DescriptorSlotV1, observed: &ObservedShape) -> bool {
    let (wanted_type, compare_writable) = match slot.expected.kind {
        DescriptorKind::Directory => (libc::S_IFDIR, true),
        DescriptorKind::Regular => (libc::S_IFREG, true),
        DescriptorKind::Socket => (libc::S_IFSOCK, false),
        DescriptorKind::Pipe => (libc::S_IFIFO, false),
        // Kinds without a known file-type mapping are rejected outright.
        _ => return false,
    };

    observed.file_type == wanted_type
        && (!compare_writable || observed.writable == slot.expected.writable)
}
/// Writes the child's ID mappings under `/proc/<child_pid>` and then signals
/// the child over `sync_write_fd`.
///
/// The files are written in this order, stopping at the first failure:
/// 1. `uid_map` (or the leaf supplied by `forced_map_fail_leaf()`) with
///    `0 <euid> 1`,
/// 2. `setgroups` with `deny`,
/// 3. `gid_map` with `0 <egid> 1`,
///
/// where `euid`/`egid` come from `sys::effective_ids()`.
///
/// # Errors
///
/// Returns `Err(())` if any of the writes fails — in which case
/// `sync_write_fd` is closed via `sys::close_fd` and no byte is sent — or if
/// writing the single `1` byte to the sync descriptor fails.
pub(super) fn user_namespace_rendezvous(
    child_pid: libc::pid_t,
    sync_write_fd: RawFd,
) -> Result<(), ()> {
    let (euid, egid) = sys::effective_ids();
    let proc_dir = format!("/proc/{child_pid}");
    let uid_leaf = forced_map_fail_leaf().unwrap_or("uid_map");

    let steps = [
        (uid_leaf, format!("0 {euid} 1\n")),
        ("setgroups", String::from("deny")),
        ("gid_map", format!("0 {egid} 1\n")),
    ];

    // `all` short-circuits, so later files are not touched after a failure.
    let all_written = steps
        .iter()
        .all(|(leaf, contents)| std::fs::write(format!("{proc_dir}/{leaf}"), contents).is_ok());

    if !all_written {
        sys::close_fd(sync_write_fd);
        return Err(());
    }

    // NOTE(review): presumably the adopted handle closes the descriptor when
    // dropped at the end of this function — confirm against `sys::adopt_fd`.
    let mut sync_pipe = sys::adopt_fd(sync_write_fd);
    sync_pipe.write_all(&[1u8]).map_err(|_| ())
}
/// Test hook: when the environment variable
/// `BVISOR_TEST_FORCE_USERNS_MAP_FAIL` is set to `1` (surrounding whitespace
/// ignored), returns a leaf name to use in place of `uid_map`; otherwise
/// returns `None`.
#[cfg(feature = "dangerous-test-hooks")]
fn forced_map_fail_leaf() -> Option<&'static str> {
    let flag = std::env::var("BVISOR_TEST_FORCE_USERNS_MAP_FAIL").ok()?;
    (flag.trim() == "1").then_some("bvisor_nonexistent_map_attr")
}

/// Without the `dangerous-test-hooks` feature no override is ever supplied.
#[cfg(not(feature = "dangerous-test-hooks"))]
fn forced_map_fail_leaf() -> Option<&'static str> {
    None
}
/// What `wait_for_child` learned from the child's error pipe.
pub(super) enum ChildOutcome {
    /// The error pipe reached end-of-file without delivering any bytes.
    ExecedToEof,
    /// The pipe delivered data: the value decoded from its first four bytes
    /// (native endianness), or `-1` when only one to three bytes arrived.
    Errno(i32),
}
/// Drains the child's error pipe to end-of-file, reaps the child, and
/// classifies what was read.
///
/// * No bytes: `ChildOutcome::ExecedToEof`.
/// * Four or more bytes: `ChildOutcome::Errno` holding the first four bytes
///   decoded as a native-endian `i32`.
/// * One to three bytes: `ChildOutcome::Errno(-1)`.
///
/// # Errors
///
/// Propagates a failure of `read_to_end` on the pipe as a `BootError`.
pub(super) fn wait_for_child(
    error_read_fd: RawFd,
    child_pid: libc::pid_t,
) -> Result<ChildOutcome, BootError> {
    let mut error_pipe = sys::adopt_fd(error_read_fd);
    let mut report = Vec::new();
    // NOTE(review): a read error returns here, before `reap` runs — confirm
    // the caller is fine with the child not being reaped on that path.
    error_pipe.read_to_end(&mut report)?;
    reap(child_pid);

    let outcome = match report.as_slice() {
        [] => ChildOutcome::ExecedToEof,
        [b0, b1, b2, b3, ..] => ChildOutcome::Errno(i32::from_ne_bytes([*b0, *b1, *b2, *b3])),
        // A truncated report (1..=3 bytes) cannot be decoded.
        _ => ChildOutcome::Errno(-1),
    };
    Ok(outcome)
}
/// Reaps `child_pid` by delegating to `sys::reap_child`; nothing is returned
/// to the caller.
fn reap(child_pid: libc::pid_t) {
sys::reap_child(child_pid);
}
/// Line-oriented record of launcher progress, written to a `File` sink.
pub(super) struct Transcript {
// Destination file; every `emit`/`note` call writes one line and flushes it.
sink: File,
}
impl Transcript {
pub(super) fn new(sink: File) -> Self {
Self { sink }
}
pub(super) fn emit(&mut self, state: LauncherState) {
let _ = writeln!(self.sink, "{state:?}");
let _ = self.sink.flush();
}
pub(super) fn note(&mut self, text: &str) -> std::io::Result<()> {
writeln!(self.sink, "# {text}")?;
self.sink.flush()
}
}
/// Reports a boot failure on standard error and yields exit code `4`.
///
/// The message is `bvisor-linux-launcher: <err>` using the error's `Display`
/// output; a failure to write to stderr is ignored.
pub(super) fn boot_fault(err: &BootError) -> std::process::ExitCode {
    writeln!(std::io::stderr(), "bvisor-linux-launcher: {err}").ok();
    std::process::ExitCode::from(4)
}