#![cfg(all(
feature = "backend-linux",
feature = "dangerous-test-hooks",
target_os = "linux"
))]
use bvisor::linux::launch::{self, AuthorityFd};
use bvisor::linux::protocol::{
DescriptorKind, DescriptorRole, DescriptorShape, DescriptorSlotV1, LinuxLaunchBodyV1,
LinuxLaunchPlanV1, LoweringWireEntryV1, LoweringWireV1, SeccompRequest, TargetSpecV1,
};
use bvisor::linux::seccomp::seccomp_filter_available;
use bvisor::{
AdmissionProgramHash, AttemptId, Backend, BackendId, BackendProfileHash, BackendRegistry,
BoundaryPlanHash, BoundaryPlanner, BoundaryReportBody, BoundarySpec, BudgetRequirements,
Capability, EnvPolicy, EvidenceRequirements, HostControl, KillGuarantee, KillTarget,
LinuxBackend, MinGuarantee, Outcome, SpawnPolicy, StdStreams, Workload,
};
use std::io::Write;
use std::os::fd::{OwnedFd, RawFd};
use std::path::PathBuf;
use std::sync::Arc;
use std::time::{Duration, Instant};
// Lowering entry ids that `plan` places into the lowering wire, in this order
// (the seccomp entry is only added when a seccomp request is supplied).
const ID_AMBIENT_SCRUB: &str = "linux.ambient.scrub.v1";
const ID_SECCOMP_APPLY: &str = "linux.seccomp.apply.v1";
const ID_EXEC: &str = "linux.exec.v1";
// Phase codes that `plan` pairs with the ids above.
// NOTE(review): the numeric values presumably mirror the launcher's own phase numbering —
// confirm against the protocol definition.
const PHASE_CODE_SCRUB: u8 = 3;
const PHASE_CODE_CONFINE: u8 = 4;
const PHASE_CODE_EXEC: u8 = 5;
// Descriptor-table slot index that `plan` assigns to the target executable
// (`DescriptorRole::TargetExe`); `sh_authority` hands over the `/bin/sh` handle under the
// same slot.
const EXE_SLOT: u32 = 10;
/// Returns the path of the `bvisor-linux-launcher` binary.
///
/// The path is read at compile time from the `CARGO_BIN_EXE_bvisor-linux-launcher`
/// environment variable via `env!`.
fn test_launcher_path() -> PathBuf {
    let launcher: &str = env!("CARGO_BIN_EXE_bvisor-linux-launcher");
    PathBuf::from(launcher)
}
/// Builds a marker string of the form `BVISOR-SPAWN-MARKER-<pid>-<nanos>`.
///
/// `<pid>` is this process's id and `<nanos>` is the number of nanoseconds since the Unix
/// epoch; when the system clock reads earlier than the epoch, `0` is used instead.
fn unique_marker() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};

    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_nanos());
    let pid = std::process::id();
    format!("BVISOR-SPAWN-MARKER-{pid}-{nanos}")
}
/// Polls `/proc` (through `scan_proc_cmdline`) for a process whose command line contains
/// `marker`, sleeping 10 ms between scans.
///
/// Returns the first pid found, or `None` once `deadline` has been reached. The deadline is
/// checked before every scan, so a deadline that has already passed yields `None` without
/// scanning at all.
fn host_find_child(marker: &str, deadline: Instant) -> Option<RawFd> {
    loop {
        if Instant::now() >= deadline {
            return None;
        }
        let hit = scan_proc_cmdline(marker);
        if hit.is_some() {
            return hit;
        }
        std::thread::sleep(Duration::from_millis(10));
    }
}
/// Performs a single pass over `/proc` looking for a process whose `cmdline` contains
/// `marker`.
///
/// Only directory entries whose names consist solely of ASCII digits and parse as a
/// `RawFd`-sized integer are considered. Entries that cannot be read are skipped. The
/// command line is compared after a lossy UTF-8 conversion. Returns the pid of the first
/// match in directory-iteration order, or `None` when `/proc` cannot be listed or nothing
/// matches.
fn scan_proc_cmdline(marker: &str) -> Option<RawFd> {
    std::fs::read_dir("/proc")
        .ok()?
        .flatten()
        .find_map(|dir_entry| {
            let file_name = dir_entry.file_name();
            let pid_text = file_name.to_str()?;
            // Non-numeric names (`self`, `sys`, ...) are not process directories.
            if pid_text.bytes().any(|b| !b.is_ascii_digit()) {
                return None;
            }
            let pid: RawFd = pid_text.parse().ok()?;
            let cmdline = std::fs::read(format!("/proc/{pid_text}/cmdline")).ok()?;
            String::from_utf8_lossy(&cmdline)
                .contains(marker)
                .then_some(pid)
        })
}
/// Reads the `Seccomp:` field from `/proc/<pid>/status`.
///
/// Returns the parsed value of the first line that starts with `Seccomp:`. Returns `None`
/// when the status file cannot be read, when no such line exists, or when that line's value
/// does not parse as a `u32`.
fn host_read_child_seccomp_mode(pid: RawFd) -> Option<u32> {
    let status_path = format!("/proc/{pid}/status");
    let status = std::fs::read_to_string(status_path).ok()?;
    status
        .lines()
        .find_map(|row| row.strip_prefix("Seccomp:"))
        .and_then(|value| value.trim().parse().ok())
}
/// Builds a lowering wire entry for `id` at `phase_code`.
///
/// The version is fixed to `1` and both digests are all-zero placeholders.
fn entry(id: &str, phase_code: u8) -> LoweringWireEntryV1 {
    const ZEROED_DIGEST: [u8; 32] = [0u8; 32];

    LoweringWireEntryV1 {
        id: String::from(id),
        version: 1,
        phase_code,
        param_digest: ZEROED_DIGEST,
        decl_digest: ZEROED_DIGEST,
    }
}
/// Assembles a `LinuxLaunchPlanV1` for `argv`, optionally carrying a seccomp request.
///
/// The lowering wire always holds the ambient-scrub entry first and the exec entry last; the
/// seccomp-apply entry sits between them only when `seccomp` is `Some`. `h_l` is the hash of
/// the canonical encoding of that lowering. The descriptor table has a single read-only
/// regular-file slot (`EXE_SLOT`) for the target executable, and the target environment
/// contains only `PATH=/usr/bin:/bin`. The attempt/plan/admission/profile hashes are fixed
/// filler bytes.
///
/// # Panics
///
/// Panics if the lowering cannot be canonically encoded.
fn plan(argv: Vec<String>, seccomp: Option<SeccompRequest>) -> LinuxLaunchPlanV1 {
    let mut entries = Vec::with_capacity(3);
    entries.push(entry(ID_AMBIENT_SCRUB, PHASE_CODE_SCRUB));
    // Extending from an `Option` appends the confine step only when a request is present.
    entries.extend(
        seccomp
            .as_ref()
            .map(|_| entry(ID_SECCOMP_APPLY, PHASE_CODE_CONFINE)),
    );
    entries.push(entry(ID_EXEC, PHASE_CODE_EXEC));

    let lowering = LoweringWireV1 { entries };
    let encoded = batpak::canonical::to_bytes(&lowering).expect("encode lowering");
    let h_l = batpak::event::hash::compute_hash(&encoded);

    let exe_descriptor = DescriptorSlotV1 {
        slot_index: EXE_SLOT,
        role: DescriptorRole::TargetExe,
        expected: DescriptorShape {
            kind: DescriptorKind::Regular,
            writable: false,
        },
    };
    let target = TargetSpecV1 {
        argv,
        envp: vec![(String::from("PATH"), String::from("/usr/bin:/bin"))],
        exe_slot: EXE_SLOT,
        user_namespace: None,
        network_namespace: None,
        seccomp,
    };
    let body = LinuxLaunchBodyV1 {
        attempt_id: AttemptId([9u8; 32]),
        plan_id: BoundaryPlanHash([1u8; 32]),
        h_a: AdmissionProgramHash([2u8; 32]),
        h_p: BackendProfileHash([3u8; 32]),
        h_l,
        lowering,
        descriptor_table: vec![exe_descriptor],
        target,
    };
    LinuxLaunchPlanV1 { body }
}
/// Opens `/bin/sh` and wraps the resulting descriptor as the authority for `EXE_SLOT`.
///
/// # Panics
///
/// Panics if `EXE_SLOT` does not fit in a `RawFd` or if `/bin/sh` cannot be opened.
fn sh_authority() -> AuthorityFd {
    let slot_index = RawFd::try_from(EXE_SLOT).expect("exe slot fits RawFd");
    let shell = std::fs::File::open("/bin/sh").expect("open /bin/sh");
    AuthorityFd {
        slot_index,
        handle: OwnedFd::from(shell),
    }
}
/// Host-side oracle for the deny-new-tasks seccomp request.
///
/// Runs `sh -c <script>` through the launcher on a helper thread while this thread looks the
/// child up in `/proc` and reads its `Seccomp:` status field. Two channels are then checked:
/// CHANNEL A is the host-observed seccomp mode (must be `2`), CHANNEL B is the captured
/// workload stdout (`before_fork=ok` must appear, `after_fork=ok` must not).
///
/// The test prints a SKIP line to stderr and returns early when
/// `seccomp_filter_available()` is false or when `launch_confinement_unavailable` reports
/// that the launch could not be confined.
#[test]
fn deny_new_tasks_fork_is_refused_and_host_sees_seccomp_filter_or_skip() {
let mut sink = std::io::stderr();
// No seccomp filter support on this host: report the skip and stop.
if !seccomp_filter_available() {
let _ = writeln!(
sink,
"SKIP ChildSpawnDenyNewTasks oracle: this host lacks seccomp filter support \
(no /proc/sys/kernel/seccomp/actions_avail) — the cell is FAIL_CLOSED here, never \
a silent pass"
);
return;
}
// The marker is passed to the shell's no-op `:` builtin, so it becomes part of the
// `sh -c` argument that `host_find_child` searches for in /proc/<pid>/cmdline.
let marker = unique_marker();
// The script prints `before_fork=ok`, counts to 1200000 in a shell loop, then runs the
// external `/bin/echo`. NOTE(review): the counting loop presumably keeps the shell alive
// long enough for the host-side polling below — confirm.
let script = format!(
": {marker}; echo before_fork=ok; \
i=0; while [ \"$i\" -lt 1200000 ]; do i=$((i+1)); done; \
/bin/echo after_fork=ok 2>/dev/null; true"
);
let argv = vec!["sh".to_string(), "-c".to_string(), script];
let launcher = test_launcher_path();
let p = plan(argv, Some(SeccompRequest::deny_new_tasks()));
// One 5000 ms deadline is shared by the pid lookup and the seccomp-mode poll.
let deadline = Instant::now() + Duration::from_millis(5000);
// The launcher runs on its own thread so this thread is free to poll /proc meanwhile.
let handle = std::thread::Builder::new()
.name("spawn-oracle-launcher".to_string())
.spawn(move || {
launch::run_launcher(&launcher, &p, vec![sh_authority()])
.expect("the launcher runs the deny-new-tasks workload to a verdict")
})
.expect("spawn the launcher driver thread");
// CHANNEL A observation: find the child by marker, then retry reading its seccomp mode
// every 10 ms until a value is read or the deadline passes.
let mut host_mode: Option<u32> = None;
if let Some(pid) = host_find_child(&marker, deadline) {
while Instant::now() < deadline {
if let Some(mode) = host_read_child_seccomp_mode(pid) {
host_mode = Some(mode);
break;
}
std::thread::sleep(Duration::from_millis(10));
}
}
// Wait for the launcher's observation; a panic on the launcher thread fails the test here.
let obs = handle.join().expect("spawn-oracle launcher thread joins");
if launch::launch_confinement_unavailable(&obs) {
let _ = writeln!(
sink,
"SKIP deny_new_tasks_fork_is_refused_and_host_sees_seccomp_filter_or_skip: \
kernel/container lacks landlock/userns/seccomp (ENOSYS); the launcher faulted before \
exec — exercised on capable kernels + the bvisor-linux CI lane"
);
return;
}
let stdout = String::from_utf8_lossy(&obs.captured_stdout).into_owned();
// Always log what both channels saw, whether or not the assertions below pass.
let _ = writeln!(
sink,
"ChildSpawnDenyNewTasks: host seccomp_mode={host_mode:?}; workload stdout={stdout:?}; \
notes={:?}",
obs.notes
);
// Failures are collected rather than asserted one by one so a single run reports all of them.
let mut failures: Vec<String> = Vec::new();
if !obs.exec_succeeded() {
failures.push(format!(
"the workload must run to ExecSucceeded with the seccomp filter installed (the filter \
allows execve/execveat); terminal={:?} notes={:?}",
obs.terminal, obs.notes
));
}
// At least one launcher note must mention "seccomp".
if !obs.notes.iter().any(|n| n.contains("seccomp")) {
failures.push(format!(
"the launcher must attest the seccomp denylist install; notes={:?}",
obs.notes
));
}
// CHANNEL A: anything other than an observed mode of 2 (including no observation) fails.
match host_mode {
Some(2) => {}
other => failures.push(format!(
"CHANNEL A: the child's /proc/<pid>/status must report Seccomp: 2 (filter mode \
installed), got {other:?}"
)),
}
// CHANNEL B: the shell must have started and written its first line...
if !stdout.contains("before_fork=ok") {
failures.push(format!(
"CHANNEL B: the workload must START under the filter (the filter allows execve so the \
shell runs + write so it reports), got stdout={stdout:?}"
));
}
// ...but the external `/bin/echo` must not have produced output.
if stdout.contains("after_fork=ok") {
failures.push(format!(
"CHANNEL B: the workload's EXTERNAL-command fork must be REFUSED by the seccomp \
denylist (after_fork=ok must NOT appear), got stdout={stdout:?}"
));
}
assert!(
failures.is_empty(),
"ChildSpawnDenyNewTasks host-side oracle failures: {failures:#?}"
);
}
/// Builds a `BoundarySpec` that runs `/bin/sh` with `args` under the given child-spawn
/// `policy`.
///
/// Besides the child-spawn capability the spec requests an exact, empty environment, the
/// launch-workload and capture-stdout/stderr controls, uniform budgets of `8` at
/// `MinGuarantee::Mediated`, and the default evidence requirements.
fn spawn_spec(policy: SpawnPolicy, args: Vec<String>) -> BoundarySpec {
    let workload = Workload::Process {
        exe: String::from("/bin/sh"),
        args,
    };
    let capabilities = vec![
        Capability::ChildSpawn { policy },
        Capability::Environment {
            policy: EnvPolicy::Exact(Vec::new()),
        },
    ];
    let controls = vec![
        HostControl::LaunchWorkload,
        HostControl::CaptureStreams {
            streams: StdStreams::capture_out_err(),
        },
    ];

    BoundarySpec {
        workload,
        capabilities,
        controls,
        budgets: BudgetRequirements::uniform(8, MinGuarantee::Mediated),
        evidence: EvidenceRequirements::default(),
    }
}
/// Builds the `AllowDescendantsWithinBoundary` variant of `spawn_spec` and appends an atomic
/// run-tree kill control to its controls.
fn descendants_spec(args: Vec<String>) -> BoundarySpec {
    let mut spec = spawn_spec(SpawnPolicy::AllowDescendantsWithinBoundary, args);
    let kill_run_tree = HostControl::Kill {
        target: KillTarget::RunTree,
        guarantee: KillGuarantee::Atomic,
    };
    spec.controls.push(kill_run_tree);
    spec
}
/// Plans `spec` against a `LinuxBackend` built with the test launcher path and runs the
/// resulting plan.
///
/// Returns `None` when planning fails (the spec is refused before anything runs), otherwise
/// the body of the report produced by the run.
///
/// # Panics
///
/// Panics if the runner returns an error instead of a report.
fn run_execute(spec: &BoundarySpec) -> Option<BoundaryReportBody> {
    let backend = Arc::new(LinuxBackend::with_launcher_path(test_launcher_path()));
    let id: BackendId = backend.id();
    let mut registry = BackendRegistry::new();
    registry.register(Arc::clone(&backend) as Arc<dyn Backend>);
    // The planner and the runner both borrow the same registry.
    let plan = BoundaryPlanner::new(&registry).plan(spec, &id).ok()?;
    Some(
        bvisor::BoundaryRunner::new(&registry)
            .run(&plan)
            .expect("the run seals a terminal report")
            .body,
    )
}
/// Drives a `SpawnPolicy::DenyNewTasks` spec through `run_execute`.
///
/// Without seccomp filter support the spec must be refused (`run_execute` returns `None`)
/// and the test then reports a skip. Otherwise the spec must be admitted; unless confinement
/// is reported unavailable, the run must end `Outcome::Completed` and the observed facts
/// must include one of kind `child_spawn_lowered`.
#[test]
fn a_deny_new_tasks_spec_runs_through_the_execute_path_or_skip() {
    let mut sink = std::io::stderr();
    let shell_args = |script: &str| vec![String::from("-c"), String::from(script)];

    if !seccomp_filter_available() {
        let spec = spawn_spec(SpawnPolicy::DenyNewTasks, shell_args("true"));
        let refused = run_execute(&spec);
        assert!(
            refused.is_none(),
            "FAIL_CLOSED: with no seccomp filter support, a ChildSpawnDenyNewTasks spec must \
             REFUSE at admission (the cell is Unsupported) — the target never runs; got {refused:?}"
        );
        let _ = writeln!(
            sink,
            "SKIP ChildSpawnDenyNewTasks execute-path positive: no seccomp filter support; the \
             fail-closed admission refusal was asserted instead (never a silent pass)"
        );
        return;
    }

    let probe = "if (exit 0) 2>/dev/null & then echo fork=OK; else echo fork=REFUSED; fi; true";
    let spec = spawn_spec(SpawnPolicy::DenyNewTasks, shell_args(probe));
    let report = run_execute(&spec)
        .expect("a ChildSpawnDenyNewTasks spec must ADMIT (the cell is Enforced on this host)");

    if launch::report_confinement_unavailable(&report.observed) {
        let _ = writeln!(
            sink,
            "SKIP a_deny_new_tasks_spec_runs_through_the_execute_path_or_skip: kernel/container \
             lacks landlock/userns/seccomp (ENOSYS); confinement cannot install here — exercised \
             on capable kernels + the bvisor-linux CI lane"
        );
        return;
    }

    // Collect every violated expectation so one run reports them all.
    let mut failures: Vec<String> = Vec::new();

    let ran_to_completion = report.outcome == Outcome::Completed;
    if !ran_to_completion {
        failures.push(format!(
            "the DenyNewTasks workload must run to Completed under the seccomp denylist: {:?} / {:?}",
            report.outcome, report.observed
        ));
    }

    let lowering_recorded = report
        .observed
        .iter()
        .any(|fact| fact.kind == "child_spawn_lowered");
    if !lowering_recorded {
        failures.push(format!(
            "the execute() path must record the child-spawn lowering: {:?}",
            report.observed
        ));
    }

    assert!(
        failures.is_empty(),
        "ChildSpawnDenyNewTasks execute()-path witness failures: {failures:#?}"
    );
}
/// A `SpawnPolicy::AllowThreadsWithinBoundary` spec must be refused: `run_execute` has to
/// return `None` rather than a report.
#[test]
fn allow_threads_fails_closed_at_admission_the_target_never_runs() {
    let args = vec![String::from("-c"), String::from("true")];
    let spec = spawn_spec(SpawnPolicy::AllowThreadsWithinBoundary, args);
    let report = run_execute(&spec);
    assert!(
        report.is_none(),
        "a ChildSpawnAllowThreads spec must FAIL CLOSED at admission (the cell is Unsupported — \
         the open clone3-pointer/classic-BPF problem) — the target never runs; got a sealed \
         report {report:?}"
    );
}
/// Runs a workload that backgrounds a `sleep 2` descendant under the allow-descendants spec
/// and checks the observed facts of the resulting report.
///
/// Skips (stderr message, early return) when the spec is refused, when confinement is
/// reported unavailable, or — after checking that the lowering fact was recorded — when the
/// run ended in `Outcome::SupervisorFault`. Otherwise the run must be `Completed`, must not
/// report `cgroup_teardown_incomplete`, and must report both `child_spawn_lowered` and
/// `cgroup_leaf_prepared`.
#[test]
fn allow_descendants_is_cgroup_confined_and_cgroup_kill_drains_the_tree_or_skip() {
    let mut sink = std::io::stderr();
    let spec = descendants_spec(vec![
        String::from("-c"),
        String::from("(sleep 2 &) ; echo spawned=descendant; true"),
    ]);

    let Some(report) = run_execute(&spec) else {
        let _ = writeln!(
            sink,
            "SKIP ChildSpawnAllowDescendants: no cgroup base on this host — the cell is \
             FAIL_CLOSED (admission refused), never a silent pass"
        );
        return;
    };

    if launch::report_confinement_unavailable(&report.observed) {
        let _ = writeln!(
            sink,
            "SKIP allow_descendants_is_cgroup_confined_and_cgroup_kill_drains_the_tree_or_skip: \
             kernel/container lacks landlock/userns/seccomp (ENOSYS); confinement cannot install \
             here — exercised on capable kernels + the bvisor-linux CI lane"
        );
        return;
    }

    let mut failures: Vec<String> = Vec::new();

    let lowering_recorded = report
        .observed
        .iter()
        .any(|fact| fact.kind == "child_spawn_lowered");
    if !lowering_recorded {
        failures.push(format!(
            "the execute() path must record the AllowDescendants cgroup lowering: {:?}",
            report.observed
        ));
    }

    // A supervisor fault ends the test as a skip, but only after the lowering check above
    // has been enforced.
    if report.outcome == Outcome::SupervisorFault {
        assert!(
            failures.is_empty(),
            "ChildSpawnAllowDescendants lowering failures (pre-skip): {failures:#?}"
        );
        let _ = writeln!(
            sink,
            "SKIP ChildSpawnAllowDescendants runtime witness: the launcher could not place the \
             child into the cgroup leaf via CLONE_INTO_CGROUP on this host (cgroup-delegation \
             limitation) — the lowering engaged but the placement is unexercisable here, never a \
             silent pass"
        );
        return;
    }

    let ran_to_completion = report.outcome == Outcome::Completed;
    if !ran_to_completion {
        failures.push(format!(
            "the AllowDescendants workload must run to Completed in its cgroup: {:?} / {:?}",
            report.outcome, report.observed
        ));
    }

    let teardown_incomplete = report
        .observed
        .iter()
        .any(|fact| fact.kind == "cgroup_teardown_incomplete");
    if teardown_incomplete {
        failures.push(format!(
            "cgroup.kill must drain the WHOLE descendant tree to empty (no leak): {:?}",
            report.observed
        ));
    }

    let leaf_prepared = report
        .observed
        .iter()
        .any(|fact| fact.kind == "cgroup_leaf_prepared");
    if !leaf_prepared {
        failures.push(format!(
            "the descendant must inherit a prepared run cgroup (CLONE_INTO_CGROUP): {:?}",
            report.observed
        ));
    }

    assert!(
        failures.is_empty(),
        "ChildSpawnAllowDescendants cgroup oracle failures: {failures:#?}"
    );
}
/// Runs a trivial `echo ran; true` workload under the allow-descendants spec.
///
/// Skips with a stderr message when the spec is refused, when confinement is reported
/// unavailable, or when the run ended in `Outcome::SupervisorFault`; in every other case the
/// outcome must be `Outcome::Completed`.
#[test]
fn an_allow_descendants_spec_runs_through_the_execute_path_or_skip() {
    let mut sink = std::io::stderr();
    let spec = descendants_spec(vec![String::from("-c"), String::from("echo ran; true")]);

    let Some(report) = run_execute(&spec) else {
        let _ = writeln!(
            sink,
            "SKIP ChildSpawnAllowDescendants execute-path positive: no cgroup base — the \
             fail-closed admission refusal holds (never a silent pass)"
        );
        return;
    };

    if launch::report_confinement_unavailable(&report.observed) {
        let _ = writeln!(
            sink,
            "SKIP an_allow_descendants_spec_runs_through_the_execute_path_or_skip: \
             kernel/container lacks landlock/userns/seccomp (ENOSYS); confinement cannot \
             install here — exercised on capable kernels + the bvisor-linux CI lane"
        );
        return;
    }

    if report.outcome == Outcome::SupervisorFault {
        let _ = writeln!(
            sink,
            "SKIP ChildSpawnAllowDescendants execute-path runtime: admitted but the launcher \
             could not place the child into the cgroup leaf (CLONE_INTO_CGROUP delegation \
             limit) — never a silent pass"
        );
        return;
    }

    assert_eq!(
        report.outcome,
        Outcome::Completed,
        "an admitted AllowDescendants spec must run to Completed inside its cgroup: {:?}",
        report.observed
    );
    let _ = writeln!(
        sink,
        "ChildSpawnAllowDescendants execute-path: admitted + Completed (cgroup-backed)"
    );
}