#![cfg(all(target_os = "linux", feature = "backend-linux"))]
use bvisor::linux::cgroup::{probe_controller_base, CgroupLeaf, CgroupLimits};
use bvisor::linux::launch::{
launch_confinement_unavailable, resolve_launcher_path, run_launcher, AuthorityFd,
};
use bvisor::linux::protocol::{
DescriptorKind, DescriptorRole, DescriptorShape, DescriptorSlotV1, LinuxLaunchBodyV1,
LinuxLaunchPlanV1, LoweringWireEntryV1, LoweringWireV1, TargetSpecV1,
};
use bvisor::{AdmissionProgramHash, AttemptId, BackendProfileHash, BoundaryPlanHash};
use std::io::Write;
use std::os::fd::{OwnedFd, RawFd};
use std::sync::atomic::{AtomicU64, Ordering};
// Lowering entry ids placed, in this order, into the plan built by `cgroup_plan`.
const ID_AMBIENT_SCRUB: &str = "linux.ambient.scrub.v1";
const ID_EXEC: &str = "linux.exec.v1";
// Phase codes paired with the two lowering entry ids above (scrub, then exec).
const PHASE_CODE_SCRUB: u8 = 3; const PHASE_CODE_EXEC: u8 = 5;
// Descriptor-table slot that carries the target executable.
const EXE_SLOT: u32 = 10;
// Descriptor-table slot that carries the prepared cgroup leaf directory.
const CGROUP_SLOT: u32 = 11;
// Prefix stripped from the leaf directory to obtain the path the test expects
// to see on the child's `0::` line.
const CGROUP_V2_ROOT: &str = "/sys/fs/cgroup";
// Per-process counter appended to the leaf name, so repeated leaf creations in
// this test binary get different names.
static LEAF_COUNTER: AtomicU64 = AtomicU64::new(0);
/// Builds a single lowering wire entry for `id` in phase `phase_code`.
///
/// The entry's version is always 1 and both of its digests are all zeroes.
fn entry(id: &str, phase_code: u8) -> LoweringWireEntryV1 {
    const ZERO_DIGEST: [u8; 32] = [0u8; 32];

    let id = String::from(id);
    LoweringWireEntryV1 {
        id,
        version: 1,
        phase_code,
        param_digest: ZERO_DIGEST,
        decl_digest: ZERO_DIGEST,
    }
}
fn cgroup_plan() -> LinuxLaunchPlanV1 {
let lowering = LoweringWireV1 {
entries: vec![
entry(ID_AMBIENT_SCRUB, PHASE_CODE_SCRUB),
entry(ID_EXEC, PHASE_CODE_EXEC),
],
};
let bytes = batpak::canonical::to_bytes(&lowering).expect("encode lowering");
let h_l = batpak::event::hash::compute_hash(&bytes);
let table = vec![
DescriptorSlotV1 {
slot_index: EXE_SLOT,
role: DescriptorRole::TargetExe,
expected: DescriptorShape {
kind: DescriptorKind::Regular,
writable: false,
},
},
DescriptorSlotV1 {
slot_index: CGROUP_SLOT,
role: DescriptorRole::CgroupDir,
expected: DescriptorShape {
kind: DescriptorKind::Directory,
writable: false,
},
},
];
LinuxLaunchPlanV1 {
body: LinuxLaunchBodyV1 {
attempt_id: AttemptId([7u8; 32]),
plan_id: BoundaryPlanHash([1u8; 32]),
h_a: AdmissionProgramHash([2u8; 32]),
h_p: BackendProfileHash([3u8; 32]),
h_l,
lowering,
descriptor_table: table,
target: TargetSpecV1 {
argv: vec!["cat".to_owned(), "/proc/self/cgroup".to_owned()],
envp: vec![("PATH".to_owned(), "/usr/bin".to_owned())],
exe_slot: EXE_SLOT,
user_namespace: None,
network_namespace: None,
seccomp: None,
},
},
}
}
/// Extracts the path from the first `0::` line of `/proc/<pid>/cgroup`-style text.
///
/// Returns the remainder of that line after the `0::` prefix, with surrounding
/// whitespace trimmed, or `None` when no line starts with `0::`.
fn unified_line(proc_cgroup: &str) -> Option<String> {
    proc_cgroup
        .lines()
        .find_map(|line| line.strip_prefix("0::"))
        .map(|path| path.trim().to_owned())
}
/// Runs `cat /proc/self/cgroup` through the launcher with a freshly created
/// cgroup leaf passed in the cgroup slot, then asserts that the `0::` line the
/// child printed names that leaf and differs from this process's own `0::` line.
///
/// Prints a `SKIP` line to stderr and returns (passing) when no `pids`
/// controller base is found, when the leaf cannot be created, or when
/// `launch_confinement_unavailable` reports true for the launcher's observation.
#[test]
fn clone_into_cgroup_births_the_child_inside_the_prepared_leaf_or_skip() {
let mut sink = std::io::stderr();
// No usable base for the `pids` controller: skip rather than fail.
let Some(base) = probe_controller_base(&["pids"]) else {
let _ = writeln!(
sink,
"SKIP clone_into_cgroup: no writable `pids`-delegating cgroup v2 ancestor on this host"
);
return;
};
// Leaf name combines this process's id with a per-process counter value.
let suffix = LEAF_COUNTER.fetch_add(1, Ordering::Relaxed);
let name = format!("bvisor-clone-{}-{suffix}", std::process::id());
// A leaf-creation failure is also treated as a skip, not a test failure.
let mut leaf = match CgroupLeaf::create(&base, &name, CgroupLimits::with_pids_max(64)) {
Ok(leaf) => leaf,
Err(e) => {
let _ = writeln!(
sink,
"SKIP clone_into_cgroup: leaf create failed ({e}); treating as no-delegation"
);
return;
}
};
// NOTE(review): the `expect` calls from here down to `run_launcher` panic
// before the kill/wait/remove teardown below runs; unless `CgroupLeaf` cleans
// up on drop (not visible here) that would leave the leaf behind — confirm.
let leaf_dir = leaf.dir().expect("leaf dir").to_path_buf();
// Leaf path with the cgroup v2 root prefix removed; this is the value the
// child's `0::` line is compared against.
let expected_rel = leaf_dir
.to_string_lossy()
.strip_prefix(CGROUP_V2_ROOT)
.map(str::to_owned)
.expect("leaf dir is under the cgroup v2 mount");
// This process's own `0::` path; falls back to an empty string when the file
// cannot be read or contains no `0::` line.
let host_rel = unified_line(&std::fs::read_to_string("/proc/self/cgroup").unwrap_or_default())
.unwrap_or_default();
let cat = OwnedFd::from(std::fs::File::open("/bin/cat").expect("open /bin/cat"));
let leaf_fd = leaf.dir_fd().expect("open leaf dir fd");
// Pair each handle with the slot index that `cgroup_plan`'s descriptor table
// declares for it: the executable at EXE_SLOT, the leaf dir at CGROUP_SLOT.
let authority = vec![
AuthorityFd {
slot_index: RawFd::try_from(EXE_SLOT).expect("exe slot fits RawFd"),
handle: cat,
},
AuthorityFd {
slot_index: RawFd::try_from(CGROUP_SLOT).expect("cgroup slot fits RawFd"),
handle: leaf_fd,
},
];
let launcher = resolve_launcher_path(env!("CARGO_BIN_EXE_bvisor-linux-launcher"));
let plan = cgroup_plan();
let observation = run_launcher(&launcher, &plan, authority);
// Tear the leaf down before the launcher result is unwrapped or asserted on,
// so a failure below does not skip it. Each step's result is deliberately
// ignored (best-effort cleanup).
let _ = leaf.kill();
let _ = leaf.wait_until_empty(50, std::time::Duration::from_millis(10));
let _ = leaf.remove();
let observation = observation.expect("harness ran the launcher");
// The launcher could not apply confinement on this host: skip.
if launch_confinement_unavailable(&observation) {
let _ = writeln!(
sink,
"SKIP clone_into_cgroup_births_the_child_inside_the_prepared_leaf_or_skip: \
kernel/container lacks landlock/userns/seccomp (ENOSYS); the launcher faulted before \
exec — exercised on capable kernels + the bvisor-linux CI lane"
);
return;
}
let child_cgroup = unified_line(&String::from_utf8_lossy(&observation.captured_stdout));
// Log all three values before asserting so they are visible when an
// assertion below fails.
let _ = writeln!(
sink,
"child /proc/self/cgroup 0:: line = {child_cgroup:?}; expected leaf = {expected_rel:?}; host = {host_rel:?}"
);
assert!(
observation.exec_succeeded(),
"the workload must run to success in the leaf; transcript: {:?}",
observation.transcript
);
let child_cgroup = child_cgroup.expect("the workload printed its 0:: cgroup line to stdout");
// The child must report exactly the prepared leaf...
assert_eq!(
child_cgroup, expected_rel,
"the child must be born INSIDE the prepared leaf (CLONE_INTO_CGROUP)"
);
// ...and that leaf must not be the cgroup this test process itself is in.
assert_ne!(
child_cgroup, host_rel,
"NON-VACUOUS: the child's cgroup must DIFFER from the host's — it really moved \
into the leaf, it did not merely inherit our scope"
);
}