use crate::cgroup::{CgroupManager, anyhow_first_io_errno};
use anyhow::{Context, Result};
use std::collections::BTreeSet;
use std::path::{Path, PathBuf};
use std::time::Duration;
/// Filesystem magic compared against the `f_type` that `statfs("/sys/fs/cgroup")`
/// reports in `BuildSandbox::try_create`; a match means cgroup v2 is mounted there.
const CGROUP2_SUPER_MAGIC: i64 = 0x6367_7270;
/// Maximum number of `remove_cgroup` attempts `BuildSandbox`'s `Drop` makes
/// while the failure is `EBUSY`.
const RMDIR_EBUSY_RETRIES: u32 = 5;
/// Sleep between two consecutive `EBUSY` removal attempts.
const RMDIR_EBUSY_BACKOFF: Duration = Duration::from_millis(10);
/// Minimum mtime age (24 hours) a leftover sandbox directory must have before
/// the orphan sweep removes it.
const ORPHAN_MAX_AGE: Duration = Duration::from_secs(24 * 60 * 60);
/// Why a build sandbox could not be set up.
///
/// Carried by [`BuildSandbox::Degraded`], or rendered into the hard-error
/// message when the caller asked for degradation to be fatal.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum SandboxDegraded {
    /// `/sys/fs/cgroup` is not a cgroup v2 mount, or the caller's cgroup v2
    /// path could not be determined.
    NoCgroupV2,
    /// The parent cgroup's `cgroup.controllers` does not list `cpuset`.
    NoCpusetController,
    /// Enabling `cpuset` in the parent's subtree control failed with `EBUSY`.
    SubtreeControlRefused,
    /// A cgroup operation failed with `EACCES` or `EPERM`.
    PermissionDenied,
    /// The caller lives in the cgroup v2 root, where no sandbox is created.
    RootCgroupRefused,
}

impl std::fmt::Display for SandboxDegraded {
    /// Writes the fixed human-readable explanation for the variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Select the static text first, then emit it with one write.
        let reason = match self {
            Self::NoCgroupV2 => "cgroup v2 not mounted at /sys/fs/cgroup",
            Self::NoCpusetController => "parent cgroup does not expose cpuset controller",
            Self::SubtreeControlRefused => "parent cgroup rejected +cpuset subtree_control",
            Self::PermissionDenied => "insufficient permission to create / write cgroup files",
            Self::RootCgroupRefused => {
                "caller's cgroup is the cgroup v2 root (0::/); sandbox \
                 creation at the root violates the no-internal-process \
                 rule"
            }
        };
        f.write_str(reason)
    }
}

impl std::error::Error for SandboxDegraded {}
/// State kept by an active build sandbox so that `Drop` can tear it down.
#[derive(Debug)]
pub(crate) struct SandboxInner {
/// Manager constructed from the parent cgroup path; used to drain and remove
/// the sandbox cgroup.
cg: CgroupManager,
/// Name of the sandbox cgroup under the parent
/// (`ktstr-build-{epoch_nanos}-{pid}` when built by `BuildSandbox::try_create`).
name: String,
/// Absolute path of the parent cgroup; only used as a log field here.
parent_cgroup: PathBuf,
/// Pid recorded at creation; logged when the cgroup cannot be removed.
our_pid: u32,
}
/// Outcome of trying to place the current process into a cpuset-restricted
/// cgroup. The `Drop` impl cleans up the `Active` variant.
#[derive(Debug)]
pub enum BuildSandbox {
/// The sandbox cgroup was created, configured, and this process was moved
/// into it.
Active(Box<SandboxInner>),
/// No sandbox is in place; the payload says why.
#[allow(dead_code)]
Degraded(SandboxDegraded),
}
impl BuildSandbox {
/// Tries to create a `ktstr-build-*` cgroup under the caller's current
/// cgroup, restrict it to `plan_cpus` / `plan_mems`, and move this process
/// into it.
///
/// * `plan_cpus` - CPU ids written to the new cgroup's cpuset.
/// * `plan_mems` - NUMA node ids written to the new cgroup's cpuset mems.
/// * `hard_error_on_degrade` - when `true`, every condition that would
///   otherwise yield `Degraded` (and every narrowed effective cpuset) becomes
///   an `Err`.
///
/// Returns `Ok(Active(..))` on success, or `Ok(Degraded(reason))` when the
/// host cannot support the sandbox and `hard_error_on_degrade` is `false`.
///
/// # Errors
///
/// Besides the hard-error conversions above, returns `Err` for any failure
/// of the subtree-control write, cgroup creation, cpuset writes, or the task
/// migration that is not mapped to a `SandboxDegraded` reason below.
pub fn try_create(
plan_cpus: &[usize],
plan_mems: &BTreeSet<usize>,
hard_error_on_degrade: bool,
) -> Result<Self> {
// Step 1: /sys/fs/cgroup must be a cgroup v2 mount. A statfs failure is
// treated the same as a filesystem-type mismatch.
match rustix::fs::statfs("/sys/fs/cgroup") {
#[allow(clippy::unnecessary_cast)]
Ok(sfs) if (sfs.f_type as i64) == CGROUP2_SUPER_MAGIC => {}
Ok(_) | Err(_) => {
return Self::degraded_or_err(SandboxDegraded::NoCgroupV2, hard_error_on_degrade);
}
}
// Step 2: find our own cgroup path (the `0::` line of /proc/self/cgroup).
let parent_rel = match read_self_cgroup_path() {
Ok(rel) => rel,
Err(_) => {
return Self::degraded_or_err(SandboxDegraded::NoCgroupV2, hard_error_on_degrade);
}
};
// Step 3: refuse to build the sandbox directly under the cgroup root.
if is_root_cgroup(&parent_rel) {
return Self::degraded_or_err(
SandboxDegraded::RootCgroupRefused,
hard_error_on_degrade,
);
}
// Strip the leading '/' so `join` appends to the mount point instead of
// replacing it with an absolute path.
let parent_abs = Path::new("/sys/fs/cgroup").join(parent_rel.trim_start_matches('/'));
// Step 4: the parent must list the cpuset controller as available.
if !parent_controllers_include(&parent_abs, "cpuset") {
return Self::degraded_or_err(
SandboxDegraded::NoCpusetController,
hard_error_on_degrade,
);
}
// CgroupManager::new takes a &str, so a non-UTF-8 path cannot be used.
let parent_str = match parent_abs.to_str() {
Some(s) => s,
None => {
return Self::degraded_or_err(SandboxDegraded::NoCgroupV2, hard_error_on_degrade);
}
};
let cg = CgroupManager::new(parent_str);
// Step 5: enable cpuset for the parent's children. Permission errors and
// EBUSY map to degraded reasons; anything else is a genuine error.
if let Err(e) = cg.add_parent_subtree_controller("cpuset") {
let raw = anyhow_first_io_errno(&e);
if raw == Some(libc::EACCES) || raw == Some(libc::EPERM) {
return Self::degraded_or_err(
SandboxDegraded::PermissionDenied,
hard_error_on_degrade,
);
}
if raw == Some(libc::EBUSY) {
return Self::degraded_or_err(
SandboxDegraded::SubtreeControlRefused,
hard_error_on_degrade,
);
}
return Err(e).with_context(|| {
format!(
"write +cpuset to {}/cgroup.subtree_control",
parent_abs.display()
)
});
}
// Step 6: clean up stale sandboxes left under the same parent before
// adding a new one.
sweep_orphan_sandboxes(&parent_abs);
// The name embeds a timestamp and our pid; the orphan sweep parses the
// pid back out of the trailing segment. A clock before the epoch falls
// back to 0.
let epoch_nanos = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_nanos())
.unwrap_or(0);
let our_pid = std::process::id();
let name = format!("ktstr-build-{epoch_nanos}-{our_pid}");
// Step 7: create the sandbox cgroup.
if let Err(e) = cg.create_cgroup(&name) {
let raw = anyhow_first_io_errno(&e);
if raw == Some(libc::EACCES) || raw == Some(libc::EPERM) {
return Self::degraded_or_err(
SandboxDegraded::PermissionDenied,
hard_error_on_degrade,
);
}
return Err(e).with_context(|| format!("create {name}"));
}
// Step 8: write the CPU set. From here on, every failure path removes the
// freshly created cgroup (best effort) before returning.
let cpu_set: BTreeSet<usize> = plan_cpus.iter().copied().collect();
if let Err(e) = cg.set_cpuset(&name, &cpu_set) {
let _ = cg.remove_cgroup(&name);
return Err(e).context("write cpuset.cpus");
}
// Compare what was requested with what the kernel reports as effective.
// An unreadable effective file (None) skips the comparison.
let effective_cpus =
read_cpuset_effective(&parent_abs.join(&name).join("cpuset.cpus.effective"));
if let Some(eff) = &effective_cpus
&& !cpuset_sets_equal(&cpu_set, eff)
{
tracing::warn!(
cgroup = %name,
requested = ?cpu_set,
effective = ?eff,
"tag=resource_budget.cpuset_cpus_degraded",
);
if hard_error_on_degrade {
let _ = cg.remove_cgroup(&name);
// NOTE(review): unlike the cpuset.mems message below and the
// degraded_or_err message, this text has no "Remediation" section.
// Confirm that is intended.
anyhow::bail!(
"--cpu-cap: cpuset.cpus narrowed by parent cgroup \
(requested {cpu_set:?}, effective {eff:?}). \
Run `ktstr locks --json` to inspect peers."
);
}
}
// Step 9: same write-then-verify sequence for the NUMA node set.
if let Err(e) = cg.set_cpuset_mems(&name, plan_mems) {
let _ = cg.remove_cgroup(&name);
return Err(e).context("write cpuset.mems");
}
let effective_mems =
read_cpuset_effective(&parent_abs.join(&name).join("cpuset.mems.effective"));
if let Some(eff) = &effective_mems
&& !cpuset_sets_equal(plan_mems, eff)
{
tracing::warn!(
cgroup = %name,
requested = ?plan_mems,
effective = ?eff,
"tag=resource_budget.cpuset_mems_degraded",
);
if hard_error_on_degrade {
let _ = cg.remove_cgroup(&name);
anyhow::bail!(
"--cpu-cap: cpuset.mems narrowed by parent cgroup \
(requested {plan_mems:?}, effective {eff:?}).\n\
\n\
Remediation:\n\
\n\
1. The parent cgroup's cpuset.mems is narrower than \
the plan's NUMA node set. Run `ktstr locks` to \
see which parent is active and `cat \
/proc/self/cgroup` for the path, then either \
widen that parent's cpuset.mems or move this \
process under a wider cgroup (systemd-run \
--user --scope -p Delegate=cpuset).\n\
2. Drop --cpu-cap to build under LLC flock \
coordination alone, trading NUMA enforcement \
for the noisier fallback path."
);
}
}
// Step 10: move this process into the sandbox. The cgroup is removed
// before the error is classified, so a degraded result leaves nothing
// behind.
if let Err(e) = cg.move_task(&name, our_pid as libc::pid_t) {
let _ = cg.remove_cgroup(&name);
let raw = anyhow_first_io_errno(&e);
if raw == Some(libc::EACCES) || raw == Some(libc::EPERM) {
return Self::degraded_or_err(
SandboxDegraded::PermissionDenied,
hard_error_on_degrade,
);
}
return Err(e).context("migrate self into cgroup.procs");
}
Ok(BuildSandbox::Active(Box::new(SandboxInner {
cg,
name,
parent_cgroup: parent_abs,
our_pid,
})))
}
/// Returns `true` for the `Active` variant and `false` for `Degraded`.
#[allow(dead_code)]
pub fn is_active(&self) -> bool {
matches!(self, BuildSandbox::Active(_))
}
/// Turns a degradation reason into the caller-selected outcome:
/// `Ok(Degraded(kind))` when degradation is tolerated, or an `Err` whose
/// message embeds `kind` followed by a fixed remediation guide when
/// `hard_error_on_degrade` is `true`.
fn degraded_or_err(kind: SandboxDegraded, hard_error_on_degrade: bool) -> Result<Self> {
if hard_error_on_degrade {
Err(anyhow::anyhow!(
"--cpu-cap: {kind}. This host cannot enforce the \
resource budget.\n\
\n\
Remediation (pick one):\n\
\n\
1. Re-run under a systemd transient scope with a \
writable cpuset-capable cgroup:\n\
\n\
systemd-run --user --scope \\\n\
-p 'Delegate=cpuset cpu' \\\n\
cargo ktstr kernel build --source <path> --cpu-cap N\n\
\n\
2. Re-run with sudo preserving env so KTSTR_CPU_CAP / \
KTSTR_CACHE_DIR / RUST_LOG propagate to the root \
invocation:\n\
\n\
sudo -E cargo ktstr kernel build --source <path> --cpu-cap N\n\
\n\
3. Enable cpuset delegation on the caller's cgroup \
by adding `cpuset` to the parent's cgroup.subtree_control \
(requires CAP_SYS_ADMIN).\n\
\n\
4. Drop --cpu-cap to build without the cgroup-level \
resource contract (falls back to LLC flock \
coordination only)."
))
} else {
Ok(BuildSandbox::Degraded(kind))
}
}
}
impl Drop for BuildSandbox {
    /// Tears down an active sandbox: drains its tasks, then removes the
    /// cgroup, retrying while removal reports `EBUSY`. Every failure is
    /// logged and never propagated. A degraded sandbox needs no cleanup.
    fn drop(&mut self) {
        let inner: &SandboxInner = match self {
            BuildSandbox::Active(boxed) => &**boxed,
            BuildSandbox::Degraded(_) => return,
        };

        // A failed drain is only logged; removal is still attempted below.
        if let Err(drain_err) = inner.cg.drain_tasks(&inner.name) {
            tracing::warn!(
                cgroup = %inner.name,
                parent = %inner.parent_cgroup.display(),
                err = %drain_err,
                "resource_budget: drain_tasks failed during sandbox drop",
            );
        }

        let mut attempts_left = RMDIR_EBUSY_RETRIES;
        while attempts_left > 0 {
            attempts_left -= 1;
            let Err(rm_err) = inner.cg.remove_cgroup(&inner.name) else {
                return;
            };
            let busy = anyhow_first_io_errno(&rm_err) == Some(libc::EBUSY);
            // Only EBUSY is retried, and only while attempts remain.
            if busy && attempts_left > 0 {
                std::thread::sleep(RMDIR_EBUSY_BACKOFF);
                continue;
            }
            tracing::warn!(
                cgroup = %inner.name,
                parent = %inner.parent_cgroup.display(),
                our_pid = inner.our_pid,
                err = %rm_err,
                "tag=resource_budget.cgroup_orphan_left",
            );
            return;
        }
    }
}
/// Reports whether a cgroup path (as read from the `0::` line of
/// `/proc/self/cgroup`) names the cgroup root.
///
/// Surrounding whitespace is ignored. Both `"/"` and the empty string count
/// as the root.
pub(crate) fn is_root_cgroup(parent_rel: &str) -> bool {
    matches!(parent_rel.trim(), "" | "/")
}
/// Returns this process's cgroup v2 path: the text after `0::` on the first
/// matching line of `/proc/self/cgroup`, trimmed.
///
/// # Errors
///
/// Fails when the file cannot be read or contains no `0::` line.
fn read_self_cgroup_path() -> Result<String> {
    let contents =
        std::fs::read_to_string("/proc/self/cgroup").context("read /proc/self/cgroup")?;
    match contents
        .lines()
        .find_map(|entry| entry.strip_prefix("0::"))
    {
        Some(path) => Ok(path.trim().to_string()),
        None => anyhow::bail!("no cgroup v2 entry (0::) in /proc/self/cgroup"),
    }
}
/// Checks whether `controller` appears as a whitespace-separated token in
/// `<parent_abs>/cgroup.controllers`.
///
/// An unreadable or missing file yields `false`.
fn parent_controllers_include(parent_abs: &Path, controller: &str) -> bool {
    std::fs::read_to_string(parent_abs.join("cgroup.controllers"))
        .map(|listing| listing.split_whitespace().any(|entry| entry == controller))
        .unwrap_or(false)
}
/// Reads a cpuset list file (e.g. `cpuset.cpus.effective`) and returns the
/// ids it contains as a set.
///
/// Returns `None` when the file cannot be read. The trimmed text is parsed
/// by `crate::topology::parse_cpu_list_lenient`; how malformed input is
/// handled is decided by that helper.
fn read_cpuset_effective(path: &Path) -> Option<BTreeSet<usize>> {
    match std::fs::read_to_string(path) {
        Ok(raw) => {
            let ids = crate::topology::parse_cpu_list_lenient(raw.trim());
            Some(ids.into_iter().collect())
        }
        Err(_) => None,
    }
}
/// Returns `true` when the requested and effective id sets contain exactly
/// the same members.
fn cpuset_sets_equal(requested: &BTreeSet<usize>, effective: &BTreeSet<usize>) -> bool {
    // Same size plus containment is set equality.
    requested.len() == effective.len() && requested.is_subset(effective)
}
/// Reason `sweep_skip_reason` gives for leaving a leftover sandbox in place.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum SweepSkip {
/// The `kill(pid, 0)` probe did not report `ESRCH`, so the owner is treated
/// as still alive.
PidLive,
/// No metadata was supplied for the entry.
MetadataUnreadable,
/// The metadata carried no usable modification time.
MtimeUnreadable,
/// The entry was modified less than `ORPHAN_MAX_AGE` ago.
MtimeYoung,
}
pub(crate) fn sweep_skip_reason(
pid: libc::pid_t,
meta: Option<std::fs::Metadata>,
now: std::time::SystemTime,
) -> Option<SweepSkip> {
let kill_rc = unsafe { libc::kill(pid, 0) };
let live = kill_rc == 0 || std::io::Error::last_os_error().raw_os_error() != Some(libc::ESRCH);
if live {
return Some(SweepSkip::PidLive);
}
let meta = match meta {
Some(m) => m,
None => return Some(SweepSkip::MetadataUnreadable),
};
let mtime = match meta.modified() {
Ok(m) => m,
Err(_) => return Some(SweepSkip::MtimeUnreadable),
};
let age = now.duration_since(mtime).unwrap_or(Duration::ZERO);
if age < ORPHAN_MAX_AGE {
return Some(SweepSkip::MtimeYoung);
}
None
}
/// Best-effort removal of stale `ktstr-build-*` cgroups directly under
/// `parent_abs`.
///
/// An entry is removed only when its name starts with `ktstr-build-`, its
/// last `-`-separated segment parses as a pid, and `sweep_skip_reason`
/// reports no reason to keep it. An unreadable parent directory is a silent
/// no-op. A failed removal is logged and the sweep continues.
fn sweep_orphan_sandboxes(parent_abs: &Path) {
    let Ok(listing) = std::fs::read_dir(parent_abs) else {
        return;
    };
    let now = std::time::SystemTime::now();

    for dirent in listing.flatten() {
        // Names that are not valid UTF-8 cannot be ours.
        let Ok(name) = dirent.file_name().into_string() else {
            continue;
        };
        if !name.starts_with("ktstr-build-") {
            continue;
        }
        // The owner pid is the last '-'-separated segment of the name.
        let Some(pid) = name
            .rsplit_once('-')
            .and_then(|(_, tail)| tail.parse::<i32>().ok())
        else {
            continue;
        };
        if sweep_skip_reason(pid, dirent.metadata().ok(), now).is_some() {
            continue;
        }
        let Some(parent_str) = parent_abs.to_str() else {
            continue;
        };
        let cg = CgroupManager::new(parent_str);
        if let Err(e) = cg.remove_cgroup(&name) {
            tracing::warn!(
                cgroup = %name,
                parent = %parent_abs.display(),
                err = %e,
                "resource_budget: orphan sweep remove_cgroup failed",
            );
        }
    }
}
/// Test-only driver for the configure-then-migrate sequence: write the CPU
/// set, write the memory-node set, then move `pid` into the cgroup.
///
/// The first failing step triggers a best-effort `remove_cgroup(name)`, and
/// the error is returned with a context string naming that step. No later
/// step runs.
#[cfg(test)]
fn apply_sandbox_sequence(
    cg: &dyn crate::cgroup::CgroupOps,
    name: &str,
    cpu_set: &BTreeSet<usize>,
    mem_set: &BTreeSet<usize>,
    pid: libc::pid_t,
) -> Result<()> {
    // Shared failure path: drop the cgroup, then label the error.
    let rollback = |err: anyhow::Error, step: &'static str| -> anyhow::Error {
        let _ = cg.remove_cgroup(name);
        err.context(step)
    };

    cg.set_cpuset(name, cpu_set)
        .map_err(|e| rollback(e, "write cpuset.cpus"))?;
    cg.set_cpuset_mems(name, mem_set)
        .map_err(|e| rollback(e, "write cpuset.mems"))?;
    cg.move_task(name, pid)
        .map_err(|e| rollback(e, "migrate self into cgroup.procs"))?;
    Ok(())
}
// Unit tests for the sandbox helpers. Several of them touch the real host
// (/proc/self/cgroup, the temp directory, fork/waitpid), so their outcome can
// depend on the environment they run in.
#[cfg(test)]
mod tests {
use super::*;
// The running process must have a `0::` entry whose path starts with '/'.
#[test]
fn read_self_cgroup_returns_path() {
let rel = read_self_cgroup_path().expect("proc self cgroup readable");
assert!(
rel.starts_with('/'),
"cgroup path must be absolute-ish: {rel}"
);
}
// Two sets built independently with the same members compare equal.
#[test]
fn cpuset_sets_equal_identity() {
let mut a = BTreeSet::new();
a.insert(0);
a.insert(2);
let mut b = BTreeSet::new();
b.insert(0);
b.insert(2);
assert!(cpuset_sets_equal(&a, &b), "identity sets must be equal",);
}
// An effective set that is a strict subset of the request is not equal.
#[test]
fn cpuset_sets_equal_narrower_effective() {
let mut req = BTreeSet::new();
req.insert(0);
req.insert(1);
req.insert(2);
let mut eff = BTreeSet::new();
eff.insert(0);
eff.insert(1);
assert!(
!cpuset_sets_equal(&req, &eff),
"narrower effective must not equal requested",
);
}
// Each variant's Display text mentions its key term.
#[test]
fn sandbox_degraded_display_text() {
let nc = format!("{}", SandboxDegraded::NoCgroupV2);
assert!(nc.contains("cgroup v2"), "NoCgroupV2: {nc}");
let ncc = format!("{}", SandboxDegraded::NoCpusetController);
assert!(ncc.contains("cpuset"), "NoCpusetController: {ncc}");
let scr = format!("{}", SandboxDegraded::SubtreeControlRefused);
assert!(
scr.contains("subtree_control"),
"SubtreeControlRefused: {scr}",
);
let pd = format!("{}", SandboxDegraded::PermissionDenied);
assert!(pd.contains("permission"), "PermissionDenied: {pd}");
let rcr = format!("{}", SandboxDegraded::RootCgroupRefused);
assert!(rcr.contains("root"), "RootCgroupRefused: {rcr}");
}
// A missing cgroup.controllers file reads as "controller absent".
#[test]
fn parent_controllers_include_missing_file() {
let path = Path::new("/nonexistent/ktstr-controllers-test");
assert!(
!parent_controllers_include(path, "cpuset"),
"nonexistent path must report no controllers",
);
}
// A missing effective file yields None rather than an empty set.
#[test]
fn read_cpuset_effective_missing_file_returns_none() {
let path = Path::new("/nonexistent/ktstr-effective-test/cpuset.cpus.effective");
assert!(
read_cpuset_effective(path).is_none(),
"nonexistent path must return None",
);
}
// Sweeping an unreadable parent must simply return.
#[test]
fn sweep_orphan_sandboxes_on_nonexistent_parent_is_noop() {
sweep_orphan_sandboxes(Path::new("/nonexistent/ktstr-sweep-test-xyz"));
}
// Entries without the `ktstr-build-` prefix are never touched.
#[test]
fn sweep_orphan_sandboxes_ignores_non_ktstr_entries() {
let dir =
std::env::temp_dir().join(format!("ktstr-sandbox-sweep-test-{}", std::process::id()));
std::fs::create_dir_all(&dir).unwrap();
let unrelated = dir.join("some-other-dir");
std::fs::create_dir(&unrelated).unwrap();
sweep_orphan_sandboxes(&dir);
assert!(
unrelated.exists(),
"sweep must not remove non-ktstr entries"
);
let _ = std::fs::remove_dir_all(&dir);
}
// A prefixed entry whose trailing segment is not a number is skipped.
#[test]
fn sweep_orphan_sandboxes_skips_malformed_pid_suffix() {
let dir = std::env::temp_dir().join(format!(
"ktstr-sandbox-malformed-test-{}",
std::process::id()
));
std::fs::create_dir_all(&dir).unwrap();
let malformed = dir.join("ktstr-build-123-NOTAPID");
std::fs::create_dir(&malformed).unwrap();
sweep_orphan_sandboxes(&dir);
assert!(
malformed.exists(),
"sweep must skip entries with unparseable pid suffix"
);
let _ = std::fs::remove_dir_all(&dir);
}
// The bare prefix leaves an empty tail after the last '-', which cannot
// parse as a pid.
#[test]
fn sweep_orphan_sandboxes_skips_empty_pid_suffix() {
let dir =
std::env::temp_dir().join(format!("ktstr-sandbox-empty-suffix-{}", std::process::id()));
std::fs::create_dir_all(&dir).unwrap();
let empty_suffix = dir.join("ktstr-build-");
std::fs::create_dir(&empty_suffix).unwrap();
sweep_orphan_sandboxes(&dir);
assert!(
empty_suffix.exists(),
"sweep must skip entries with empty (post-rsplit) pid tail",
);
let _ = std::fs::remove_dir_all(&dir);
}
// The prefix followed directly by non-numeric text is skipped.
#[test]
fn sweep_orphan_sandboxes_skips_direct_non_numeric_pid() {
let dir =
std::env::temp_dir().join(format!("ktstr-sandbox-non-numeric-{}", std::process::id()));
std::fs::create_dir_all(&dir).unwrap();
let non_numeric = dir.join("ktstr-build-NOTAPID");
std::fs::create_dir(&non_numeric).unwrap();
sweep_orphan_sandboxes(&dir);
assert!(
non_numeric.exists(),
"sweep must skip entries whose tail is directly non-numeric",
);
let _ = std::fs::remove_dir_all(&dir);
}
// Pins the naming scheme the sweep relies on: fixed prefix, i32-parseable
// last segment.
#[test]
fn sandbox_filename_prefix_pins_to_ktstr_build() {
let name = format!("ktstr-build-{}-{}", 1_700_000_000_u64, 12345_u32);
assert!(
name.starts_with("ktstr-build-"),
"sweep keys on this prefix: {name}"
);
let pid_str = name.rsplit_once('-').map(|(_, t)| t).unwrap();
assert!(
pid_str.parse::<i32>().is_ok(),
"trailing segment must be i32-parseable: {pid_str}"
);
}
// is_active distinguishes the two variants. The Active value points at a
// nonexistent parent, so its Drop runs against paths that do not exist.
#[test]
fn build_sandbox_is_active_discriminates_variants() {
let degraded = BuildSandbox::Degraded(SandboxDegraded::NoCgroupV2);
assert!(!degraded.is_active(), "Degraded must report !is_active()");
let fake_parent =
std::path::PathBuf::from("/nonexistent/ktstr-build-sandbox-is-active-test");
let active = BuildSandbox::Active(Box::new(SandboxInner {
cg: CgroupManager::new(fake_parent.to_str().expect("utf-8 test path")),
name: "ktstr-build-test-name".to_string(),
parent_cgroup: fake_parent,
our_pid: 1,
}));
assert!(active.is_active(), "Active must report is_active()");
}
// With degradation tolerated, try_create on the real host must return Ok,
// whichever variant the environment produces.
#[test]
fn build_sandbox_try_create_returns_without_panic() {
let cpus: Vec<usize> = Vec::new();
let mems: BTreeSet<usize> = BTreeSet::new();
let result = BuildSandbox::try_create(&cpus, &mems, false);
match &result {
Ok(BuildSandbox::Active(_)) => {
}
Ok(BuildSandbox::Degraded(kind)) => {
assert!(
!format!("{kind}").is_empty(),
"SandboxDegraded::Display must be non-empty: {kind:?}",
);
}
Err(e) => {
panic!("try_create unexpected hard error: {e:#}");
}
}
}
// With hard errors requested, Degraded must never be returned, and any
// error must name the flag and carry a remediation block.
#[test]
fn build_sandbox_try_create_hard_error_converts_degrade() {
let cpus: Vec<usize> = Vec::new();
let mems: BTreeSet<usize> = BTreeSet::new();
let result = BuildSandbox::try_create(&cpus, &mems, true);
match &result {
Ok(BuildSandbox::Active(_)) => {
}
Ok(BuildSandbox::Degraded(kind)) => {
panic!(
"hard_error_on_degrade=true must NOT return Degraded; \
got {kind:?}"
);
}
Err(e) => {
let msg = format!("{e:#}");
assert!(
msg.contains("--cpu-cap"),
"hard error must name the contract flag: {msg}",
);
assert!(
msg.contains("Remediation"),
"hard error must include remediation block: {msg}",
);
}
}
}
// Dropping an Active sandbox whose parent does not exist must not panic.
#[test]
fn build_sandbox_drop_on_nonexistent_parent_does_not_panic() {
let fake_parent = std::path::PathBuf::from("/nonexistent/ktstr-build-drop-test-xyz");
let sandbox = BuildSandbox::Active(Box::new(SandboxInner {
cg: CgroupManager::new(fake_parent.to_str().expect("utf-8 test path")),
name: "ktstr-build-drop-test-name".to_string(),
parent_cgroup: fake_parent,
our_pid: 1,
}));
drop(sandbox);
}
// Dropping a Degraded sandbox takes the early-return path.
#[test]
fn build_sandbox_drop_on_degraded_is_noop() {
let sandbox = BuildSandbox::Degraded(SandboxDegraded::NoCgroupV2);
drop(sandbox);
}
// Every listed variant renders to non-empty text.
#[test]
fn sandbox_degraded_all_variants_display_non_empty() {
let variants = [
SandboxDegraded::NoCgroupV2,
SandboxDegraded::NoCpusetController,
SandboxDegraded::SubtreeControlRefused,
SandboxDegraded::PermissionDenied,
SandboxDegraded::RootCgroupRefused,
];
for v in variants {
let text = format!("{v}");
assert!(
!text.is_empty(),
"SandboxDegraded::{v:?} must have non-empty Display",
);
}
}
// Root detection: "/", empty and whitespace-only inputs are root; any real
// path component is not.
#[test]
fn is_root_cgroup_handles_slash_empty_and_whitespace() {
assert!(is_root_cgroup("/"), "literal / is the root");
assert!(is_root_cgroup(""), "empty string is treated as root");
assert!(is_root_cgroup(" "), "whitespace-only is treated as root");
assert!(is_root_cgroup("/\n"), "slash + newline trims to root");
assert!(!is_root_cgroup("/user.slice"), "/user.slice is not root");
assert!(
!is_root_cgroup("/user.slice/session-1.scope"),
"nested slice is not root",
);
assert!(!is_root_cgroup("/a"), "/a is not root");
}
use std::sync::Mutex;
// Recording fake of CgroupOps: every call is appended to `calls`, and the
// three `*_err` slots make the matching method fail with the stored message.
#[derive(Default)]
struct MockCgroupOps {
calls: Mutex<Vec<String>>,
set_cpuset_err: Mutex<Option<&'static str>>,
set_cpuset_mems_err: Mutex<Option<&'static str>>,
move_task_err: Mutex<Option<&'static str>>,
}
impl MockCgroupOps {
// Appends one call description to the log.
fn record(&self, call: &str) {
self.calls.lock().unwrap().push(call.to_string());
}
// Returns a copy of the call log in invocation order.
fn calls_snapshot(&self) -> Vec<String> {
self.calls.lock().unwrap().clone()
}
}
// Each method records itself first. Only set_cpuset, set_cpuset_mems and
// move_task can be made to fail; everything else returns Ok.
impl crate::cgroup::CgroupOps for MockCgroupOps {
fn parent_path(&self) -> &Path {
Path::new("/mock")
}
fn setup(&self, _: bool) -> Result<()> {
self.record("setup");
Ok(())
}
fn create_cgroup(&self, name: &str) -> Result<()> {
self.record(&format!("create_cgroup({name})"));
Ok(())
}
fn remove_cgroup(&self, name: &str) -> Result<()> {
self.record(&format!("remove_cgroup({name})"));
Ok(())
}
fn set_cpuset(&self, name: &str, _: &BTreeSet<usize>) -> Result<()> {
self.record(&format!("set_cpuset({name})"));
if let Some(msg) = *self.set_cpuset_err.lock().unwrap() {
anyhow::bail!("{msg}");
}
Ok(())
}
fn clear_cpuset(&self, name: &str) -> Result<()> {
self.record(&format!("clear_cpuset({name})"));
Ok(())
}
fn set_cpuset_mems(&self, name: &str, _: &BTreeSet<usize>) -> Result<()> {
self.record(&format!("set_cpuset_mems({name})"));
if let Some(msg) = *self.set_cpuset_mems_err.lock().unwrap() {
anyhow::bail!("{msg}");
}
Ok(())
}
fn clear_cpuset_mems(&self, name: &str) -> Result<()> {
self.record(&format!("clear_cpuset_mems({name})"));
Ok(())
}
fn move_task(&self, name: &str, pid: libc::pid_t) -> Result<()> {
self.record(&format!("move_task({name},{pid})"));
if let Some(msg) = *self.move_task_err.lock().unwrap() {
anyhow::bail!("{msg}");
}
Ok(())
}
fn move_tasks(&self, name: &str, _: &[libc::pid_t]) -> Result<()> {
self.record(&format!("move_tasks({name})"));
Ok(())
}
fn clear_subtree_control(&self, name: &str) -> Result<()> {
self.record(&format!("clear_subtree_control({name})"));
Ok(())
}
fn drain_tasks(&self, name: &str) -> Result<()> {
self.record(&format!("drain_tasks({name})"));
Ok(())
}
fn cleanup_all(&self) -> Result<()> {
self.record("cleanup_all");
Ok(())
}
}
// Fixture: (cpus {0,1,2}, mems {0}).
fn test_sets() -> (BTreeSet<usize>, BTreeSet<usize>) {
(
[0usize, 1, 2].into_iter().collect(),
[0usize].into_iter().collect(),
)
}
// A failing first step rolls back immediately; no later step runs.
#[test]
fn apply_sandbox_sequence_rolls_back_on_set_cpuset_failure() {
let mock = MockCgroupOps::default();
*mock.set_cpuset_err.lock().unwrap() = Some("EINVAL");
let (cpus, mems) = test_sets();
let err = apply_sandbox_sequence(&mock, "sbx", &cpus, &mems, 1)
.expect_err("F.1 failure must propagate");
let msg = format!("{err:#}");
assert!(msg.contains("cpuset.cpus"), "err must name the step: {msg}");
let calls = mock.calls_snapshot();
assert_eq!(
calls,
vec![
"set_cpuset(sbx)".to_string(),
"remove_cgroup(sbx)".to_string()
],
"exact call order: set_cpuset THEN remove_cgroup, with NO \
set_cpuset_mems or move_task",
);
}
// A failing second step rolls back without attempting the migration.
#[test]
fn apply_sandbox_sequence_rolls_back_on_set_cpuset_mems_failure() {
let mock = MockCgroupOps::default();
*mock.set_cpuset_mems_err.lock().unwrap() = Some("EINVAL");
let (cpus, mems) = test_sets();
let err = apply_sandbox_sequence(&mock, "sbx", &cpus, &mems, 1)
.expect_err("F.2 failure must propagate");
let msg = format!("{err:#}");
assert!(msg.contains("cpuset.mems"), "err must name the step: {msg}");
let calls = mock.calls_snapshot();
assert_eq!(
calls,
vec![
"set_cpuset(sbx)".to_string(),
"set_cpuset_mems(sbx)".to_string(),
"remove_cgroup(sbx)".to_string(),
],
"exact order: F.1 ok, F.2 fails, remove_cgroup, with NO move_task",
);
}
// A failing migration still removes the cgroup afterwards.
#[test]
fn apply_sandbox_sequence_rolls_back_on_move_task_failure() {
let mock = MockCgroupOps::default();
*mock.move_task_err.lock().unwrap() = Some("ESRCH");
let (cpus, mems) = test_sets();
let err = apply_sandbox_sequence(&mock, "sbx", &cpus, &mems, 42)
.expect_err("G failure must propagate");
let msg = format!("{err:#}");
assert!(
msg.contains("migrate self into cgroup.procs"),
"err must name the step: {msg}",
);
let calls = mock.calls_snapshot();
assert_eq!(
calls,
vec![
"set_cpuset(sbx)".to_string(),
"set_cpuset_mems(sbx)".to_string(),
"move_task(sbx,42)".to_string(),
"remove_cgroup(sbx)".to_string(),
],
"exact order: F.1, F.2, G fails, remove_cgroup",
);
}
// Creates a directory inside a fresh TempDir, sets its atime/mtime to
// `now - age` via utime(2), and returns the TempDir guard with the
// directory's metadata. The guard must outlive any use of the path.
fn fake_entry_with_mtime(age: Duration) -> (tempfile::TempDir, std::fs::Metadata) {
let tmp = tempfile::TempDir::new().expect("tempdir");
let child = tmp.path().join("entry");
std::fs::create_dir(&child).expect("mkdir");
let target_mtime = std::time::SystemTime::now()
.checked_sub(age)
.expect("time arithmetic");
let secs = target_mtime
.duration_since(std::time::UNIX_EPOCH)
.expect("post-epoch")
.as_secs();
let buf = libc::utimbuf {
actime: secs as libc::time_t,
modtime: secs as libc::time_t,
};
let cpath = std::ffi::CString::new(child.to_str().expect("utf8")).expect("nul-free");
let rc = unsafe { libc::utime(cpath.as_ptr(), &buf) };
assert_eq!(rc, 0, "utime must succeed on our tempdir");
let meta = std::fs::metadata(&child).expect("stat");
(tmp, meta)
}
// The pid check comes first: our own (live) pid blocks the sweep even
// with a 48-hour-old mtime.
#[test]
fn sweep_skip_reason_live_pid_with_old_mtime_blocks_on_pid() {
let (_tmp, meta) = fake_entry_with_mtime(Duration::from_secs(48 * 60 * 60));
let our_pid = std::process::id() as libc::pid_t;
let now = std::time::SystemTime::now();
assert_eq!(
sweep_skip_reason(our_pid, Some(meta), now),
Some(SweepSkip::PidLive),
"live pid must block sweep even when mtime is old",
);
}
// Obtains a dead pid by forking a child that exits at once and reaping
// it. A fresh mtime must then block the sweep.
#[test]
fn sweep_skip_reason_dead_pid_with_fresh_mtime_blocks_on_mtime() {
let pid = unsafe { libc::fork() };
assert!(pid >= 0, "fork must succeed");
if pid == 0 {
unsafe {
libc::_exit(0);
}
}
let mut status: libc::c_int = 0;
let rc = unsafe { libc::waitpid(pid, &mut status, 0) };
assert_eq!(rc, pid, "waitpid must reap our child");
let (_tmp, meta) = fake_entry_with_mtime(Duration::from_secs(60));
let now = std::time::SystemTime::now();
assert_eq!(
sweep_skip_reason(pid, Some(meta), now),
Some(SweepSkip::MtimeYoung),
"dead pid with fresh mtime must block on the mtime gate",
);
}
// Dead pid (forked and reaped) plus a 48-hour-old mtime is the only
// combination that yields None.
#[test]
fn sweep_skip_reason_dead_pid_with_old_mtime_sweeps() {
let pid = unsafe { libc::fork() };
assert!(pid >= 0, "fork must succeed");
if pid == 0 {
unsafe {
libc::_exit(0);
}
}
let mut status: libc::c_int = 0;
let rc = unsafe { libc::waitpid(pid, &mut status, 0) };
assert_eq!(rc, pid, "waitpid must reap our child");
let (_tmp, meta) = fake_entry_with_mtime(Duration::from_secs(48 * 60 * 60));
let now = std::time::SystemTime::now();
assert_eq!(
sweep_skip_reason(pid, Some(meta), now),
None,
"dead pid + old mtime is the only config that sweeps",
);
}
// Probing pid 1 either succeeds or fails with something other than
// ESRCH; both count as live.
#[test]
fn sweep_skip_reason_eperm_pid_treated_as_live() {
let (_tmp, meta) = fake_entry_with_mtime(Duration::from_secs(48 * 60 * 60));
let now = std::time::SystemTime::now();
assert_eq!(
sweep_skip_reason(1, Some(meta), now),
Some(SweepSkip::PidLive),
"pid 1 must be treated as live (EPERM or success, not ESRCH)",
);
}
// The success path performs the three steps in order and never removes
// the cgroup.
#[test]
fn apply_sandbox_sequence_success_does_not_call_remove_cgroup() {
let mock = MockCgroupOps::default();
let (cpus, mems) = test_sets();
apply_sandbox_sequence(&mock, "sbx", &cpus, &mems, 42).expect("all steps ok must succeed");
let calls = mock.calls_snapshot();
assert_eq!(
calls,
vec![
"set_cpuset(sbx)".to_string(),
"set_cpuset_mems(sbx)".to_string(),
"move_task(sbx,42)".to_string(),
],
"exact F.1→F.2→G order, NO remove_cgroup on success",
);
assert!(
!calls.iter().any(|c| c.starts_with("remove_cgroup")),
"remove_cgroup must NOT fire on success: {calls:?}",
);
}
}