use crate::topology::TestTopology;
use anyhow::{Context, Result, anyhow, bail};
use std::collections::BTreeSet;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::mpsc;
use std::time::Duration;
/// Cgroup controllers that callers can request for the managed hierarchy.
///
/// The `Ord` derive lets values be stored in a `BTreeSet`, which is the
/// collection type `CgroupManager::setup` accepts.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Controller {
/// Selects the controller whose token is `cpuset`.
Cpuset,
/// Selects the controller whose token is `cpu`.
Cpu,
/// Selects the controller whose token is `memory`.
Memory,
/// Selects the controller whose token is `pids`.
Pids,
/// Selects the controller whose token is `io`.
Io,
}
impl Controller {
pub fn name(self) -> &'static str {
match self {
Controller::Cpuset => "cpuset",
Controller::Cpu => "cpu",
Controller::Memory => "memory",
Controller::Pids => "pids",
Controller::Io => "io",
}
}
}
/// Time limit passed to `write_with_timeout` for every cgroupfs write in this file.
const CGROUP_WRITE_TIMEOUT: Duration = Duration::from_secs(2);
fn write_with_timeout(path: &Path, data: &str, timeout: Duration) -> Result<()> {
let display = path.display().to_string();
let path = path.to_owned();
let data = data.to_owned();
let (tx, rx) = mpsc::channel();
std::thread::spawn(move || {
let result = fs::write(&path, &data);
let _ = tx.send(result);
});
match rx.recv_timeout(timeout) {
Ok(Ok(())) => Ok(()),
Ok(Err(e)) => {
let errno_suffix = e
.raw_os_error()
.and_then(crate::errno_name)
.map(|name| format!(" ({name})"))
.unwrap_or_default();
Err(e).with_context(|| format!("write {display}{errno_suffix}"))
}
Err(_) => bail!(
"cgroup write to {display} timed out after {}ms",
timeout.as_millis()
),
}
}
/// Rejects cgroup names that are unsafe to append to the managed parent path.
///
/// A name is refused when it is empty, starts with `/`, contains a NUL byte,
/// or has any `/`-separated component that is empty, `.`, `..`, or begins
/// with a dot. The checks run in that order and the first failure is returned.
fn validate_cgroup_name(name: &str) -> Result<()> {
    if name.is_empty() {
        bail!("cgroup name must not be empty");
    }
    if name.starts_with('/') {
        bail!(
            "cgroup name '{name}' starts with '/' — would escape the \
             managed parent via Path::join (absolute paths replace the \
             join base)"
        );
    }
    if name.contains('\0') {
        bail!("cgroup name '{name}' contains a NUL byte");
    }
    for component in name.split('/') {
        match component {
            "" => bail!(
                "cgroup name '{name}' contains an empty path component \
                 (consecutive '/') — Path::join would emit a malformed path"
            ),
            ".." => bail!(
                "cgroup name '{name}' contains a '..' component — \
                 would escape the managed parent via Path::join"
            ),
            "." => bail!(
                "cgroup name '{name}' contains a '.' component — \
                 ambiguous self-reference, refuse before fs writes"
            ),
            _ if component.starts_with('.') => bail!(
                "cgroup name '{name}' contains a leading-dot component \
                 ('{component}') — produces a hidden cgroupfs entry"
            ),
            _ => {}
        }
    }
    Ok(())
}
/// Returns the raw OS errno of the first `std::io::Error` found in `err`'s
/// cause chain.
///
/// Yields `None` when the chain holds no `io::Error`, or when the first one
/// found carries no raw OS error code.
pub(crate) fn anyhow_first_io_errno(err: &anyhow::Error) -> Option<i32> {
    for cause in err.chain() {
        if let Some(io) = cause.downcast_ref::<std::io::Error>() {
            // Only the first io::Error is consulted, even if it has no errno.
            return io.raw_os_error();
        }
    }
    None
}
/// Reports whether the first I/O errno in `err`'s chain is `ESRCH`.
fn is_esrch(err: &anyhow::Error) -> bool {
    matches!(anyhow_first_io_errno(err), Some(code) if code == libc::ESRCH)
}
/// Reports whether the first I/O errno in `err`'s chain is `EBUSY`.
fn is_ebusy(err: &anyhow::Error) -> bool {
    matches!(anyhow_first_io_errno(err), Some(code) if code == libc::EBUSY)
}
/// Builds a one-line diagnostic snapshot of the cpuset-related state of
/// `parent` and its child `name`.
///
/// Every read is best-effort: a failed read is rendered as a placeholder
/// label inside the snapshot rather than being returned as an error.
fn capture_cpuset_state(parent: &Path, name: &str) -> String {
    let child = parent.join(name);
    let parent_controllers = read_or_label(&parent.join("cgroup.controllers"));
    let parent_subtree_control = read_or_label(&parent.join("cgroup.subtree_control"));
    let child_controllers = read_or_label(&child.join("cgroup.controllers"));
    let cpuset_cpus_exists = child.join("cpuset.cpus").exists();
    let child_listing = fs::read_dir(&child).map_or_else(
        |e| format!("<read_dir failed: {e}>"),
        |entries| {
            // Entries that fail to read are dropped; the rest are sorted so
            // the listing is stable across runs.
            let mut names: Vec<String> = entries
                .filter_map(Result::ok)
                .map(|entry| entry.file_name().to_string_lossy().into_owned())
                .collect();
            names.sort_unstable();
            format!("[{}]", names.join(", "))
        },
    );
    format!(
        "cgroup-state-snapshot: \
         parent={} name={} \
         parent.cgroup.controllers={:?} \
         parent.cgroup.subtree_control={:?} \
         child.cgroup.controllers={:?} \
         child.cpuset.cpus.exists={} \
         child.listing={}",
        parent.display(),
        name,
        parent_controllers,
        parent_subtree_control,
        child_controllers,
        cpuset_cpus_exists,
        child_listing,
    )
}
/// Reads `path` as UTF-8 text and returns it trimmed; on failure returns a
/// `<read failed: …>` label carrying the error instead.
fn read_or_label(path: &Path) -> String {
    fs::read_to_string(path)
        .map(|text| text.trim().to_string())
        .unwrap_or_else(|e| format!("<read failed: {e}>"))
}
/// Threshold for the outstanding-removes counter: `remove_cgroup` refuses to
/// run once the counter is strictly greater than this value.
const MAX_OUTSTANDING_REMOVES: usize = 10;
/// Manages child cgroups beneath a single parent directory.
#[derive(Debug)]
pub struct CgroupManager {
/// Directory that every managed cgroup name is joined onto.
parent: PathBuf,
/// Incremented when `remove_cgroup` fails and decremented (saturating at
/// zero) when it succeeds.
outstanding_removes: AtomicUsize,
}
impl CgroupManager {
/// Creates a manager for the cgroup directory at `parent`.
///
/// Nothing is touched on disk here; the outstanding-removes counter starts at zero.
pub fn new(parent: &str) -> Self {
    let parent = PathBuf::from(parent);
    let outstanding_removes = AtomicUsize::new(0);
    Self {
        parent,
        outstanding_removes,
    }
}
/// Returns the parent directory this manager was constructed with.
pub fn parent_path(&self) -> &std::path::Path {
    self.parent.as_path()
}
/// Returns the current value of the outstanding-removes counter (relaxed load).
pub fn outstanding_removes(&self) -> usize {
    let counter = &self.outstanding_removes;
    counter.load(Ordering::Relaxed)
}
pub fn setup(&self, controllers: &BTreeSet<Controller>) -> Result<()> {
self.setup_under_root(controllers, &PathBuf::from("/sys/fs/cgroup"))
}
fn setup_under_root(&self, controllers: &BTreeSet<Controller>, root: &Path) -> Result<()> {
if !self.parent.exists() {
fs::create_dir_all(&self.parent)
.with_context(|| format!("mkdir {}", self.parent.display()))?;
}
if controllers.is_empty() {
return Ok(());
}
if let Ok(rel) = self.parent.strip_prefix(root) {
let available_path = root.join("cgroup.controllers");
if available_path.exists() {
let raw = fs::read_to_string(&available_path).with_context(|| {
format!("read cgroup.controllers: {}", available_path.display())
})?;
let available: BTreeSet<&str> = raw.split_whitespace().collect();
for c in controllers {
if !available.contains(c.name()) {
return Err(anyhow!(
"cgroup controller '{}' not available at {}; \
cgroup.controllers reports {:?}. CONFIG_{}_CONTROLLER \
may be unset, or the controller is masked at this \
level of the hierarchy",
c.name(),
available_path.display(),
available,
c.name().to_uppercase(),
));
}
}
}
let line: String = controllers
.iter()
.map(|c| format!("+{}", c.name()))
.collect::<Vec<_>>()
.join(" ");
let mut cur = root.to_path_buf();
for c in rel.components() {
let sc = cur.join("cgroup.subtree_control");
if sc.exists() {
write_with_timeout(&sc, &line, CGROUP_WRITE_TIMEOUT).with_context(|| {
format!("enable controllers '{line}' at {}", sc.display())
})?;
}
cur = cur.join(c);
}
let sc = self.parent.join("cgroup.subtree_control");
if sc.exists() {
write_with_timeout(&sc, &line, CGROUP_WRITE_TIMEOUT)
.with_context(|| format!("enable controllers '{line}' at {}", sc.display()))?;
}
}
Ok(())
}
/// Creates the child cgroup `name` (including intermediate directories) if
/// it is missing, then makes a best-effort attempt to enable `cpuset` on
/// its ancestors.
///
/// # Errors
///
/// Fails when `name` is rejected by validation or the directory cannot be created.
pub fn create_cgroup(&self, name: &str) -> Result<()> {
    validate_cgroup_name(name)?;
    let dir = self.parent.join(name);
    let missing = !dir.exists();
    if missing {
        fs::create_dir_all(&dir).with_context(|| format!("mkdir {}", dir.display()))?;
    }
    self.enable_subtree_cpuset(name);
    Ok(())
}
/// Writes `+<controller>` to the parent's `cgroup.subtree_control`.
///
/// Does nothing and returns `Ok` when that file does not exist.
pub fn add_parent_subtree_controller(&self, controller: &str) -> Result<()> {
    let control_file = self.parent.join("cgroup.subtree_control");
    if control_file.exists() {
        let token = format!("+{controller}");
        write_with_timeout(&control_file, &token, CGROUP_WRITE_TIMEOUT)
    } else {
        Ok(())
    }
}
/// Removes the child cgroup `name`, tracking failures in the
/// outstanding-removes counter.
///
/// The call is refused up front when the counter already exceeds
/// `MAX_OUTSTANDING_REMOVES`. A missing directory counts as success without
/// touching the counter. A successful removal decrements the counter
/// (saturating at zero); a failed one increments it.
///
/// NOTE(review): the comparison is strict (`>`), so the counter can reach
/// cap + 1 before calls start being refused — confirm this is intended.
pub fn remove_cgroup(&self, name: &str) -> Result<()> {
    validate_cgroup_name(name)?;
    let outstanding = self.outstanding_removes.load(Ordering::Relaxed);
    if outstanding > MAX_OUTSTANDING_REMOVES {
        bail!(
            "remove_cgroup '{name}' refused: {outstanding} cgroups outstanding \
             (cap {MAX_OUTSTANDING_REMOVES}); cgroup.procs draining wedged or \
             churn loop outpacing the kernel's RCU grace period — bailing to \
             avoid unbounded cgroupfs accumulation"
        );
    }
    let target = self.parent.join(name);
    if !target.exists() {
        return Ok(());
    }
    let outcome = self.remove_cgroup_inner(name, &target);
    if outcome.is_ok() {
        // The closure always returns Some, so fetch_update cannot fail.
        let _ = self.outstanding_removes.fetch_update(
            Ordering::Relaxed,
            Ordering::Relaxed,
            |n| Some(n.saturating_sub(1)),
        );
    } else {
        self.outstanding_removes.fetch_add(1, Ordering::Relaxed);
    }
    outcome
}
fn remove_cgroup_inner(&self, name: &str, p: &Path) -> Result<()> {
if p.join("cgroup.freeze").exists()
&& let Err(err) = self.set_freeze(name, false)
&& anyhow_first_io_errno(&err) != Some(libc::ENOENT)
{
tracing::warn!(
cgroup = name,
err = %format!("{err:#}"),
"remove_cgroup: pre-drain unfreeze failed; drain may strand frozen tasks at root"
);
}
self.drain_tasks(name)?;
std::thread::sleep(std::time::Duration::from_millis(50));
fs::remove_dir(p).with_context(|| format!("rmdir {}", p.display()))
}
/// Writes `cpus` to the cgroup's `cpuset.cpus`.
///
/// On failure the error is wrapped with a diagnostic snapshot of the
/// parent and child cpuset state, captured after the write failed.
pub fn set_cpuset(&self, name: &str, cpus: &BTreeSet<usize>) -> Result<()> {
    validate_cgroup_name(name)?;
    let cpus_file = self.parent.join(name).join("cpuset.cpus");
    let cpu_list = TestTopology::cpuset_string(cpus);
    write_with_timeout(&cpus_file, &cpu_list, CGROUP_WRITE_TIMEOUT)
        .map_err(|e| e.context(capture_cpuset_state(&self.parent, name)))
}
fn enable_subtree_cpuset(&self, name: &str) {
let components: Vec<&str> = name.split('/').collect();
if components.len() < 2 {
return;
}
let mut cur = self.parent.clone();
for c in &components[..components.len() - 1] {
let sc = cur.join("cgroup.subtree_control");
if sc.exists()
&& let Err(e) = write_with_timeout(&sc, "+cpuset", CGROUP_WRITE_TIMEOUT)
{
tracing::warn!(path = %sc.display(), err = %e, "failed to enable cpuset");
}
cur = cur.join(c);
}
let sc = cur.join("cgroup.subtree_control");
if sc.exists()
&& let Err(e) = write_with_timeout(&sc, "+cpuset", CGROUP_WRITE_TIMEOUT)
{
tracing::warn!(path = %sc.display(), err = %e, "failed to enable cpuset");
}
}
/// Clears the cgroup's `cpuset.cpus` by writing an empty string to it.
pub fn clear_cpuset(&self, name: &str) -> Result<()> {
    validate_cgroup_name(name)?;
    let mut cpus_file = self.parent.join(name);
    cpus_file.push("cpuset.cpus");
    write_with_timeout(&cpus_file, "", CGROUP_WRITE_TIMEOUT)
}
/// Writes `nodes` to the cgroup's `cpuset.mems`.
pub fn set_cpuset_mems(&self, name: &str, nodes: &BTreeSet<usize>) -> Result<()> {
    validate_cgroup_name(name)?;
    let mems_file = self.parent.join(name).join("cpuset.mems");
    let node_list = TestTopology::cpuset_string(nodes);
    write_with_timeout(&mems_file, &node_list, CGROUP_WRITE_TIMEOUT)
}
/// Clears the cgroup's `cpuset.mems` by writing an empty string to it.
pub fn clear_cpuset_mems(&self, name: &str) -> Result<()> {
    validate_cgroup_name(name)?;
    let mut mems_file = self.parent.join(name);
    mems_file.push("cpuset.mems");
    write_with_timeout(&mems_file, "", CGROUP_WRITE_TIMEOUT)
}
/// Writes `<quota> <period>` to the cgroup's `cpu.max`; a `None` quota is
/// written as the keyword `max`.
pub fn set_cpu_max(&self, name: &str, quota_us: Option<u64>, period_us: u64) -> Result<()> {
    validate_cgroup_name(name)?;
    let cpu_max_file = self.parent.join(name).join("cpu.max");
    let quota = quota_us.map_or_else(|| "max".to_string(), |q| q.to_string());
    let line = format!("{quota} {period_us}");
    write_with_timeout(&cpu_max_file, &line, CGROUP_WRITE_TIMEOUT)
}
/// Writes `weight` in decimal to the cgroup's `cpu.weight`.
pub fn set_cpu_weight(&self, name: &str, weight: u32) -> Result<()> {
    validate_cgroup_name(name)?;
    let weight_file = self.parent.join(name).join("cpu.weight");
    let line = weight.to_string();
    write_with_timeout(&weight_file, &line, CGROUP_WRITE_TIMEOUT)
}
/// Writes a byte limit to the cgroup's `memory.max`; `None` is written as `max`.
pub fn set_memory_max(&self, name: &str, bytes: Option<u64>) -> Result<()> {
    validate_cgroup_name(name)?;
    let limit_file = self.parent.join(name).join("memory.max");
    let line = bytes.map_or_else(|| "max".to_string(), |b| b.to_string());
    write_with_timeout(&limit_file, &line, CGROUP_WRITE_TIMEOUT)
}
/// Writes a byte threshold to the cgroup's `memory.high`; `None` is written as `max`.
pub fn set_memory_high(&self, name: &str, bytes: Option<u64>) -> Result<()> {
    validate_cgroup_name(name)?;
    let high_file = self.parent.join(name).join("memory.high");
    let line = bytes.map_or_else(|| "max".to_string(), |b| b.to_string());
    write_with_timeout(&high_file, &line, CGROUP_WRITE_TIMEOUT)
}
/// Writes a byte value to the cgroup's `memory.low`; `None` is written as `0`.
pub fn set_memory_low(&self, name: &str, bytes: Option<u64>) -> Result<()> {
    validate_cgroup_name(name)?;
    let low_file = self.parent.join(name).join("memory.low");
    let line = bytes.map_or_else(|| "0".to_string(), |b| b.to_string());
    write_with_timeout(&low_file, &line, CGROUP_WRITE_TIMEOUT)
}
/// Writes `weight` in decimal to the cgroup's `io.weight`.
pub fn set_io_weight(&self, name: &str, weight: u16) -> Result<()> {
    validate_cgroup_name(name)?;
    let weight_file = self.parent.join(name).join("io.weight");
    let line = weight.to_string();
    write_with_timeout(&weight_file, &line, CGROUP_WRITE_TIMEOUT)
}
/// Writes `1` (when `frozen`) or `0` to the cgroup's `cgroup.freeze`.
pub fn set_freeze(&self, name: &str, frozen: bool) -> Result<()> {
    validate_cgroup_name(name)?;
    let freeze_file = self.parent.join(name).join("cgroup.freeze");
    let line = match frozen {
        true => "1",
        false => "0",
    };
    write_with_timeout(&freeze_file, line, CGROUP_WRITE_TIMEOUT)
}
/// Writes a limit to the cgroup's `pids.max`; `None` is written as `max`.
pub fn set_pids_max(&self, name: &str, max: Option<u64>) -> Result<()> {
    validate_cgroup_name(name)?;
    let pids_file = self.parent.join(name).join("pids.max");
    let line = max.map_or_else(|| "max".to_string(), |n| n.to_string());
    write_with_timeout(&pids_file, &line, CGROUP_WRITE_TIMEOUT)
}
/// Writes a byte limit to the cgroup's `memory.swap.max`; `None` is written as `max`.
pub fn set_memory_swap_max(&self, name: &str, bytes: Option<u64>) -> Result<()> {
    validate_cgroup_name(name)?;
    let swap_file = self.parent.join(name).join("memory.swap.max");
    let line = bytes.map_or_else(|| "max".to_string(), |b| b.to_string());
    write_with_timeout(&swap_file, &line, CGROUP_WRITE_TIMEOUT)
}
/// Moves `pid` into the cgroup `name` by writing it to `cgroup.procs`.
///
/// The cpuset ordering check runs first and can refuse the move.
pub fn move_task(&self, name: &str, pid: libc::pid_t) -> Result<()> {
    validate_cgroup_name(name)?;
    self.check_cpuset_ordering(name)?;
    let procs_file = self.parent.join(name).join("cgroup.procs");
    let line = pid.to_string();
    write_with_timeout(&procs_file, &line, CGROUP_WRITE_TIMEOUT)
}
/// Refuses a migration into a cgroup whose `cpuset.cpus` is set while its
/// `cpuset.mems.effective` reads empty.
///
/// If either file cannot be read, or `cpuset.cpus` is empty, the check
/// passes and the move is allowed to proceed.
fn check_cpuset_ordering(&self, name: &str) -> Result<()> {
    let cgroup_dir = self.parent.join(name);
    let Ok(cpus) = fs::read_to_string(cgroup_dir.join("cpuset.cpus")) else {
        return Ok(());
    };
    if cpus.trim().is_empty() {
        return Ok(());
    }
    let Ok(mems_effective) = fs::read_to_string(cgroup_dir.join("cpuset.mems.effective"))
    else {
        return Ok(());
    };
    if !mems_effective.trim().is_empty() {
        return Ok(());
    }
    bail!(
        "move_task into '{name}' refused: cpuset.cpus is set ({}) \
         but cpuset.mems.effective reads empty — half-configured \
         cgroup. The kernel's behavior here is path-dependent \
         (guarantee_online_mems walks up to find a non-empty \
         ancestor mask; the empty-nodemask OOM path is reachable \
         only in degenerate hierarchies), but the framework \
         surfaces a focused error rather than letting the \
         migration through. Call set_cpuset_mems on this cgroup \
         or widen an ancestor's cpuset.mems before move_task",
        cpus.trim(),
    );
}
/// Moves each of `pids` into the cgroup `name`, in order.
///
/// A pid whose move fails with `ESRCH` is logged and skipped; any other
/// failure aborts the loop and is returned.
pub fn move_tasks(&self, name: &str, pids: &[libc::pid_t]) -> Result<()> {
    validate_cgroup_name(name)?;
    for &pid in pids {
        match self.move_task_with_retry(name, pid) {
            Ok(()) => {}
            Err(e) if is_esrch(&e) => {
                tracing::warn!(pid, cgroup = name, "task vanished during migration");
            }
            Err(e) => return Err(e),
        }
    }
    Ok(())
}
/// Calls `move_task` up to three times, retrying only on `EBUSY` with a
/// 100 ms pause between attempts.
///
/// Any other error, or `EBUSY` on the final attempt, is returned as-is.
fn move_task_with_retry(&self, name: &str, pid: libc::pid_t) -> Result<()> {
    const MAX_RETRIES: u32 = 3;
    const RETRY_DELAY: Duration = Duration::from_millis(100);
    let mut attempt: u32 = 0;
    loop {
        let err = match self.move_task(name, pid) {
            Ok(()) => return Ok(()),
            Err(e) => e,
        };
        attempt += 1;
        if !is_ebusy(&err) || attempt >= MAX_RETRIES {
            return Err(err);
        }
        tracing::debug!(
            pid,
            cgroup = name,
            attempt = attempt,
            "EBUSY on cgroup.procs write, retrying"
        );
        std::thread::sleep(RETRY_DELAY);
    }
}
/// Disables every controller currently listed in the cgroup's
/// `cgroup.subtree_control` by writing each back with a `-` prefix.
///
/// A missing file or an empty controller list is a no-op.
pub fn clear_subtree_control(&self, name: &str) -> Result<()> {
    validate_cgroup_name(name)?;
    let p = self.parent.join(name).join("cgroup.subtree_control");
    if !p.exists() {
        return Ok(());
    }
    let raw = fs::read_to_string(&p).with_context(|| format!("read {}", p.display()))?;
    let mut disable_str = String::new();
    for controller in raw.split_whitespace() {
        if !disable_str.is_empty() {
            disable_str.push(' ');
        }
        disable_str.push('-');
        disable_str.push_str(controller);
    }
    // No tokens means the file was empty or whitespace-only.
    if disable_str.is_empty() {
        return Ok(());
    }
    write_with_timeout(&p, &disable_str, CGROUP_WRITE_TIMEOUT)
        .with_context(|| format!("clear subtree_control on {name}"))
}
/// Moves every pid listed in the cgroup's `cgroup.procs` to the root cgroup.
///
/// Returns `Ok` when the file does not exist; per-pid failures are handled
/// (logged) inside `drain_pids_to_root` and never surface here.
pub fn drain_tasks(&self, name: &str) -> Result<()> {
    validate_cgroup_name(name)?;
    let procs_file = self.parent.join(name).join("cgroup.procs");
    if procs_file.exists() {
        drain_pids_to_root(&procs_file, name);
    }
    Ok(())
}
/// Recursively cleans up every child directory of the parent.
///
/// Always returns `Ok`: a missing parent is a no-op and a failure to list
/// the parent is only logged.
pub fn cleanup_all(&self) -> Result<()> {
    if self.parent.exists() {
        let walked = for_each_child_dir(&self.parent, "cleanup_all", cleanup_recursive);
        if let Err(err) = walked {
            tracing::warn!(
                parent = %self.parent.display(),
                err = %err,
                "cleanup_all: read_dir failed; child cgroups may remain under parent",
            );
        }
    }
    Ok(())
}
}
/// Abstraction over the cgroup operations exposed by `CgroupManager`.
///
/// Each method mirrors the `CgroupManager` inherent method of the same name
/// and signature. In every method, `name` identifies a child cgroup relative
/// to `parent_path`.
pub trait CgroupOps {
/// Returns the parent directory the implementation manages.
fn parent_path(&self) -> &Path;
/// Prepares the parent cgroup and enables the requested controllers.
fn setup(&self, controllers: &BTreeSet<Controller>) -> Result<()>;
/// Creates the child cgroup `name`.
fn create_cgroup(&self, name: &str) -> Result<()>;
/// Removes the child cgroup `name`.
fn remove_cgroup(&self, name: &str) -> Result<()>;
/// Sets the cgroup's CPU set.
fn set_cpuset(&self, name: &str, cpus: &BTreeSet<usize>) -> Result<()>;
/// Clears the cgroup's CPU set.
fn clear_cpuset(&self, name: &str) -> Result<()>;
/// Sets the cgroup's memory-node set.
fn set_cpuset_mems(&self, name: &str, nodes: &BTreeSet<usize>) -> Result<()>;
/// Clears the cgroup's memory-node set.
fn clear_cpuset_mems(&self, name: &str) -> Result<()>;
/// Sets the CPU quota and period; a `None` quota means unlimited.
fn set_cpu_max(&self, name: &str, quota_us: Option<u64>, period_us: u64) -> Result<()>;
/// Sets the CPU weight.
fn set_cpu_weight(&self, name: &str, weight: u32) -> Result<()>;
/// Sets the hard memory limit; `None` means unlimited.
fn set_memory_max(&self, name: &str, bytes: Option<u64>) -> Result<()>;
/// Sets the memory high threshold; `None` means unlimited.
fn set_memory_high(&self, name: &str, bytes: Option<u64>) -> Result<()>;
/// Sets the memory low value; `None` resets it to zero.
fn set_memory_low(&self, name: &str, bytes: Option<u64>) -> Result<()>;
/// Sets the I/O weight.
fn set_io_weight(&self, name: &str, weight: u16) -> Result<()>;
/// Freezes (`true`) or thaws (`false`) the cgroup.
fn set_freeze(&self, name: &str, frozen: bool) -> Result<()>;
/// Sets the pid limit; `None` means unlimited.
fn set_pids_max(&self, name: &str, max: Option<u64>) -> Result<()>;
/// Sets the swap limit; `None` means unlimited.
fn set_memory_swap_max(&self, name: &str, bytes: Option<u64>) -> Result<()>;
/// Moves a single task into the cgroup.
fn move_task(&self, name: &str, pid: libc::pid_t) -> Result<()>;
/// Moves several tasks into the cgroup.
fn move_tasks(&self, name: &str, pids: &[libc::pid_t]) -> Result<()>;
/// Disables every controller enabled in the cgroup's subtree control.
fn clear_subtree_control(&self, name: &str) -> Result<()>;
/// Moves the cgroup's tasks out to the root cgroup.
fn drain_tasks(&self, name: &str) -> Result<()>;
/// Cleans up every child cgroup under the parent.
fn cleanup_all(&self) -> Result<()>;
}
/// Trait implementation that forwards every call to the `CgroupManager`
/// inherent method of the same name.
///
/// Each body names the target as `CgroupManager::method(self, ..)` rather
/// than `self.method(..)`, which makes the delegation target explicit at
/// every call site.
impl CgroupOps for CgroupManager {
fn parent_path(&self) -> &Path {
CgroupManager::parent_path(self)
}
fn setup(&self, controllers: &BTreeSet<Controller>) -> Result<()> {
CgroupManager::setup(self, controllers)
}
fn create_cgroup(&self, name: &str) -> Result<()> {
CgroupManager::create_cgroup(self, name)
}
fn remove_cgroup(&self, name: &str) -> Result<()> {
CgroupManager::remove_cgroup(self, name)
}
fn set_cpuset(&self, name: &str, cpus: &BTreeSet<usize>) -> Result<()> {
CgroupManager::set_cpuset(self, name, cpus)
}
fn clear_cpuset(&self, name: &str) -> Result<()> {
CgroupManager::clear_cpuset(self, name)
}
fn set_cpuset_mems(&self, name: &str, nodes: &BTreeSet<usize>) -> Result<()> {
CgroupManager::set_cpuset_mems(self, name, nodes)
}
fn clear_cpuset_mems(&self, name: &str) -> Result<()> {
CgroupManager::clear_cpuset_mems(self, name)
}
fn set_cpu_max(&self, name: &str, quota_us: Option<u64>, period_us: u64) -> Result<()> {
CgroupManager::set_cpu_max(self, name, quota_us, period_us)
}
fn set_cpu_weight(&self, name: &str, weight: u32) -> Result<()> {
CgroupManager::set_cpu_weight(self, name, weight)
}
fn set_memory_max(&self, name: &str, bytes: Option<u64>) -> Result<()> {
CgroupManager::set_memory_max(self, name, bytes)
}
fn set_memory_high(&self, name: &str, bytes: Option<u64>) -> Result<()> {
CgroupManager::set_memory_high(self, name, bytes)
}
fn set_memory_low(&self, name: &str, bytes: Option<u64>) -> Result<()> {
CgroupManager::set_memory_low(self, name, bytes)
}
fn set_io_weight(&self, name: &str, weight: u16) -> Result<()> {
CgroupManager::set_io_weight(self, name, weight)
}
fn set_freeze(&self, name: &str, frozen: bool) -> Result<()> {
CgroupManager::set_freeze(self, name, frozen)
}
fn set_pids_max(&self, name: &str, max: Option<u64>) -> Result<()> {
CgroupManager::set_pids_max(self, name, max)
}
fn set_memory_swap_max(&self, name: &str, bytes: Option<u64>) -> Result<()> {
CgroupManager::set_memory_swap_max(self, name, bytes)
}
fn move_task(&self, name: &str, pid: libc::pid_t) -> Result<()> {
CgroupManager::move_task(self, name, pid)
}
fn move_tasks(&self, name: &str, pids: &[libc::pid_t]) -> Result<()> {
CgroupManager::move_tasks(self, name, pids)
}
fn clear_subtree_control(&self, name: &str) -> Result<()> {
CgroupManager::clear_subtree_control(self, name)
}
fn drain_tasks(&self, name: &str) -> Result<()> {
CgroupManager::drain_tasks(self, name)
}
fn cleanup_all(&self) -> Result<()> {
CgroupManager::cleanup_all(self)
}
}
/// Moves every pid listed in `procs_path` to `/sys/fs/cgroup/cgroup.procs`.
///
/// Entirely best-effort: an unreadable source file, a malformed line, or a
/// failed write is logged with `context` as the cgroup label and then
/// skipped. Write failures with `ESRCH` are not logged.
fn drain_pids_to_root(procs_path: &Path, context: &str) {
    let dst = Path::new("/sys/fs/cgroup/cgroup.procs");
    let content = match fs::read_to_string(procs_path) {
        Ok(text) => text,
        Err(e) => {
            tracing::warn!(
                path = %procs_path.display(),
                cgroup = context,
                err = %e,
                "drain_pids_to_root: read_to_string failed; tasks may remain in cgroup",
            );
            return;
        }
    };
    let entries = content.lines().map(str::trim).filter(|l| !l.is_empty());
    for trimmed in entries {
        match trimmed.parse::<u32>() {
            Err(e) => {
                tracing::warn!(
                    path = %procs_path.display(),
                    cgroup = context,
                    line = trimmed,
                    err = %e,
                    "drain_pids_to_root: malformed pid line; skipping",
                );
            }
            Ok(pid) => {
                let moved = write_with_timeout(dst, &pid.to_string(), CGROUP_WRITE_TIMEOUT);
                if let Err(e) = moved {
                    if !is_esrch(&e) {
                        tracing::warn!(pid, cgroup = context, err = %e, "failed to drain task");
                    }
                }
            }
        }
    }
}
/// Calls `f` with the path of every directory entry directly under `path`.
///
/// Entries that cannot be read, or whose file type cannot be determined,
/// are logged (prefixed with `context`) and skipped; non-directories are
/// silently ignored.
///
/// # Errors
///
/// Only the initial `read_dir` failure is returned.
fn for_each_child_dir(path: &Path, context: &str, mut f: impl FnMut(&Path)) -> std::io::Result<()> {
    let listing = fs::read_dir(path)?;
    for item in listing {
        let entry = match item {
            Ok(entry) => entry,
            Err(err) => {
                tracing::warn!(
                    path = %path.display(),
                    err = %err,
                    "{context}: dir entry read failed; skipping",
                );
                continue;
            }
        };
        let kind = match entry.file_type() {
            Ok(kind) => kind,
            Err(err) => {
                tracing::warn!(
                    path = %entry.path().display(),
                    err = %err,
                    "{context}: file_type read failed; skipping entry",
                );
                continue;
            }
        };
        if kind.is_dir() {
            f(&entry.path());
        }
    }
    Ok(())
}
fn cleanup_recursive(path: &std::path::Path) {
if let Err(err) = for_each_child_dir(path, "cleanup_recursive", cleanup_recursive) {
tracing::warn!(
path = %path.display(),
err = %err,
"cleanup_recursive: read_dir failed; child cgroups may remain",
);
}
let freeze_path = path.join("cgroup.freeze");
if freeze_path.exists()
&& let Err(err) = write_with_timeout(&freeze_path, "0", CGROUP_WRITE_TIMEOUT)
{
tracing::warn!(
path = %path.display(),
err = %format!("{err:#}"),
"cleanup_recursive: pre-drain unfreeze failed; source-cgroup state-hygiene step skipped",
);
}
drain_pids_to_root(&path.join("cgroup.procs"), &path.display().to_string());
std::thread::sleep(std::time::Duration::from_millis(10));
if let Err(err) = fs::remove_dir(path) {
tracing::warn!(
path = %path.display(),
err = %err,
"cleanup_recursive: remove_dir failed; cgroup directory may remain",
);
}
}
#[cfg(test)]
mod tests {
use super::*;
/// The manager reports back the parent path it was constructed with.
#[test]
fn cgroup_manager_path() {
    let expected = std::path::Path::new("/sys/fs/cgroup/test");
    let manager = CgroupManager::new("/sys/fs/cgroup/test");
    assert_eq!(manager.parent_path(), expected);
}
/// `create_cgroup` creates both flat and nested directories under the parent.
#[test]
fn create_cgroup_in_tmpdir() {
    let scratch = std::env::temp_dir().join(format!("ktstr-cg-test-{}", std::process::id()));
    fs::create_dir_all(&scratch).unwrap();
    let manager = CgroupManager::new(scratch.to_str().unwrap());
    for name in ["test_cg", "nested/deep"] {
        manager.create_cgroup(name).unwrap();
        assert!(scratch.join(name).exists());
    }
    let _ = fs::remove_dir_all(&scratch);
}
/// Creating the same cgroup twice succeeds both times.
#[test]
fn create_cgroup_idempotent() {
    let scratch = std::env::temp_dir().join(format!("ktstr-cg-idem-{}", std::process::id()));
    fs::create_dir_all(&scratch).unwrap();
    let manager = CgroupManager::new(scratch.to_str().unwrap());
    for _ in 0..2 {
        manager.create_cgroup("cg_0").unwrap();
    }
    assert!(scratch.join("cg_0").exists());
    let _ = fs::remove_dir_all(&scratch);
}
/// `cleanup_all` is a successful no-op when the parent does not exist.
#[test]
fn cleanup_all_on_nonexistent() {
    let manager = CgroupManager::new("/nonexistent/ktstr-test-path");
    let outcome = manager.cleanup_all();
    assert!(outcome.is_ok());
}
/// Removing a cgroup that does not exist succeeds.
#[test]
fn remove_cgroup_nonexistent() {
    let manager = CgroupManager::new("/nonexistent/ktstr-test-path");
    let outcome = manager.remove_cgroup("no_such_cgroup");
    assert!(outcome.is_ok());
}
/// `cleanup_all` removes every child directory, nested ones included.
#[test]
fn cleanup_removes_child_dirs() {
    let scratch = std::env::temp_dir().join(format!("ktstr-cg-clean-{}", std::process::id()));
    fs::create_dir_all(&scratch).unwrap();
    let manager = CgroupManager::new(scratch.to_str().unwrap());
    for name in ["a", "b", "c/deep"] {
        manager.create_cgroup(name).unwrap();
    }
    assert!(scratch.join("a").exists());
    assert!(scratch.join("c/deep").exists());

    manager.cleanup_all().unwrap();

    assert!(!scratch.join("a").exists());
    assert!(!scratch.join("b").exists());
    assert!(!scratch.join("c").exists());
    let _ = fs::remove_dir_all(&scratch);
}
/// Draining a cgroup with no `cgroup.procs` file succeeds.
#[test]
fn drain_tasks_nonexistent_source() {
    let manager = CgroupManager::new("/nonexistent/ktstr-drain-test");
    let outcome = manager.drain_tasks("missing_cgroup");
    assert!(outcome.is_ok());
}
/// `cleanup_all` removes child directories but leaves regular files untouched.
#[test]
fn cleanup_all_skips_non_dir_entries() {
    let scratch = std::env::temp_dir().join(format!("ktstr-cg-nondir-{}", std::process::id()));
    fs::create_dir_all(&scratch).unwrap();
    let manager = CgroupManager::new(scratch.to_str().unwrap());
    manager.create_cgroup("cg_child").unwrap();
    let child_dir = scratch.join("cg_child");
    let stray_file = scratch.join("stray.txt");
    fs::write(&stray_file, b"do not descend").unwrap();
    assert!(child_dir.exists());
    assert!(stray_file.exists());

    manager.cleanup_all().unwrap();

    assert!(
        !child_dir.exists(),
        "cleanup_all should remove the child directory",
    );
    assert!(
        stray_file.exists(),
        "cleanup_all must not descend into or remove regular files",
    );
    let remaining = fs::read_to_string(&stray_file).unwrap();
    assert_eq!(remaining, "do not descend");
    let _ = fs::remove_dir_all(&scratch);
}
/// `cleanup_recursive` removes the starting directory and all descendants.
#[test]
fn cleanup_recursive_removes_nested_dirs_depth_first() {
    let base = std::env::temp_dir().join(format!("ktstr-cg-nested-{}", std::process::id()));
    let root = base.join("root");
    let leaf = root.join("mid").join("leaf");
    let sibling = root.join("sibling");
    fs::create_dir_all(&leaf).unwrap();
    fs::create_dir_all(&sibling).unwrap();
    assert!(root.join("mid/leaf").exists());
    assert!(sibling.exists());

    cleanup_recursive(&root);

    assert!(
        !root.exists(),
        "cleanup_recursive should remove root and every descendant",
    );
    let _ = fs::remove_dir_all(&base);
}
/// `setup` with no controllers still creates the parent directory.
#[test]
fn setup_non_cgroup_path() {
    let scratch = std::env::temp_dir().join(format!("ktstr-setup-{}", std::process::id()));
    let manager = CgroupManager::new(scratch.to_str().unwrap());
    let no_controllers = BTreeSet::new();
    manager.setup(&no_controllers).unwrap();
    assert!(scratch.exists());
    let _ = fs::remove_dir_all(&scratch);
}
/// Only the requested controllers are written to the parent's
/// `cgroup.subtree_control`; `+cpu` may appear solely as a prefix of `+cpuset`.
#[test]
fn setup_writes_requested_controllers_only() {
    let root =
        std::env::temp_dir().join(format!("ktstr-setup-controllers-{}", std::process::id()));
    let parent = root.join("ktstr");
    fs::create_dir_all(&parent).unwrap();
    fs::write(root.join("cgroup.controllers"), "cpuset cpu memory pids io").unwrap();
    fs::write(root.join("cgroup.subtree_control"), "").unwrap();
    fs::write(parent.join("cgroup.subtree_control"), "").unwrap();

    let manager = CgroupManager::new(parent.to_str().unwrap());
    let requested: BTreeSet<Controller> = [Controller::Cpuset, Controller::Memory]
        .into_iter()
        .collect();
    manager.setup_under_root(&requested, &root).unwrap();

    let written = fs::read_to_string(parent.join("cgroup.subtree_control")).unwrap();
    assert!(
        written.contains("+cpuset"),
        "subtree_control must contain +cpuset; got: {written:?}",
    );
    assert!(
        written.contains("+memory"),
        "subtree_control must contain +memory; got: {written:?}",
    );
    assert!(
        !written.contains("+pids"),
        "+pids must be absent when not requested; got: {written:?}",
    );
    assert!(
        !written.contains("+io"),
        "+io must be absent when not requested; got: {written:?}",
    );
    // Every "+cpu" occurrence must be the start of "+cpuset".
    for (pos, _) in written.match_indices("+cpu") {
        let suffix = &written[pos..];
        assert!(
            suffix.starts_with("+cpuset"),
            "+cpu must be absent when not requested (only +cpuset allowed); \
             got '{suffix}' at pos {pos} in {written:?}",
        );
    }
    let _ = fs::remove_dir_all(&root);
}
/// Requesting a controller missing from `cgroup.controllers` is an error
/// that names the controller.
#[test]
fn setup_rejects_unavailable_controller() {
    let root = std::env::temp_dir().join(format!("ktstr-setup-unavail-{}", std::process::id()));
    let parent = root.join("ktstr");
    fs::create_dir_all(&parent).unwrap();
    fs::write(root.join("cgroup.controllers"), "memory").unwrap();
    fs::write(root.join("cgroup.subtree_control"), "").unwrap();
    fs::write(parent.join("cgroup.subtree_control"), "").unwrap();

    let manager = CgroupManager::new(parent.to_str().unwrap());
    let requested: BTreeSet<Controller> = std::iter::once(Controller::Cpuset).collect();
    let err = manager.setup_under_root(&requested, &root).unwrap_err();

    let msg = format!("{err:#}");
    assert!(
        msg.contains("cpuset") && msg.contains("not available"),
        "error must cite missing 'cpuset' and 'not available'; got {msg:?}",
    );
    let _ = fs::remove_dir_all(&root);
}
/// A write to an ordinary file lands the exact payload.
#[test]
fn write_with_timeout_success() {
    let scratch = std::env::temp_dir().join(format!("ktstr-wt-{}", std::process::id()));
    fs::create_dir_all(&scratch).unwrap();
    let target = scratch.join("test_write");
    write_with_timeout(&target, "hello", Duration::from_secs(5)).unwrap();
    let written = fs::read_to_string(&target).unwrap();
    assert_eq!(written, "hello");
    let _ = fs::remove_dir_all(&scratch);
}
/// A write into a missing directory reports an error.
#[test]
fn write_with_timeout_bad_path() {
    let target = Path::new("/nonexistent/dir/file");
    let outcome = write_with_timeout(target, "data", Duration::from_secs(5));
    assert!(outcome.is_err());
}
/// Moving a task into a cgroup that does not exist fails.
#[test]
fn move_task_nonexistent_cgroup() {
    let manager = CgroupManager::new("/nonexistent/ktstr-move-test");
    let outcome = manager.move_task("no_cgroup", 1);
    assert!(outcome.is_err());
}
/// An empty CPU set is written out as an empty string.
#[test]
fn set_cpuset_empty() {
    let scratch = std::env::temp_dir().join(format!("ktstr-cg-cpuset-{}", std::process::id()));
    let child = scratch.join("cg_a");
    fs::create_dir_all(&child).unwrap();
    let manager = CgroupManager::new(scratch.to_str().unwrap());
    let no_cpus = BTreeSet::new();
    manager.set_cpuset("cg_a", &no_cpus).unwrap();
    let written = fs::read_to_string(child.join("cpuset.cpus")).unwrap();
    assert_eq!(written, "");
    let _ = fs::remove_dir_all(&scratch);
}
/// A non-ESRCH failure aborts `move_tasks` and the error names `cgroup.procs`.
#[test]
fn move_tasks_partial_failure() {
    let manager = CgroupManager::new("/nonexistent/ktstr-partial");
    let pids = [1, 2, 3];
    let err = manager.move_tasks("cg", &pids).unwrap_err();
    let msg = format!("{err:#}");
    assert!(msg.contains("cgroup.procs"), "unexpected error: {msg}");
}
/// Draining a cgroup whose `cgroup.procs` is empty succeeds.
#[test]
fn drain_tasks_empty_cgroup() {
    let scratch = std::env::temp_dir().join(format!("ktstr-cg-drain-{}", std::process::id()));
    let child = scratch.join("cg_d");
    fs::create_dir_all(&child).unwrap();
    fs::write(child.join("cgroup.procs"), "").unwrap();
    fs::write(scratch.join("cgroup.procs"), "").unwrap();
    let manager = CgroupManager::new(scratch.to_str().unwrap());
    let outcome = manager.drain_tasks("cg_d");
    assert!(outcome.is_ok());
    let _ = fs::remove_dir_all(&scratch);
}
/// `is_esrch` sees an ESRCH `io::Error` through an added context layer.
#[test]
fn is_esrch_detects_esrch_in_chain() {
    let source = std::io::Error::from_raw_os_error(libc::ESRCH);
    let wrapped = anyhow::Error::new(source).context("write cgroup.procs");
    assert!(is_esrch(&wrapped));
}
/// `is_esrch` does not match an ENOENT error.
#[test]
fn is_esrch_rejects_enoent() {
    let source = std::io::Error::from_raw_os_error(libc::ENOENT);
    let wrapped = anyhow::Error::new(source).context("write cgroup.procs");
    assert!(!is_esrch(&wrapped));
}
/// `is_ebusy` sees an EBUSY `io::Error` through an added context layer.
#[test]
fn is_ebusy_detects_ebusy_in_chain() {
    let source = std::io::Error::from_raw_os_error(libc::EBUSY);
    let wrapped = anyhow::Error::new(source).context("write cgroup.procs");
    assert!(is_ebusy(&wrapped));
}
/// `is_ebusy` does not match an ESRCH error.
#[test]
fn is_ebusy_rejects_esrch() {
    let source = std::io::Error::from_raw_os_error(libc::ESRCH);
    let wrapped = anyhow::Error::new(source).context("write cgroup.procs");
    assert!(!is_ebusy(&wrapped));
}
/// Clearing subtree control on a missing cgroup succeeds.
#[test]
fn clear_subtree_control_nonexistent() {
    let manager = CgroupManager::new("/nonexistent/ktstr-clear-sc");
    let outcome = manager.clear_subtree_control("cg_0");
    assert!(outcome.is_ok());
}
/// Clearing an already-empty `cgroup.subtree_control` succeeds.
#[test]
fn clear_subtree_control_empty() {
    let scratch = std::env::temp_dir().join(format!("ktstr-cg-sc-{}", std::process::id()));
    let child = scratch.join("cg_a");
    fs::create_dir_all(&child).unwrap();
    fs::write(child.join("cgroup.subtree_control"), "").unwrap();
    let manager = CgroupManager::new(scratch.to_str().unwrap());
    let outcome = manager.clear_subtree_control("cg_a");
    assert!(outcome.is_ok());
    let _ = fs::remove_dir_all(&scratch);
}
/// A write that never completes is reported as a timeout. A FIFO is used
/// as the write target for this.
#[test]
fn write_with_timeout_blocks_on_fifo() {
    use std::ffi::CString;
    let scratch = std::env::temp_dir().join(format!("ktstr-cg-fifo-{}", std::process::id()));
    fs::create_dir_all(&scratch).unwrap();
    let fifo_path = scratch.join("blocked_write");
    let c_path = CString::new(fifo_path.to_str().unwrap()).unwrap();
    // SAFETY: `c_path` is a valid NUL-terminated string that outlives the call.
    let rc = unsafe { libc::mkfifo(c_path.as_ptr(), 0o700) };
    assert_eq!(rc, 0, "mkfifo failed: {}", std::io::Error::last_os_error());

    let outcome = write_with_timeout(&fifo_path, "data", Duration::from_millis(50));
    let msg = format!("{:#}", outcome.unwrap_err());
    assert!(msg.contains("timed out"), "unexpected error: {msg}");
    let _ = fs::remove_dir_all(&scratch);
}
/// The errno is extracted from a bare `io::Error`.
#[test]
fn anyhow_first_io_errno_extracts_raw_errno() {
    let source = std::io::Error::from_raw_os_error(libc::EBUSY);
    let err = anyhow::Error::new(source);
    let errno = anyhow_first_io_errno(&err);
    assert_eq!(errno, Some(libc::EBUSY));
}
/// The errno is still found when the `io::Error` is wrapped in context.
#[test]
fn anyhow_first_io_errno_through_context() {
    let source = std::io::Error::from_raw_os_error(libc::ESRCH);
    let err = anyhow::Error::new(source).context("wrapping context");
    let errno = anyhow_first_io_errno(&err);
    assert_eq!(errno, Some(libc::ESRCH));
}
/// An error chain without any `io::Error` yields `None`.
#[test]
fn anyhow_first_io_errno_no_io_returns_none() {
    let err = anyhow::anyhow!("plain text error");
    let errno = anyhow_first_io_errno(&err);
    assert_eq!(errno, None);
}
/// With no `cgroup.subtree_control` file present the call is a successful no-op.
#[test]
fn add_parent_subtree_controller_missing_file_noop() {
    let manager = CgroupManager::new("/nonexistent/ktstr-add-parent-sc");
    let outcome = manager.add_parent_subtree_controller("cpuset");
    assert!(outcome.is_ok());
}
/// The controller name is written with a leading `+`.
#[test]
fn add_parent_subtree_controller_writes_plus_prefixed_token() {
    let scratch = std::env::temp_dir().join(format!("ktstr-cg-addparent-{}", std::process::id()));
    fs::create_dir_all(&scratch).unwrap();
    let control_file = scratch.join("cgroup.subtree_control");
    fs::write(&control_file, "").unwrap();
    let manager = CgroupManager::new(scratch.to_str().unwrap());
    manager.add_parent_subtree_controller("cpuset").unwrap();
    let written = fs::read_to_string(&control_file).unwrap();
    assert_eq!(written, "+cpuset");
    let _ = fs::remove_dir_all(&scratch);
}
/// Test fixture: creates `<tmp>/ktstr-cg-<label>-<pid>/cg_x` and returns the
/// top-level directory together with a `CgroupManager` rooted at it.
///
/// Panics if the directory cannot be created or its path is not valid UTF-8.
fn make_test_cgroup(label: &str) -> (PathBuf, CgroupManager) {
    let pid = std::process::id();
    let root = std::env::temp_dir().join(format!("ktstr-cg-{label}-{pid}"));
    fs::create_dir_all(root.join("cg_x")).unwrap();
    let manager = CgroupManager::new(root.to_str().unwrap());
    (root, manager)
}
/// A `Some` quota is written to `cpu.max` as `<quota> <period>`.
#[test]
fn set_cpu_max_writes_quota_and_period_when_some() {
    let (root, manager) = make_test_cgroup("cpu-max-some");
    let knob = root.join("cg_x").join("cpu.max");
    fs::write(&knob, "").unwrap();
    manager.set_cpu_max("cg_x", Some(50_000), 100_000).unwrap();
    let written = fs::read_to_string(&knob).unwrap();
    assert_eq!(written, "50000 100000");
    let _ = fs::remove_dir_all(&root);
}
/// A `None` quota is written to `cpu.max` as the keyword `max` followed by
/// the period.
#[test]
fn set_cpu_max_writes_max_keyword_when_none() {
    let (root, manager) = make_test_cgroup("cpu-max-none");
    let knob = root.join("cg_x").join("cpu.max");
    fs::write(&knob, "").unwrap();
    manager.set_cpu_max("cg_x", None, 100_000).unwrap();
    let written = fs::read_to_string(&knob).unwrap();
    assert_eq!(written, "max 100000");
    let _ = fs::remove_dir_all(&root);
}
/// `set_cpu_weight` writes the weight to `cpu.weight` in decimal.
#[test]
fn set_cpu_weight_writes_decimal_value() {
    let (root, manager) = make_test_cgroup("cpu-weight");
    let knob = root.join("cg_x").join("cpu.weight");
    fs::write(&knob, "").unwrap();
    manager.set_cpu_weight("cg_x", 250).unwrap();
    let written = fs::read_to_string(&knob).unwrap();
    assert_eq!(written, "250");
    let _ = fs::remove_dir_all(&root);
}
/// `set_memory_max` writes a byte count for `Some` and the keyword `max`
/// for `None`.
#[test]
fn set_memory_max_writes_bytes_or_max_keyword() {
    let (root, manager) = make_test_cgroup("mem-max");
    let knob = root.join("cg_x").join("memory.max");
    fs::write(&knob, "").unwrap();
    // Each step overwrites the same file; check its contents after every call.
    for (limit, expected) in [(Some(1_048_576), "1048576"), (None, "max")] {
        manager.set_memory_max("cg_x", limit).unwrap();
        assert_eq!(fs::read_to_string(&knob).unwrap(), expected);
    }
    let _ = fs::remove_dir_all(&root);
}
/// `set_memory_high` writes a byte count for `Some` and the keyword `max`
/// for `None`.
#[test]
fn set_memory_high_writes_bytes_or_max_keyword() {
    let (root, manager) = make_test_cgroup("mem-high");
    let knob = root.join("cg_x").join("memory.high");
    fs::write(&knob, "").unwrap();
    // Each step overwrites the same file; check its contents after every call.
    for (limit, expected) in [(Some(524_288), "524288"), (None, "max")] {
        manager.set_memory_high("cg_x", limit).unwrap();
        assert_eq!(fs::read_to_string(&knob).unwrap(), expected);
    }
    let _ = fs::remove_dir_all(&root);
}
/// `set_memory_low` writes a byte count for `Some` and `0` for `None`.
#[test]
fn set_memory_low_writes_bytes_or_zero() {
    let (root, manager) = make_test_cgroup("mem-low");
    let knob = root.join("cg_x").join("memory.low");
    fs::write(&knob, "").unwrap();
    // Each step overwrites the same file; check its contents after every call.
    for (floor, expected) in [(Some(2_048), "2048"), (None, "0")] {
        manager.set_memory_low("cg_x", floor).unwrap();
        assert_eq!(fs::read_to_string(&knob).unwrap(), expected);
    }
    let _ = fs::remove_dir_all(&root);
}
/// `set_io_weight` writes the weight to `io.weight` in decimal.
#[test]
fn set_io_weight_writes_decimal_value() {
    let (root, manager) = make_test_cgroup("io-weight");
    let knob = root.join("cg_x").join("io.weight");
    fs::write(&knob, "").unwrap();
    manager.set_io_weight("cg_x", 500).unwrap();
    let written = fs::read_to_string(&knob).unwrap();
    assert_eq!(written, "500");
    let _ = fs::remove_dir_all(&root);
}
/// `set_freeze` writes `1` for `true` and `0` for `false` to
/// `cgroup.freeze`.
#[test]
fn set_freeze_writes_zero_or_one() {
    let (root, manager) = make_test_cgroup("freeze");
    let knob = root.join("cg_x").join("cgroup.freeze");
    fs::write(&knob, "").unwrap();
    // Freeze first, then thaw, checking the file after each call.
    for (frozen, expected) in [(true, "1"), (false, "0")] {
        manager.set_freeze("cg_x", frozen).unwrap();
        assert_eq!(fs::read_to_string(&knob).unwrap(), expected);
    }
    let _ = fs::remove_dir_all(&root);
}
/// `set_pids_max` writes a decimal count for `Some` and the keyword `max`
/// for `None`.
#[test]
fn set_pids_max_writes_decimal_or_max_keyword() {
    let (root, manager) = make_test_cgroup("pids-max");
    let knob = root.join("cg_x").join("pids.max");
    fs::write(&knob, "").unwrap();
    // Each step overwrites the same file; check its contents after every call.
    for (limit, expected) in [(Some(1024), "1024"), (None, "max")] {
        manager.set_pids_max("cg_x", limit).unwrap();
        assert_eq!(fs::read_to_string(&knob).unwrap(), expected);
    }
    let _ = fs::remove_dir_all(&root);
}
/// `set_memory_swap_max` writes a byte count for `Some` and the keyword
/// `max` for `None`.
#[test]
fn set_memory_swap_max_writes_bytes_or_max_keyword() {
    let (root, manager) = make_test_cgroup("mem-swap-max");
    let knob = root.join("cg_x").join("memory.swap.max");
    fs::write(&knob, "").unwrap();
    // Each step overwrites the same file; check its contents after every call.
    for (limit, expected) in [(Some(2 * 1024 * 1024), "2097152"), (None, "max")] {
        manager.set_memory_swap_max("cg_x", limit).unwrap();
        assert_eq!(fs::read_to_string(&knob).unwrap(), expected);
    }
    let _ = fs::remove_dir_all(&root);
}
/// Each unsafe name must be rejected by `validate_cgroup_name`, and the
/// top-level error message must mention the expected reason fragment.
#[test]
fn validate_cgroup_name_rejects_unsafe_shapes() {
    for (name, reason) in [
        ("", "empty"),
        ("/abs", "starts with '/'"),
        ("nul\0byte", "NUL byte"),
        (".hidden", "leading-dot component"),
        ("..", "'..' component"),
        ("a/..", "'..' component"),
        ("../escape", "'..' component"),
        (".", "'.' component"),
        ("a//b", "empty path component"),
        ("ok/.dotfile", "leading-dot component"),
    ] {
        // Build the failure message only when a name is wrongly accepted,
        // rather than formatting it eagerly on every iteration.
        let err = match validate_cgroup_name(name) {
            Err(err) => err,
            Ok(()) => panic!("must reject {name:?} ({reason})"),
        };
        assert!(
            err.to_string().contains(reason),
            "error for {name:?} must mention {reason:?}; got: {err:#}"
        );
    }
}
/// Ordinary names must pass `validate_cgroup_name`, including nested paths
/// and names with interior dots.
#[test]
fn validate_cgroup_name_accepts_valid_shapes() {
    let legitimate = [
        "cg_0",
        "cg-1",
        "cg.0",
        "cg_0/narrow",
        "level1/level2/level3",
        "a.b.c",
        "x",
    ];
    for name in legitimate {
        if let Err(e) = validate_cgroup_name(name) {
            panic!("must accept legitimate name {name:?}; got: {e:#}");
        }
    }
}
/// Every name-taking `CgroupManager` method exercised here must reject a
/// `..`-bearing name with the validator's error, and nothing named
/// `escape` may appear on disk as a result.
#[test]
fn cgroup_methods_reject_bad_names_before_fs_writes() {
    // The managed parent sits one level below a sandbox directory owned by
    // this test. `Path::join` does not normalise `..`, so an unvalidated
    // `<parent>/../escape` would resolve to `<sandbox>/escape` — a location
    // this test controls and can therefore check reliably.
    let sandbox = std::env::temp_dir().join(format!("ktstr-cg-badname-{}", std::process::id()));
    // Drop leftovers from an earlier run that reused this PID so the marker
    // checks below start from a known-empty state.
    let _ = fs::remove_dir_all(&sandbox);
    let dir = sandbox.join("parent");
    fs::create_dir_all(&dir).unwrap();
    let cg = CgroupManager::new(dir.to_str().unwrap());
    let bad = "../escape";
    let err = cg.create_cgroup(bad).unwrap_err();
    assert!(err.to_string().contains("'..' component"));
    let err = cg.set_freeze(bad, true).unwrap_err();
    assert!(err.to_string().contains("'..' component"));
    let err = cg.set_pids_max(bad, Some(10)).unwrap_err();
    assert!(err.to_string().contains("'..' component"));
    let err = cg.set_memory_swap_max(bad, Some(1024)).unwrap_err();
    assert!(err.to_string().contains("'..' component"));
    let err = cg.set_cpuset_mems(bad, &BTreeSet::new()).unwrap_err();
    assert!(err.to_string().contains("'..' component"));
    let err = cg.move_task(bad, 1).unwrap_err();
    assert!(err.to_string().contains("'..' component"));
    let err = cg.drain_tasks(bad).unwrap_err();
    assert!(err.to_string().contains("'..' component"));
    let err = cg.remove_cgroup(bad).unwrap_err();
    assert!(err.to_string().contains("'..' component"));
    // Check both where the escape would really land (next to the parent)
    // and inside the parent itself.
    for escape_marker in [sandbox.join("escape"), dir.join("escape")] {
        assert!(
            !escape_marker.exists(),
            "validator must bail before fs writes; saw {escape_marker:?}"
        );
    }
    let _ = fs::remove_dir_all(&sandbox);
}
/// When the manager's parent path is not under the root handed to
/// `setup_under_root`, setup still creates the parent directory, but no
/// `cgroup.subtree_control` file appears inside it.
#[test]
fn setup_under_root_outside_root_creates_dir_and_skips_walk() {
    let pid = std::process::id();
    let tmp = std::env::temp_dir();
    let outside = tmp.join(format!("ktstr-out-{pid}"));
    let unrelated_root = tmp.join(format!("ktstr-other-{pid}"));
    // Only the unrelated root is pre-created; `outside` is left for setup.
    fs::create_dir_all(&unrelated_root).unwrap();
    let manager = CgroupManager::new(outside.to_str().unwrap());
    let requested = BTreeSet::from([Controller::Cpuset]);
    manager
        .setup_under_root(&requested, &unrelated_root)
        .unwrap();
    assert!(outside.exists(), "setup must create the parent directory");
    let control_file = outside.join("cgroup.subtree_control");
    assert!(
        !control_file.exists(),
        "no subtree_control walk should fire when the parent is not under root"
    );
    for scratch in [&outside, &unrelated_root] {
        let _ = fs::remove_dir_all(scratch);
    }
}
/// Repeating `set_freeze` with the state already in effect succeeds and
/// leaves the same value in `cgroup.freeze`.
#[test]
fn set_freeze_is_idempotent_when_already_in_target_state() {
    let (root, manager) = make_test_cgroup("freeze-idem");
    let knob = root.join("cg_x").join("cgroup.freeze");
    fs::write(&knob, "").unwrap();
    // Each state is requested twice in a row: freeze, freeze, thaw, thaw.
    let steps = [(true, "1"), (true, "1"), (false, "0"), (false, "0")];
    for (frozen, expected) in steps {
        manager.set_freeze("cg_x", frozen).unwrap();
        assert_eq!(fs::read_to_string(&knob).unwrap(), expected);
    }
    let _ = fs::remove_dir_all(&root);
}
/// `u64::MAX` passed to `set_pids_max` lands in `pids.max` as its exact
/// decimal representation.
#[test]
fn set_pids_max_writes_u64_max_verbatim() {
    let (root, manager) = make_test_cgroup("pids-overflow");
    let knob = root.join("cg_x").join("pids.max");
    fs::write(&knob, "").unwrap();
    manager.set_pids_max("cg_x", Some(u64::MAX)).unwrap();
    let written = fs::read_to_string(&knob).unwrap();
    let expected = u64::MAX.to_string();
    assert_eq!(
        written, expected,
        "u64::MAX must round-trip without narrowing or sign change"
    );
    let _ = fs::remove_dir_all(&root);
}
/// `u64::MAX` passed to `set_memory_swap_max` lands in `memory.swap.max`
/// as its exact decimal representation.
#[test]
fn set_memory_swap_max_writes_u64_max_verbatim() {
    let (root, manager) = make_test_cgroup("swap-overflow");
    let knob = root.join("cg_x").join("memory.swap.max");
    fs::write(&knob, "").unwrap();
    manager.set_memory_swap_max("cg_x", Some(u64::MAX)).unwrap();
    let written = fs::read_to_string(&knob).unwrap();
    let expected = u64::MAX.to_string();
    assert_eq!(
        written, expected,
        "u64::MAX must round-trip without narrowing or sign change"
    );
    let _ = fs::remove_dir_all(&root);
}
/// With a nonexistent parent, `set_pids_max` must fail and the rendered
/// error chain must name `pids.max`.
#[test]
fn set_pids_max_returns_err_when_pids_max_file_missing() {
    let manager = CgroupManager::new("/nonexistent/ktstr-pids-test");
    let outcome = manager.set_pids_max("cg_x", Some(1024));
    let err = outcome.expect_err("missing pids.max must surface as Err");
    // `{:#}` renders the full context chain on one line.
    let msg = format!("{err:#}");
    assert!(
        msg.contains("pids.max"),
        "error chain must name the missing file: {msg}"
    );
}
/// With a nonexistent parent, `set_memory_swap_max` must fail and the
/// rendered error chain must name `memory.swap.max`.
#[test]
fn set_memory_swap_max_returns_err_when_file_missing() {
    let manager = CgroupManager::new("/nonexistent/ktstr-swap-test");
    let outcome = manager.set_memory_swap_max("cg_x", Some(2_000_000));
    let err = outcome.expect_err("missing memory.swap.max must surface as Err");
    // `{:#}` renders the full context chain on one line.
    let msg = format!("{err:#}");
    assert!(
        msg.contains("memory.swap.max"),
        "error chain must name the missing file: {msg}"
    );
}
/// With a nonexistent parent, `set_freeze` must fail with an error whose
/// chain exposes `ENOENT` through `anyhow_first_io_errno`.
#[test]
fn set_freeze_returns_err_with_enoent_when_freeze_file_missing() {
    let manager = CgroupManager::new("/nonexistent/ktstr-freeze-test");
    let outcome = manager.set_freeze("cg_x", true);
    let err = outcome.expect_err("missing cgroup.freeze must surface as Err");
    let errno = anyhow_first_io_errno(&err);
    assert_eq!(
        errno,
        Some(libc::ENOENT),
        "ENOENT errno must be reachable from the error chain so \
         remove_cgroup's auto-unfreeze can suppress it; got: {err:#}"
    );
}
/// `remove_cgroup` on a cgroup whose `cgroup.freeze` holds `1` must leave
/// `0` in that file. The call's own result is deliberately ignored.
#[test]
fn remove_cgroup_auto_unfreezes_before_drain() {
    let pid = std::process::id();
    let root = std::env::temp_dir().join(format!("ktstr-cg-autounf-{pid}"));
    let group_dir = root.join("cg_x");
    fs::create_dir_all(&group_dir).unwrap();
    // Start frozen, with an empty process list.
    let freeze_path = group_dir.join("cgroup.freeze");
    fs::write(&freeze_path, "1").unwrap();
    fs::write(group_dir.join("cgroup.procs"), "").unwrap();
    let manager = CgroupManager::new(root.to_str().unwrap());
    let _ = manager.remove_cgroup("cg_x");
    let freeze_state = fs::read_to_string(&freeze_path).unwrap();
    assert_eq!(
        freeze_state, "0",
        "remove_cgroup must write '0' to cgroup.freeze before draining"
    );
    let _ = fs::remove_dir_all(&root);
}
/// Smoke test: calling `remove_cgroup` on a cgroup directory that has no
/// `cgroup.freeze` file must not panic. The returned `Result` is
/// intentionally discarded.
#[test]
fn remove_cgroup_tolerates_missing_freeze_file() {
    let pid = std::process::id();
    let root = std::env::temp_dir().join(format!("ktstr-cg-nofrz-{pid}"));
    let group_dir = root.join("cg_x");
    fs::create_dir_all(&group_dir).unwrap();
    // Only `cgroup.procs` is seeded; `cgroup.freeze` is left absent.
    fs::write(group_dir.join("cgroup.procs"), "").unwrap();
    let manager = CgroupManager::new(root.to_str().unwrap());
    let _ = manager.remove_cgroup("cg_x");
    let _ = fs::remove_dir_all(&root);
}
/// `cleanup_recursive` on a directory whose `cgroup.freeze` holds `1` must
/// leave `0` in that file.
#[test]
fn cleanup_recursive_auto_unfreezes_before_drain() {
    let pid = std::process::id();
    let root = std::env::temp_dir().join(format!("ktstr-cleanup-rec-autounf-{pid}"));
    fs::create_dir_all(&root).unwrap();
    // Start frozen, with an empty process list.
    let freeze_path = root.join("cgroup.freeze");
    fs::write(&freeze_path, "1").unwrap();
    fs::write(root.join("cgroup.procs"), "").unwrap();
    cleanup_recursive(&root);
    let freeze_state = fs::read_to_string(&freeze_path).unwrap();
    assert_eq!(
        freeze_state, "0",
        "cleanup_recursive must write '0' to cgroup.freeze before draining \
         (mirrors remove_cgroup auto-unfreeze for state hygiene)",
    );
    let _ = fs::remove_dir_all(&root);
}
/// Each failing `remove_cgroup` call bumps `outstanding_removes` by one.
/// The removal is expected to fail here, presumably because the directory
/// still holds a regular `cgroup.procs` file.
#[test]
fn remove_cgroup_increments_outstanding_on_failure() {
    let pid = std::process::id();
    let root = std::env::temp_dir().join(format!("ktstr-cg-outstanding-{pid}"));
    let group_dir = root.join("cg_x");
    // (Re)creates the cgroup directory with an empty `cgroup.procs` in it.
    let seed_group = || {
        fs::create_dir_all(&group_dir).unwrap();
        fs::write(group_dir.join("cgroup.procs"), "").unwrap();
    };
    seed_group();
    let manager = CgroupManager::new(root.to_str().unwrap());
    assert_eq!(manager.outstanding_removes(), 0);
    let _ = manager.remove_cgroup("cg_x");
    assert_eq!(
        manager.outstanding_removes(),
        1,
        "outstanding_removes must increment when rmdir fails"
    );
    // Second attempt against a freshly seeded directory.
    seed_group();
    let _ = manager.remove_cgroup("cg_x");
    assert_eq!(
        manager.outstanding_removes(),
        2,
        "outstanding_removes must increment monotonically on repeat failures"
    );
    let _ = fs::remove_dir_all(&root);
}
/// A successful `remove_cgroup` lowers a pre-seeded `outstanding_removes`
/// counter by exactly one.
#[test]
fn remove_cgroup_decrements_outstanding_on_success() {
    let pid = std::process::id();
    let root = std::env::temp_dir().join(format!("ktstr-cg-decrement-{pid}"));
    fs::create_dir_all(&root).unwrap();
    let manager = CgroupManager::new(root.to_str().unwrap());
    // Pretend three earlier removals are still outstanding.
    manager.outstanding_removes.store(3, Ordering::Relaxed);
    // An empty directory for the manager to remove.
    let group_dir = root.join("cg_clean");
    fs::create_dir_all(&group_dir).unwrap();
    manager.remove_cgroup("cg_clean").unwrap();
    let remaining = manager.outstanding_removes();
    assert_eq!(
        remaining, 2,
        "successful remove must decrement outstanding_removes by 1"
    );
    let _ = fs::remove_dir_all(&root);
}
/// With the counter above `MAX_OUTSTANDING_REMOVES`, `remove_cgroup` must
/// return an error that cites the cap and leave the directory in place.
#[test]
fn remove_cgroup_bails_when_cap_exceeded() {
    let pid = std::process::id();
    let root = std::env::temp_dir().join(format!("ktstr-cg-cap-{pid}"));
    let group_dir = root.join("cg_x");
    fs::create_dir_all(&group_dir).unwrap();
    let manager = CgroupManager::new(root.to_str().unwrap());
    // Push the counter one past the cap before attempting the removal.
    let over_cap = MAX_OUTSTANDING_REMOVES + 1;
    manager.outstanding_removes.store(over_cap, Ordering::Relaxed);
    let outcome = manager.remove_cgroup("cg_x");
    let err = outcome.expect_err("cap-exceeded remove must surface as Err");
    let msg = format!("{err:#}");
    let cites_cap = ["outstanding", "cap"]
        .iter()
        .all(|needle| msg.contains(*needle));
    assert!(cites_cap, "error must cite the cap; got: {msg}");
    assert!(
        group_dir.exists(),
        "cap-exceeded bail must not touch the filesystem"
    );
    let _ = fs::remove_dir_all(&root);
}
/// Removing a cgroup whose directory does not exist succeeds and leaves
/// `outstanding_removes` at its pre-seeded value.
#[test]
fn remove_cgroup_missing_dir_does_not_touch_counter() {
    let manager = CgroupManager::new("/nonexistent/ktstr-missing-counter");
    manager.outstanding_removes.store(5, Ordering::Relaxed);
    manager.remove_cgroup("no_such_cgroup").unwrap();
    let after = manager.outstanding_removes();
    assert_eq!(
        after, 5,
        "missing-dir early return must not decrement the counter"
    );
}
/// With `cpuset.cpus` populated but `cpuset.mems.effective` empty,
/// `move_task` must fail, mention both `cpuset.mems.effective` and
/// `set_cpuset_mems`, and must not create `cgroup.procs`.
#[test]
fn move_task_refuses_when_cpuset_cpus_set_but_effective_mems_empty() {
    let pid = std::process::id();
    let root = std::env::temp_dir().join(format!("ktstr-cg-cpuset-gate-{pid}"));
    let group_dir = root.join("cg_x");
    fs::create_dir_all(&group_dir).unwrap();
    // CPUs are set, effective memory nodes are empty.
    fs::write(group_dir.join("cpuset.cpus"), "0-1").unwrap();
    fs::write(group_dir.join("cpuset.mems.effective"), "").unwrap();
    let manager = CgroupManager::new(root.to_str().unwrap());
    let outcome = manager.move_task("cg_x", 1);
    let err = outcome.expect_err("half-configured cpuset must refuse move_task");
    let msg = format!("{err:#}");
    let names_both = ["cpuset.mems.effective", "set_cpuset_mems"]
        .iter()
        .all(|needle| msg.contains(*needle));
    assert!(
        names_both,
        "error must cite cpuset.mems.effective and direct caller to set_cpuset_mems; got: {msg}"
    );
    // `cgroup.procs` was never seeded, so its presence would mean a write happened.
    let procs_path = group_dir.join("cgroup.procs");
    assert!(
        !procs_path.exists(),
        "gate must bail before any cgroup.procs write; cgroup.procs exists at {procs_path:?}"
    );
    let _ = fs::remove_dir_all(&root);
}
/// With `cpuset.cpus` populated and a non-empty `cpuset.mems.effective`,
/// `move_task` must succeed.
#[test]
fn move_task_admits_when_cpus_set_and_effective_mems_non_empty() {
    let pid = std::process::id();
    let root = std::env::temp_dir().join(format!("ktstr-cg-cpuset-ok-{pid}"));
    let group_dir = root.join("cg_x");
    fs::create_dir_all(&group_dir).unwrap();
    fs::write(group_dir.join("cpuset.cpus"), "0-1").unwrap();
    fs::write(group_dir.join("cpuset.mems.effective"), "0").unwrap();
    let manager = CgroupManager::new(root.to_str().unwrap());
    // Seed the process list the move is expected to write into.
    fs::write(group_dir.join("cgroup.procs"), "").unwrap();
    let outcome = manager.move_task("cg_x", 1);
    outcome.expect("non-empty effective mems must admit move_task");
    let _ = fs::remove_dir_all(&root);
}
/// An empty local `cpuset.mems` does not block `move_task` as long as
/// `cpuset.mems.effective` is non-empty.
#[test]
fn move_task_admits_when_local_mems_empty_but_effective_inherited() {
    let pid = std::process::id();
    let root = std::env::temp_dir().join(format!("ktstr-cg-cpuset-inherit-mems-{pid}"));
    let group_dir = root.join("cg_x");
    fs::create_dir_all(&group_dir).unwrap();
    // Seed the control files in the same order as listed: cpus, local mems
    // (empty), effective mems (non-empty), then the process list.
    let seeds = [
        ("cpuset.cpus", "0-1"),
        ("cpuset.mems", ""),
        ("cpuset.mems.effective", "0"),
        ("cgroup.procs", ""),
    ];
    for (file, contents) in seeds {
        fs::write(group_dir.join(file), contents).unwrap();
    }
    let manager = CgroupManager::new(root.to_str().unwrap());
    let outcome = manager.move_task("cg_x", 1);
    outcome.expect("inherited effective mems must admit move_task");
    let _ = fs::remove_dir_all(&root);
}
/// With an empty `cpuset.cpus`, `move_task` must succeed even though
/// `cpuset.mems.effective` is empty too.
#[test]
fn move_task_admits_when_cpuset_cpus_empty() {
    let pid = std::process::id();
    let root = std::env::temp_dir().join(format!("ktstr-cg-cpuset-inherit-{pid}"));
    let group_dir = root.join("cg_x");
    fs::create_dir_all(&group_dir).unwrap();
    // All three control files exist and are empty.
    for file in ["cpuset.cpus", "cpuset.mems.effective", "cgroup.procs"] {
        fs::write(group_dir.join(file), "").unwrap();
    }
    let manager = CgroupManager::new(root.to_str().unwrap());
    let outcome = manager.move_task("cg_x", 1);
    outcome.expect("inherit-cpuset cgroup must admit move_task");
    let _ = fs::remove_dir_all(&root);
}
/// A cgroup directory with no cpuset files at all must still admit
/// `move_task`.
#[test]
fn move_task_admits_when_cpuset_files_absent() {
    let pid = std::process::id();
    let root = std::env::temp_dir().join(format!("ktstr-cg-no-cpuset-{pid}"));
    let group_dir = root.join("cg_x");
    fs::create_dir_all(&group_dir).unwrap();
    // Only the process list exists.
    fs::write(group_dir.join("cgroup.procs"), "").unwrap();
    let manager = CgroupManager::new(root.to_str().unwrap());
    let outcome = manager.move_task("cg_x", 1);
    outcome.expect("no-cpuset cgroup must admit move_task");
    let _ = fs::remove_dir_all(&root);
}
/// With `cpuset.cpus` populated but no `cpuset.mems.effective` file at
/// all, `move_task` must still succeed.
#[test]
fn move_task_admits_when_effective_mems_file_absent() {
    let pid = std::process::id();
    let root = std::env::temp_dir().join(format!("ktstr-cg-no-effective-mems-{pid}"));
    let group_dir = root.join("cg_x");
    fs::create_dir_all(&group_dir).unwrap();
    // CPUs are set; the effective-mems file is deliberately not created.
    fs::write(group_dir.join("cpuset.cpus"), "0-1").unwrap();
    fs::write(group_dir.join("cgroup.procs"), "").unwrap();
    let manager = CgroupManager::new(root.to_str().unwrap());
    let outcome = manager.move_task("cg_x", 1);
    outcome.expect("missing cpuset.mems.effective must admit move_task (read-failure absorb)");
    let _ = fs::remove_dir_all(&root);
}
}