use crate::cgroup::{CgroupConfig, ResourceStats};
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
/// Handle to a sub-cgroup created by [`setup_rootless_cgroup`].
///
/// The handle is what [`read_rootless_stats`] and [`teardown_rootless_cgroup`]
/// operate on. It derives `Debug` so it can appear in log lines and assertion
/// messages.
#[derive(Debug)]
pub struct RootlessCgroup {
    /// Filesystem path of the sub-cgroup directory.
    pub path: PathBuf,
}
/// Resolves the cgroup v2 directory of the calling process.
///
/// Reads `/proc/self/cgroup` and hands its contents to `parse_cgroup_path`.
///
/// # Errors
///
/// Returns the I/O error from reading `/proc/self/cgroup`, or the error
/// produced by `parse_cgroup_path` when the contents have no `0::` entry.
pub fn self_cgroup_path() -> io::Result<PathBuf> {
    let contents = fs::read_to_string("/proc/self/cgroup")?;
    parse_cgroup_path(&contents)
}
/// Extracts the cgroup v2 path from `/proc/<pid>/cgroup`-style `contents`.
///
/// The first line starting with `0::` is used. Its remainder, with leading
/// slashes removed, is joined onto `/sys/fs/cgroup`.
///
/// # Errors
///
/// Returns an [`io::ErrorKind::NotFound`] error when no line starts with `0::`.
fn parse_cgroup_path(contents: &str) -> io::Result<PathBuf> {
    contents
        .lines()
        .find_map(|entry| entry.strip_prefix("0::"))
        .map(|cgroup| PathBuf::from("/sys/fs/cgroup").join(cgroup.trim_start_matches('/')))
        .ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::NotFound,
                "no cgroup v2 entry in /proc/self/cgroup",
            )
        })
}
/// Reports whether at least one of the `cpu`, `memory` or `pids` controllers
/// is listed in the calling process's own `cgroup.controllers` file.
///
/// Returns `false` when the process's cgroup path cannot be determined or the
/// `cgroup.controllers` file cannot be read.
pub fn is_delegation_available() -> bool {
    let listed = self_cgroup_path()
        .and_then(|cgroup| fs::read_to_string(cgroup.join("cgroup.controllers")));
    match listed {
        // The file is a whitespace-separated list of controller names. Compare
        // whole tokens: a substring search would mistake `cpuset` for `cpu`.
        Ok(controllers) => controllers
            .split_whitespace()
            .any(|name| matches!(name, "cpu" | "memory" | "pids")),
        Err(_) => false,
    }
}
/// Reads the raw contents of `cgroup.controllers` inside the `parent` directory.
///
/// # Errors
///
/// Returns the underlying I/O error when the file cannot be read.
fn available_controllers(parent: &Path) -> io::Result<String> {
    let controllers_file = parent.join("cgroup.controllers");
    fs::read_to_string(controllers_file)
}
pub fn setup_rootless_cgroup(cfg: &CgroupConfig, child_pid: u32) -> io::Result<RootlessCgroup> {
let parent = self_cgroup_path()?;
let controllers = available_controllers(&parent)?;
let name = format!("pelagos-{}", child_pid);
let cg_path = parent.join(&name);
fs::create_dir(&cg_path)?;
let subtree_control = parent.join("cgroup.subtree_control");
for ctrl in ["memory", "cpu", "pids"] {
if controllers.contains(ctrl) {
let token = format!("+{}", ctrl);
if let Err(e) = fs::write(&subtree_control, &token) {
log::debug!("cgroup.subtree_control {}: {}", token, e);
}
}
}
let child_controllers = available_controllers(&cg_path).unwrap_or_default();
if child_controllers.contains("memory") {
if let Some(bytes) = cfg.memory_limit {
write_limit(&cg_path, "memory.max", &bytes.to_string())?;
}
if let Some(swap) = cfg.memory_swap {
write_limit(&cg_path, "memory.swap.max", &swap.to_string())?;
}
if let Some(res) = cfg.memory_reservation {
write_limit(&cg_path, "memory.low", &res.to_string())?;
}
} else if cfg.memory_limit.is_some()
|| cfg.memory_swap.is_some()
|| cfg.memory_reservation.is_some()
{
log::warn!("memory controller not available in sub-cgroup, skipping memory limits");
}
if child_controllers.contains("cpu") {
if let Some((quota_us, period_us)) = cfg.cpu_quota {
write_limit(&cg_path, "cpu.max", &format!("{} {}", quota_us, period_us))?;
}
if let Some(shares) = cfg.cpu_shares {
write_limit(&cg_path, "cpu.weight", &shares.to_string())?;
}
} else if cfg.cpu_quota.is_some() || cfg.cpu_shares.is_some() {
log::warn!("cpu controller not available in sub-cgroup, skipping cpu limits");
}
if let Some(ref cpus) = cfg.cpuset_cpus {
let knob = cg_path.join("cpuset.cpus");
if knob.exists() {
if let Err(e) = fs::write(&knob, cpus) {
log::warn!("cpuset.cpus={} failed (non-fatal): {}", cpus, e);
}
}
}
if let Some(ref mems) = cfg.cpuset_mems {
let knob = cg_path.join("cpuset.mems");
if knob.exists() {
if let Err(e) = fs::write(&knob, mems) {
log::warn!("cpuset.mems={} failed (non-fatal): {}", mems, e);
}
}
}
if let Some(max) = cfg.pids_limit {
if child_controllers.contains("pids") {
write_limit(&cg_path, "pids.max", &max.to_string())?;
} else {
log::warn!("pids controller not available in sub-cgroup, skipping pids limit");
}
}
fs::write(cg_path.join("cgroup.procs"), child_pid.to_string())?;
log::info!("rootless cgroup created: {}", cg_path.display());
Ok(RootlessCgroup { path: cg_path })
}
/// Writes `value` into the control file `knob` inside `cg_path`.
///
/// # Errors
///
/// On failure, returns an error with the same [`io::ErrorKind`] as the
/// underlying one and a message naming the value, the target file and the
/// original error.
fn write_limit(cg_path: &Path, knob: &str, value: &str) -> io::Result<()> {
    match fs::write(cg_path.join(knob), value) {
        Ok(()) => Ok(()),
        Err(cause) => {
            let message = format!(
                "writing {} to {}/{}: {}",
                value,
                cg_path.display(),
                knob,
                cause
            );
            Err(io::Error::new(cause.kind(), message))
        }
    }
}
/// Collects memory, CPU and pid usage figures for the sub-cgroup `cg`.
///
/// Each figure is read independently. A file that cannot be read or parsed
/// leaves the corresponding field at its `ResourceStats::default()` value, so
/// this function always returns `Ok`.
///
/// * `memory.current` -> `memory_current_bytes`
/// * first `usage_usec` line of `cpu.stat`, multiplied by 1000 (saturating)
///   -> `cpu_usage_ns`
/// * `pids.current` -> `pids_current`
pub fn read_rootless_stats(cg: &RootlessCgroup) -> io::Result<ResourceStats> {
    // Reads a control file that holds a single unsigned integer.
    let read_counter = |file: &str| -> Option<u64> {
        fs::read_to_string(cg.path.join(file))
            .ok()?
            .trim()
            .parse::<u64>()
            .ok()
    };

    let mut stats = ResourceStats::default();

    if let Some(bytes) = read_counter("memory.current") {
        stats.memory_current_bytes = bytes;
    }

    // Only the first `usage_usec` line is considered, even if it fails to parse.
    let usage_usec = fs::read_to_string(cg.path.join("cpu.stat"))
        .ok()
        .and_then(|stat| {
            stat.lines()
                .find_map(|entry| entry.strip_prefix("usage_usec "))
                .and_then(|value| value.trim().parse::<u64>().ok())
        });
    if let Some(usec) = usage_usec {
        stats.cpu_usage_ns = usec.saturating_mul(1000);
    }

    if let Some(count) = read_counter("pids.current") {
        stats.pids_current = count;
    }

    Ok(stats)
}
/// Removes the directory of the sub-cgroup `cg`.
///
/// Failure is not propagated: it is logged as a warning and the function
/// returns normally.
pub fn teardown_rootless_cgroup(cg: &RootlessCgroup) {
    let dir = &cg.path;
    match fs::remove_dir(dir) {
        Ok(()) => {}
        Err(err) => log::warn!(
            "rootless cgroup remove {} failed (non-fatal): {}",
            dir.display(),
            err
        ),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A nested `0::` entry maps to the same relative path under `/sys/fs/cgroup`.
    #[test]
    fn test_parse_cgroup_path() {
        let parsed =
            parse_cgroup_path("0::/user.slice/user-1000.slice/session-2.scope\n").unwrap();
        let expected =
            PathBuf::from("/sys/fs/cgroup/user.slice/user-1000.slice/session-2.scope");
        assert_eq!(parsed, expected);
    }

    /// The root cgroup (`0::/`) maps to the cgroup mount point itself.
    #[test]
    fn test_parse_cgroup_path_root() {
        let parsed = parse_cgroup_path("0::/\n").unwrap();
        assert_eq!(parsed, PathBuf::from("/sys/fs/cgroup"));
    }

    /// Input without a `0::` line yields a `NotFound` error.
    #[test]
    fn test_parse_cgroup_path_no_v2() {
        let kind = parse_cgroup_path("1:name=systemd:/user.slice\n")
            .unwrap_err()
            .kind();
        assert_eq!(kind, io::ErrorKind::NotFound);
    }

    /// When the current process's cgroup can be resolved, the result lies
    /// under the cgroup mount and exists. Nothing is asserted otherwise.
    #[test]
    fn test_self_cgroup_path() {
        let path = match self_cgroup_path() {
            Ok(resolved) => resolved,
            Err(_) => return,
        };
        assert!(
            path.starts_with("/sys/fs/cgroup/"),
            "expected /sys/fs/cgroup/ prefix, got: {}",
            path.display()
        );
        assert!(
            path.exists(),
            "cgroup path does not exist: {}",
            path.display()
        );
    }
}