use cgroups_rs::{
fs::MaxValue,
fs::{
cgroup_builder::CgroupBuilder, cpu::CpuController, cpuset::CpuSetController, hierarchies,
memory::MemController, net_cls::NetClsController, net_prio::NetPrioController,
pid::PidController, Cgroup,
},
CgroupPid,
};
use std::io;
/// Resource limits and placement options applied by [`setup_cgroup`].
///
/// Every field is optional: `None` (or an empty `Vec`) means the corresponding
/// controller setting is not touched. `Default` yields a configuration that
/// requests no limits at all.
#[derive(Clone, Debug, Default)]
pub struct CgroupConfig {
    /// Hard memory limit, forwarded to the memory controller's hard-limit setting.
    /// NOTE(review): presumably bytes — confirm against the caller.
    pub memory_limit: Option<i64>,
    /// Swap limit, forwarded to the memory controller's swap-limit setting.
    pub memory_swap: Option<i64>,
    /// Memory reservation, forwarded to the memory controller's soft-limit setting.
    pub memory_reservation: Option<i64>,
    /// Swappiness value forwarded to the memory controller.
    pub memory_swappiness: Option<u64>,
    /// CPU shares forwarded to the cpu controller.
    pub cpu_shares: Option<u64>,
    /// CPU bandwidth limit as `(quota, period)`; both values are set together.
    pub cpu_quota: Option<(i64, u64)>,
    /// CPU list written to the cpuset controller after the cgroup is built.
    /// A failed write is logged and does not fail setup.
    pub cpuset_cpus: Option<String>,
    /// Memory-node list written to the cpuset controller after the cgroup is built.
    /// A failed write is logged and does not fail setup.
    pub cpuset_mems: Option<String>,
    /// Maximum number of processes allowed in the cgroup.
    pub pids_limit: Option<u64>,
    /// Block-I/O weight forwarded to the blkio controller.
    pub blkio_weight: Option<u16>,
    /// Per-device read throughput throttles as `(major, minor, rate)`.
    pub blkio_throttle_read_bps: Vec<(u64, u64, u64)>,
    /// Per-device write throughput throttles as `(major, minor, rate)`.
    pub blkio_throttle_write_bps: Vec<(u64, u64, u64)>,
    /// Per-device read IOPS throttles as `(major, minor, rate)`.
    pub blkio_throttle_read_iops: Vec<(u64, u64, u64)>,
    /// Per-device write IOPS throttles as `(major, minor, rate)`.
    pub blkio_throttle_write_iops: Vec<(u64, u64, u64)>,
    /// Device allow/deny rules, applied in the order given.
    pub device_rules: Vec<CgroupDeviceRule>,
    /// Class id forwarded to the network controller (net_cls).
    pub net_classid: Option<u64>,
    /// Network priorities as `(name, priority)` pairs (net_prio).
    /// NOTE(review): `name` is presumably a network interface name — confirm.
    pub net_priorities: Vec<(String, u64)>,
    /// Name/path of the cgroup to create; when `None`, `pelagos-<child_pid>` is used.
    pub path: Option<String>,
}
/// A single device-controller rule consumed by `setup_cgroup`.
#[derive(Clone, Debug)]
pub struct CgroupDeviceRule {
    /// `true` to allow access to the device, `false` to deny it.
    pub allow: bool,
    /// Device type selector: `'b'` is a block device, `'c'` is a character
    /// device, and any other character is treated as "all device types".
    pub kind: char,
    /// Device major number.
    /// NOTE(review): a negative value presumably acts as a wildcard — confirm.
    pub major: i64,
    /// Device minor number.
    /// NOTE(review): a negative value presumably acts as a wildcard — confirm.
    pub minor: i64,
    /// Access string parsed into device permissions (e.g. `"rwm"`). If it
    /// cannot be parsed, `setup_cgroup` falls back to all permissions.
    pub access: String,
}
/// Creates a cgroup, applies the limits described by `cfg`, and moves the
/// process `child_pid` into it.
///
/// The cgroup is named `cfg.path` when set, otherwise `pelagos-<child_pid>`,
/// and is created in the hierarchy chosen by `hierarchies::auto()`.
///
/// Memory, cpu, pids, blkio, network and device settings are applied through
/// the builder, and only for the controllers that `cfg` actually configures.
/// cpuset values are written after the cgroup exists; a failure there is
/// logged and treated as non-fatal. A missing net_cls / net_prio controller is
/// only reported at debug level.
///
/// # Errors
///
/// Returns an `io::Error` when the cgroup cannot be created or when the
/// process cannot be added to it. In the latter case the freshly created
/// cgroup is removed again (best effort) so it is not leaked.
pub fn setup_cgroup(cfg: &CgroupConfig, child_pid: u32) -> io::Result<Cgroup> {
    let name = cfg
        .path
        .clone()
        .unwrap_or_else(|| format!("pelagos-{}", child_pid));
    let hier = hierarchies::auto();
    let mut builder = CgroupBuilder::new(&name);

    // Memory controller: only touched when at least one memory knob is set.
    if cfg.memory_limit.is_some()
        || cfg.memory_swap.is_some()
        || cfg.memory_reservation.is_some()
        || cfg.memory_swappiness.is_some()
    {
        let mut mb = builder.memory();
        if let Some(limit) = cfg.memory_limit {
            mb = mb.memory_hard_limit(limit);
        }
        if let Some(swap) = cfg.memory_swap {
            mb = mb.memory_swap_limit(swap);
        }
        if let Some(res) = cfg.memory_reservation {
            mb = mb.memory_soft_limit(res);
        }
        if let Some(swp) = cfg.memory_swappiness {
            mb = mb.swappiness(swp);
        }
        builder = mb.done();
    }

    // CPU controller: shares and/or (quota, period).
    if cfg.cpu_shares.is_some() || cfg.cpu_quota.is_some() {
        let mut cb = builder.cpu();
        if let Some(shares) = cfg.cpu_shares {
            cb = cb.shares(shares);
        }
        if let Some((quota, period)) = cfg.cpu_quota {
            cb = cb.quota(quota).period(period);
        }
        builder = cb.done();
    }

    // Pids controller.
    if let Some(max_pids) = cfg.pids_limit {
        // The library takes an i64. A plain `as` cast would wrap values above
        // i64::MAX into a negative limit, so treat those as "no limit" instead.
        let max = if max_pids > i64::MAX as u64 {
            MaxValue::Max
        } else {
            MaxValue::Value(max_pids as i64)
        };
        builder = builder.pid().maximum_number_of_processes(max).done();
    }

    // Block-I/O controller: weight plus per-device bps / iops throttles.
    let has_blkio = cfg.blkio_weight.is_some()
        || !cfg.blkio_throttle_read_bps.is_empty()
        || !cfg.blkio_throttle_write_bps.is_empty()
        || !cfg.blkio_throttle_read_iops.is_empty()
        || !cfg.blkio_throttle_write_iops.is_empty();
    if has_blkio {
        let mut bb = builder.blkio();
        if let Some(w) = cfg.blkio_weight {
            bb = bb.weight(w);
        }
        // `throttle_bps()` / `throttle_iops()` select which kind of throttle
        // the following `read` / `write` calls configure.
        if !cfg.blkio_throttle_read_bps.is_empty() {
            bb = bb.throttle_bps();
            for &(major, minor, rate) in &cfg.blkio_throttle_read_bps {
                bb = bb.read(major, minor, rate);
            }
        }
        if !cfg.blkio_throttle_write_bps.is_empty() {
            bb = bb.throttle_bps();
            for &(major, minor, rate) in &cfg.blkio_throttle_write_bps {
                bb = bb.write(major, minor, rate);
            }
        }
        if !cfg.blkio_throttle_read_iops.is_empty() {
            bb = bb.throttle_iops();
            for &(major, minor, rate) in &cfg.blkio_throttle_read_iops {
                bb = bb.read(major, minor, rate);
            }
        }
        if !cfg.blkio_throttle_write_iops.is_empty() {
            bb = bb.throttle_iops();
            for &(major, minor, rate) in &cfg.blkio_throttle_write_iops {
                bb = bb.write(major, minor, rate);
            }
        }
        builder = bb.done();
    }

    // Network controllers (net_cls class id, net_prio priorities).
    if cfg.net_classid.is_some() || !cfg.net_priorities.is_empty() {
        let mut nb = builder.network();
        if let Some(class_id) = cfg.net_classid {
            nb = nb.class_id(class_id);
        }
        for (ifname, prio) in &cfg.net_priorities {
            nb = nb.priority(ifname.clone(), *prio);
        }
        builder = nb.done();
    }

    // Devices controller.
    if !cfg.device_rules.is_empty() {
        use cgroups_rs::fs::devices::{DevicePermissions, DeviceType};
        let mut db = builder.devices();
        for rule in &cfg.device_rules {
            let devtype = match rule.kind {
                'b' => DeviceType::Block,
                'c' => DeviceType::Char,
                // Any other selector is treated as "all device types".
                _ => DeviceType::All,
            };
            // Historical behaviour: an unparseable access string falls back to
            // all permissions. Keep that for compatibility, but make it visible
            // because it widens the rule beyond what was written.
            let access = DevicePermissions::from_str(&rule.access).unwrap_or_else(|_| {
                log::warn!(
                    "cgroup device rule has invalid access '{}'; using all permissions",
                    rule.access
                );
                DevicePermissions::all()
            });
            db = db.device(rule.major, rule.minor, devtype, rule.allow, access);
        }
        builder = db.done();
    }

    let cg = builder
        .build(hier)
        .map_err(|e| io::Error::other(format!("cgroup create '{}': {}", name, e)))?;

    // cpuset is applied on the live cgroup so that a failure stays non-fatal.
    if cfg.cpuset_cpus.is_some() || cfg.cpuset_mems.is_some() {
        if let Some(cs) = cg.controller_of::<CpuSetController>() {
            if let Some(ref cpus) = cfg.cpuset_cpus {
                if let Err(e) = cs.set_cpus(cpus) {
                    log::warn!("cgroup cpuset.cpus={} failed (non-fatal): {}", cpus, e);
                }
            }
            if let Some(ref mems) = cfg.cpuset_mems {
                if let Err(e) = cs.set_mems(mems) {
                    log::warn!("cgroup cpuset.mems={} failed (non-fatal): {}", mems, e);
                }
            }
        } else {
            log::debug!("cpuset controller unavailable; cpus/mems not applied");
        }
    }

    if cfg.net_classid.is_some() && cg.controller_of::<NetClsController>().is_none() {
        log::debug!("net_cls controller unavailable (v2-only system); classid not applied");
    }
    if !cfg.net_priorities.is_empty() && cg.controller_of::<NetPrioController>().is_none() {
        log::debug!("net_prio controller unavailable (v2-only system); priorities not applied");
    }

    if let Err(e) = cg.add_task_by_tgid(CgroupPid::from(u64::from(child_pid))) {
        // The caller never receives the handle on this path, so nobody else
        // could clean up: remove the (still empty) cgroup here to avoid a leak.
        if let Err(del_err) = cg.delete() {
            log::warn!(
                "cgroup delete after failed add_task failed (non-fatal): {}",
                del_err
            );
        }
        return Err(io::Error::other(format!(
            "cgroup add_task pid={}: {}",
            child_pid, e
        )));
    }
    Ok(cg)
}
/// Deletes the given cgroup, consuming the handle.
///
/// Deletion is best effort: a failure is logged as a warning and otherwise
/// ignored, so this function never reports an error to the caller.
pub fn teardown_cgroup(cg: Cgroup) {
    let Err(err) = cg.delete() else {
        return;
    };
    log::warn!("cgroup delete failed (non-fatal): {}", err);
}
/// Point-in-time resource usage of a cgroup, as gathered by `read_stats`.
///
/// A field is left at `0` when the corresponding controller is unavailable or
/// its value could not be read.
#[derive(Clone, Debug, Default)]
pub struct ResourceStats {
    /// Current memory usage in bytes, taken from the memory controller's
    /// `usage_in_bytes`.
    pub memory_current_bytes: u64,
    /// Total CPU time consumed, in nanoseconds (the `usage_usec` entry of the
    /// cpu controller's stat output, multiplied by 1000).
    pub cpu_usage_ns: u64,
    /// Current process count reported by the pids controller.
    pub pids_current: u64,
}
pub fn read_stats(cg: &Cgroup) -> io::Result<ResourceStats> {
let mut stats = ResourceStats::default();
if let Some(mem_ctrl) = cg.controller_of::<MemController>() {
stats.memory_current_bytes = mem_ctrl.memory_stat().usage_in_bytes;
}
if let Some(cpu_ctrl) = cg.controller_of::<CpuController>() {
let raw = cpu_ctrl.cpu().stat;
for line in raw.lines() {
if let Some(rest) = line.strip_prefix("usage_usec ") {
if let Ok(usec) = rest.trim().parse::<u64>() {
stats.cpu_usage_ns = usec.saturating_mul(1000);
}
break;
}
}
}
if let Some(pid_ctrl) = cg.controller_of::<PidController>() {
if let Ok(current) = pid_ctrl.get_pid_current() {
stats.pids_current = current;
}
}
Ok(stats)
}