use anyhow::Result;
use std::collections::BTreeMap;
use std::time::Duration;
use super::super::config::SchedPolicy;
/// Reads and parses a schedstat file under `/proc/self`.
///
/// With `tid == None` the process-level file `/proc/self/schedstat` is read;
/// with `Some(t)` the per-task file `/proc/self/task/{t}/schedstat` is read.
///
/// Returns the three leading counters as `(cpu_time, run_delay, timeslices)`,
/// or `None` when the file cannot be read (a one-time warning is printed in
/// that case) or its contents do not parse.
pub(super) fn read_schedstat(tid: Option<libc::pid_t>) -> Option<(u64, u64, u64)> {
    let contents = match tid {
        Some(t) => std::fs::read_to_string(format!("/proc/self/task/{t}/schedstat")),
        None => std::fs::read_to_string("/proc/self/schedstat"),
    };
    match contents {
        Ok(text) => parse_schedstat_line(&text),
        Err(_) => {
            // Only a read failure triggers the warning; a parse failure stays silent.
            warn_schedstat_unavailable_once();
            None
        }
    }
}
/// Parses the first three whitespace-separated `u64` fields of `data`.
///
/// The fields are returned in order as `(cpu_time, run_delay, timeslices)`.
/// Returns `None` if fewer than three fields are present or any of the first
/// three is not a valid `u64`. Anything after the third field is ignored.
pub(super) fn parse_schedstat_line(data: &str) -> Option<(u64, u64, u64)> {
    let mut tokens = data.split_whitespace();
    // Pulls the next token and parses it; `None` on a missing or malformed field.
    let mut next_u64 = || tokens.next()?.parse::<u64>().ok();

    let cpu_time = next_u64()?;
    let run_delay = next_u64()?;
    let timeslices = next_u64()?;
    Some((cpu_time, run_delay, timeslices))
}
/// Prints a notice to stderr that schedstat could not be read.
///
/// The message is emitted at most once per process, no matter how many times
/// this function is called.
pub(super) fn warn_schedstat_unavailable_once() {
    use std::sync::Once;

    static NOTICE: Once = Once::new();

    fn emit() {
        eprintln!(
            "workload: /proc/self/schedstat unavailable (CONFIG_SCHEDSTATS off?); \
             schedstat_* fields in WorkerReport will be zero"
        );
    }

    NOTICE.call_once(emit);
}
/// Sums the per-node page counts of every entry parsed from
/// `/proc/self/numa_maps`.
///
/// Returns a map from node id to the total page count across all entries.
/// If the file cannot be read the map is empty.
pub(super) fn read_numa_maps_pages() -> BTreeMap<usize, u64> {
    let mut per_node: BTreeMap<usize, u64> = BTreeMap::new();

    if let Ok(text) = std::fs::read_to_string("/proc/self/numa_maps") {
        let mappings = crate::assert::parse_numa_maps(&text);
        for mapping in &mappings {
            for (&node, &count) in &mapping.node_pages {
                let slot = per_node.entry(node).or_insert(0);
                *slot += count;
            }
        }
    }

    per_node
}
/// Reads `/proc/vmstat` and returns the value extracted by
/// `crate::assert::parse_vmstat_numa_pages_migrated`.
///
/// Falls back to `0` when the file cannot be read or the parser yields no
/// value.
pub(super) fn read_vmstat_numa_pages_migrated() -> u64 {
    match std::fs::read_to_string("/proc/vmstat") {
        Ok(vmstat) => crate::assert::parse_vmstat_numa_pages_migrated(&vmstat).unwrap_or(0),
        Err(_) => 0,
    }
}
/// Reads clock `clk` via `clock_gettime` and returns it as a single
/// nanosecond count (`tv_sec * 1e9 + tv_nsec`).
///
/// Returns `None` if the call reports failure; in that case
/// `warn_clock_gettime_failed_once` is invoked before returning.
pub(super) fn clock_gettime_ns(clk: libc::clockid_t) -> Option<u64> {
    let mut now = libc::timespec {
        tv_sec: 0,
        tv_nsec: 0,
    };
    // SAFETY: `now` is a live, properly aligned `timespec` that the call may
    // write to; the pointer does not outlive this statement.
    let status = unsafe { libc::clock_gettime(clk, &mut now) };
    if status == 0 {
        let whole_secs = now.tv_sec as u64;
        let sub_nanos = now.tv_nsec as u64;
        Some(whole_secs * 1_000_000_000 + sub_nanos)
    } else {
        warn_clock_gettime_failed_once(clk);
        None
    }
}
/// Prints a warning to stderr that `clock_gettime` failed for `clk`.
///
/// `CLOCK_THREAD_CPUTIME_ID` and `CLOCK_MONOTONIC` each have their own
/// once-guard, so each can warn once per process. Every other clock id shares
/// a single fallback guard, so at most one warning is printed for all of them
/// together. This function never panics on an unrecognised clock id: it is
/// called on an error path whose caller reports failure by returning `None`.
///
/// The OS error is read from `errno` when the warning is printed.
/// NOTE(review): this assumes the caller invokes the function immediately
/// after the failed `clock_gettime` call, before anything else can overwrite
/// `errno`.
pub(super) fn warn_clock_gettime_failed_once(clk: libc::clockid_t) {
    static WARNED_THREAD: std::sync::Once = std::sync::Once::new();
    static WARNED_MONO: std::sync::Once = std::sync::Once::new();
    static WARNED_OTHER: std::sync::Once = std::sync::Once::new();
    let once = match clk {
        libc::CLOCK_THREAD_CPUTIME_ID => &WARNED_THREAD,
        libc::CLOCK_MONOTONIC => &WARNED_MONO,
        // Any other clock id: warn through the shared guard rather than
        // aborting the process from a logging helper.
        _ => &WARNED_OTHER,
    };
    once.call_once(|| {
        let errno = std::io::Error::last_os_error();
        eprintln!(
            "workload: clock_gettime(clk={clk}) failed: {errno}; affected samples will be zero or skipped"
        );
    });
}
/// Returns the `CLOCK_THREAD_CPUTIME_ID` reading in nanoseconds, or `0` when
/// `clock_gettime_ns` reports failure.
pub(super) fn thread_cpu_time_ns() -> u64 {
    match clock_gettime_ns(libc::CLOCK_THREAD_CPUTIME_ID) {
        Some(ns) => ns,
        None => 0,
    }
}
/// Converts `d` to a nanosecond count that fits in 63 bits.
///
/// `field` is only used to label the value in the error message.
///
/// # Errors
///
/// Fails when the nanosecond count of `d` is greater than `i64::MAX`.
pub(super) fn duration_to_kernel_ns(d: Duration, field: &str) -> Result<u64> {
    // Largest accepted value: bit 63 must stay clear.
    const LIMIT_NS: u128 = i64::MAX as u128;

    let ns_u128 = d.as_nanos();
    if ns_u128 <= LIMIT_NS {
        // Lossless: the value was just checked to fit in 63 bits.
        return Ok(ns_u128 as u64);
    }
    anyhow::bail!(
        "sched_setattr: {field} duration ({ns_u128} ns) exceeds i64::MAX — \
         nanosecond count must fit in 63 bits (kernel reserves bit 63)"
    );
}
/// Applies scheduling `policy` to `pid`, which is passed unchanged to the
/// underlying syscalls.
///
/// Behaviour per variant:
///
/// * `SchedPolicy::Normal` — returns `Ok(())` without making any syscall.
/// * `SchedPolicy::Batch` / `SchedPolicy::Idle` — `sched_setscheduler` with
///   priority `0`.
/// * `SchedPolicy::Fifo(p)` / `SchedPolicy::RoundRobin(p)` —
///   `sched_setscheduler` with `p` clamped into `1..=99`.
/// * `SchedPolicy::Deadline { runtime, deadline, period }` — the parameters
///   are validated locally, then applied with a raw `sched_setattr` syscall
///   (flags `0`).
///
/// # Errors
///
/// Returns an error when:
///
/// * `pid <= 0`;
/// * for `Deadline`: `deadline` is zero, any duration exceeds `i64::MAX`
///   nanoseconds, `runtime` is below 1024 ns, `runtime > deadline`, or
///   `period` is non-zero and `deadline > period`;
/// * the syscall itself fails (the OS error is included in the message).
pub(super) fn set_sched_policy(pid: libc::pid_t, policy: SchedPolicy) -> Result<()> {
    if pid <= 0 {
        anyhow::bail!("sched_setscheduler: invalid pid {pid} (must be > 0)");
    }
    let (pol, prio) = match policy {
        SchedPolicy::Normal => return Ok(()),
        SchedPolicy::Batch => (libc::SCHED_BATCH, 0),
        SchedPolicy::Idle => (libc::SCHED_IDLE, 0),
        SchedPolicy::Fifo(p) => (libc::SCHED_FIFO, p.clamp(1, 99) as i32),
        SchedPolicy::RoundRobin(p) => (libc::SCHED_RR, p.clamp(1, 99) as i32),
        SchedPolicy::Deadline {
            runtime,
            deadline,
            period,
        } => {
            // Validate up front so the caller gets a specific message instead
            // of a bare errno from the kernel.
            if deadline.is_zero() {
                anyhow::bail!(
                    "sched_setattr: deadline must be > 0 (kernel `__checkparam_dl` rejects zero deadline)"
                );
            }
            let runtime_ns = duration_to_kernel_ns(runtime, "runtime")?;
            let deadline_ns = duration_to_kernel_ns(deadline, "deadline")?;
            let period_ns = duration_to_kernel_ns(period, "period")?;
            if runtime_ns < 1024 {
                anyhow::bail!(
                    "sched_setattr: runtime ({runtime_ns} ns) below kernel DL_SCALE floor (1024 ns)"
                );
            }
            if runtime_ns > deadline_ns {
                anyhow::bail!(
                    "sched_setattr: runtime ({runtime_ns} ns) > deadline ({deadline_ns} ns)"
                );
            }
            // A zero period is exempt from the deadline <= period ordering check.
            if period_ns != 0 && deadline_ns > period_ns {
                anyhow::bail!(
                    "sched_setattr: deadline ({deadline_ns} ns) > period ({period_ns} ns)"
                );
            }
            // SAFETY: `sched_attr` is an integer-only C struct, so the
            // all-zero bit pattern is a valid value.
            let mut attr: libc::sched_attr = unsafe { std::mem::zeroed() };
            attr.size = std::mem::size_of::<libc::sched_attr>() as u32;
            attr.sched_policy = libc::SCHED_DEADLINE as u32;
            attr.sched_runtime = runtime_ns;
            attr.sched_deadline = deadline_ns;
            attr.sched_period = period_ns;
            // SAFETY: `attr` is fully initialised, its `size` field matches
            // the struct passed, and the pointer stays valid for the duration
            // of the call.
            let ret = unsafe {
                libc::syscall(
                    libc::SYS_sched_setattr,
                    pid,
                    &attr as *const libc::sched_attr,
                    0u32,
                )
            };
            if ret != 0 {
                anyhow::bail!("sched_setattr: {}", std::io::Error::last_os_error());
            }
            return Ok(());
        }
    };
    let param = libc::sched_param {
        sched_priority: prio,
    };
    // SAFETY: `param` is a live, initialised `sched_param`; the pointer is
    // only used for the duration of the call.
    if unsafe { libc::sched_setscheduler(pid, pol, &param) } != 0 {
        anyhow::bail!("sched_setscheduler: {}", std::io::Error::last_os_error());
    }
    Ok(())
}