use anyhow::{Context, Result};
use crate::flock::{FlockMode, try_flock};
/// Error signaling that a CPU/LLC resource is currently held by another
/// process. Wrapped in `anyhow::Error` so callers can downcast and treat
/// contention as a retryable condition rather than a hard failure.
#[derive(Debug)]
pub struct ResourceContention {
    // Human-readable description of what was contended (shown verbatim).
    pub reason: String,
}
impl std::fmt::Display for ResourceContention {
    /// Renders the contention reason verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.reason)
    }
}
// Marker impl so ResourceContention can travel inside anyhow::Error and be
// recovered via downcast by callers that want to detect contention.
impl std::error::Error for ResourceContention {}
/// One last-level-cache domain on the host: the set of CPUs sharing that LLC.
#[derive(Debug, Clone)]
pub struct LlcGroup {
    // Host CPU ids belonging to this LLC, as reported by sysfs.
    pub cpus: Vec<usize>,
}
/// Snapshot of the host's CPU topology used for pinning and reservation
/// planning. Built from sysfs (`from_sysfs`) or synthetically in tests.
#[derive(Debug, Clone)]
pub struct HostTopology {
    // LLC domains in discovery order; index into this Vec is the "LLC index"
    // used throughout this module (lock file names, plans, etc.).
    pub llc_groups: Vec<LlcGroup>,
    // All online host CPU ids.
    pub online_cpus: Vec<usize>,
    // CPU id -> NUMA node id, derived from each LLC's reported node.
    pub cpu_to_node: std::collections::HashMap<usize, usize>,
    // NUMA node -> sorted LLC indices homed on that node (majority vote of
    // the LLC's CPUs; see compute_host_node_llcs).
    pub(crate) host_node_llcs: std::collections::BTreeMap<usize, Vec<usize>>,
}
/// Result of `HostTopology::compute_pinning`: a concrete vCPU -> host CPU
/// mapping plus the host resources it consumes.
#[derive(Debug)]
pub struct PinningPlan {
    // (guest vCPU id, host CPU id) pairs; vCPU ids are sequential from 0.
    pub assignments: Vec<(u32, usize)>,
    // Extra host CPU reserved for service threads, if requested and available.
    pub service_cpu: Option<usize>,
    // Sorted, deduplicated host LLC indices backing the plan.
    pub llc_indices: Vec<usize>,
    // Lock file descriptors held for the plan's lifetime; closing them (on
    // drop) releases the underlying flock(2) locks.
    #[allow(dead_code)] pub(crate) locks: Vec<std::os::fd::OwnedFd>,
}
impl HostTopology {
    /// Reads the real host topology from sysfs (via `TestTopology`) and
    /// derives the LLC groups, online-CPU list, and CPU→NUMA-node map.
    pub fn from_sysfs() -> Result<Self> {
        let topo = crate::topology::TestTopology::from_system()
            .context("read host topology from sysfs")?;
        let online_cpus = topo.all_cpus().to_vec();
        let llc_groups: Vec<LlcGroup> = topo
            .llcs()
            .iter()
            .map(|llc| LlcGroup {
                cpus: llc.cpus().to_vec(),
            })
            .collect();
        // Every CPU inherits the NUMA node reported for its LLC.
        let cpu_to_node: std::collections::HashMap<usize, usize> = topo
            .llcs()
            .iter()
            .flat_map(|llc| llc.cpus().iter().map(|&cpu| (cpu, llc.numa_node())))
            .collect();
        let host_node_llcs = Self::compute_host_node_llcs(&llc_groups, &cpu_to_node);
        Ok(Self {
            llc_groups,
            online_cpus,
            cpu_to_node,
            host_node_llcs,
        })
    }

    /// Test-only constructor: `groups` is one (CPU list, NUMA node) pair per
    /// synthetic LLC group.
    #[cfg(test)]
    pub(crate) fn new_for_tests(groups: &[(Vec<usize>, usize)]) -> Self {
        let llc_groups: Vec<LlcGroup> = groups
            .iter()
            .map(|(cpus, _)| LlcGroup { cpus: cpus.clone() })
            .collect();
        let cpu_to_node: std::collections::HashMap<usize, usize> = groups
            .iter()
            .flat_map(|(cpus, node)| cpus.iter().map(move |&cpu| (cpu, *node)))
            .collect();
        let online_cpus: Vec<usize> = groups
            .iter()
            .flat_map(|(cpus, _)| cpus.iter().copied())
            .collect();
        let host_node_llcs = HostTopology::compute_host_node_llcs(&llc_groups, &cpu_to_node);
        HostTopology {
            llc_groups,
            online_cpus,
            cpu_to_node,
            host_node_llcs,
        }
    }

    /// Groups LLC indices by their "home" NUMA node: each LLC is homed on the
    /// node owning the majority of its CPUs (CPUs missing from the map count
    /// as node 0). Per-node LLC lists come back sorted ascending.
    // NOTE(review): majority ties are broken by HashMap iteration order, which
    // is nondeterministic across runs — confirm ties cannot occur on real
    // topologies (an LLC normally sits entirely on one node).
    fn compute_host_node_llcs(
        llc_groups: &[LlcGroup],
        cpu_to_node: &std::collections::HashMap<usize, usize>,
    ) -> std::collections::BTreeMap<usize, Vec<usize>> {
        let mut node_llcs: std::collections::BTreeMap<usize, Vec<usize>> =
            std::collections::BTreeMap::new();
        for (idx, group) in llc_groups.iter().enumerate() {
            // Count how many of this LLC's CPUs live on each node.
            let mut counts: std::collections::HashMap<usize, usize> =
                std::collections::HashMap::new();
            for &cpu in &group.cpus {
                let node = cpu_to_node.get(&cpu).copied().unwrap_or(0);
                *counts.entry(node).or_insert(0) += 1;
            }
            let node = counts
                .into_iter()
                .max_by_key(|&(_, count)| count)
                .map(|(node, _)| node)
                .unwrap_or(0);
            node_llcs.entry(node).or_default().push(idx);
        }
        for llcs in node_llcs.values_mut() {
            llcs.sort_unstable();
        }
        node_llcs
    }

    /// Size of the largest LLC group, in CPUs; 0 when there are no groups.
    // NOTE(review): despite the name this counts CPUs (hardware threads), not
    // physical cores — confirm callers expect that.
    pub fn max_cores_per_llc(&self) -> usize {
        self.llc_groups
            .iter()
            .map(|g| g.cpus.len())
            .max()
            .unwrap_or(0)
    }

    /// Number of online host CPUs.
    pub fn total_cpus(&self) -> usize {
        self.online_cpus.len()
    }

    /// LLC indices grouped by home NUMA node (see `compute_host_node_llcs`).
    #[allow(dead_code)]
    pub(crate) fn host_llcs_by_numa_node(&self) -> &std::collections::BTreeMap<usize, Vec<usize>> {
        &self.host_node_llcs
    }

    /// NUMA nodes that own at least `min_llcs` LLC groups, with their LLC
    /// index lists, in ascending node order (BTreeMap iteration).
    pub(crate) fn numa_nodes_with_capacity(&self, min_llcs: usize) -> Vec<(usize, &Vec<usize>)> {
        self.host_node_llcs
            .iter()
            .filter(|(_, llcs)| llcs.len() >= min_llcs)
            .map(|(&node, llcs)| (node, llcs))
            .collect()
    }

    /// All known NUMA nodes ordered by `distance_fn(anchor, node)`, nearest
    /// first. Distance 255 means "unreachable" and always sorts last. The
    /// sort is stable, so equidistant nodes keep ascending node-id order.
    pub(crate) fn numa_nodes_sorted_by_distance(
        &self,
        anchor: usize,
        distance_fn: impl Fn(usize, usize) -> u8,
    ) -> Vec<usize> {
        let mut nodes: Vec<(usize, u8)> = self
            .host_node_llcs
            .keys()
            .map(|&node| (node, distance_fn(anchor, node)))
            .collect();
        nodes.sort_by(|a, b| {
            let a_unreachable = a.1 == 255;
            let b_unreachable = b.1 == 255;
            match (a_unreachable, b_unreachable) {
                (true, false) => std::cmp::Ordering::Greater,
                (false, true) => std::cmp::Ordering::Less,
                _ => a.1.cmp(&b.1),
            }
        });
        nodes.into_iter().map(|(node, _)| node).collect()
    }

    /// Home NUMA node of a single LLC: same majority-vote rule as
    /// `compute_host_node_llcs` (unmapped CPUs count as node 0).
    /// Panics if `llc_idx` is out of range.
    pub fn llc_numa_node(&self, llc_idx: usize) -> usize {
        let group = &self.llc_groups[llc_idx];
        let mut counts: std::collections::HashMap<usize, usize> = std::collections::HashMap::new();
        for &cpu in &group.cpus {
            let node = self.cpu_to_node.get(&cpu).copied().unwrap_or(0);
            *counts.entry(node).or_insert(0) += 1;
        }
        counts
            .into_iter()
            .max_by_key(|&(_, count)| count)
            .map(|(node, _)| node)
            .unwrap_or(0)
    }

    /// Maps every guest vCPU of `topo` (the *guest* topology) onto a dedicated
    /// host CPU such that each virtual LLC fits entirely inside one host LLC
    /// group. With `reserve_service_cpu`, one additional host CPU (disjoint
    /// from all vCPU assignments) is set aside for service threads.
    /// `llc_offset` rotates which host LLCs are used so that multiple
    /// instances can coexist on one host.
    ///
    /// Fails when the host lacks enough CPUs overall, enough LLC groups, or
    /// enough free CPUs inside a chosen LLC group.
    pub fn compute_pinning(
        &self,
        topo: &super::topology::Topology,
        reserve_service_cpu: bool,
        llc_offset: usize,
    ) -> Result<PinningPlan> {
        let cores = topo.cores_per_llc;
        let threads = topo.threads_per_core;
        let llcs = topo.llcs;
        let vcpus_per_llc = cores * threads;
        let total_vcpus = llcs * vcpus_per_llc;
        let total_needed = total_vcpus as usize + if reserve_service_cpu { 1 } else { 0 };
        anyhow::ensure!(
            total_needed <= self.total_cpus(),
            "performance_mode: need {} CPUs ({} vCPUs + {} service) \
but only {} host CPUs available",
            total_needed,
            total_vcpus,
            if reserve_service_cpu { 1 } else { 0 },
            self.total_cpus(),
        );
        // Each virtual LLC needs its own host LLC group.
        let num_llcs = self.llc_groups.len();
        anyhow::ensure!(
            llcs as usize <= num_llcs,
            "performance_mode: need {} LLCs for {} virtual LLCs, \
but host has {} LLC groups",
            llcs,
            llcs,
            num_llcs,
        );
        // Pick which host LLC backs each virtual LLC (NUMA-aware when possible).
        let llc_order = self.numa_aware_llc_order(topo.numa_nodes, llcs, llc_offset);
        let mut assignments = Vec::with_capacity(total_vcpus as usize);
        let mut used_cpus = std::collections::HashSet::new();
        for llc in 0..llcs {
            let llc_idx = llc_order[llc as usize];
            let group = &self.llc_groups[llc_idx];
            // CPUs in this host LLC not yet claimed by an earlier virtual LLC
            // (relevant when llc_order maps two guest LLCs to one host LLC).
            let available: Vec<usize> = group
                .cpus
                .iter()
                .copied()
                .filter(|c| !used_cpus.contains(c))
                .collect();
            anyhow::ensure!(
                available.len() >= vcpus_per_llc as usize,
                "performance_mode: LLC group {} has {} available CPUs, \
need {} for virtual LLC {}",
                llc_idx,
                available.len(),
                vcpus_per_llc,
                llc,
            );
            // vCPU ids are sequential: guest LLC `llc` owns the id range
            // [llc * vcpus_per_llc, (llc + 1) * vcpus_per_llc).
            for vcpu_in_llc in 0..vcpus_per_llc {
                let vcpu_id = llc * vcpus_per_llc + vcpu_in_llc;
                let host_cpu = available[vcpu_in_llc as usize];
                used_cpus.insert(host_cpu);
                assignments.push((vcpu_id, host_cpu));
            }
        }
        // Service CPU: first online CPU not claimed by any vCPU.
        let service_cpu = if reserve_service_cpu {
            let cpu = self
                .online_cpus
                .iter()
                .copied()
                .find(|c| !used_cpus.contains(c));
            anyhow::ensure!(
                cpu.is_some(),
                "performance_mode: no free host CPU for service threads \
after assigning {} vCPUs",
                total_vcpus,
            );
            cpu
        } else {
            None
        };
        // Report the set of host LLCs actually used (sorted, deduplicated) —
        // these are the LLCs whose lock files a caller should take.
        let mut llc_indices = llc_order;
        llc_indices.sort_unstable();
        llc_indices.dedup();
        Ok(PinningPlan {
            assignments,
            service_cpu,
            llc_indices,
            locks: Vec::new(),
        })
    }

    /// Chooses one host LLC index per guest LLC slot. When the guest has
    /// multiple NUMA nodes and the host can satisfy them, guest nodes are
    /// spread across distinct host nodes (guest node i takes `base_per_node`
    /// LLCs, plus one extra for the first `remainder` nodes). Otherwise falls
    /// back to a simple round-robin over all host LLCs starting at
    /// `llc_offset`.
    pub(crate) fn numa_aware_llc_order(
        &self,
        numa_nodes: u32,
        llcs: u32,
        llc_offset: usize,
    ) -> Vec<usize> {
        let num_host_llcs = self.llc_groups.len();
        // Round-robin fallback; wraps modulo the host LLC count. Duplicate
        // indices are possible only when llcs > num_host_llcs, which
        // compute_pinning rejects up front.
        let sequential_fallback = || -> Vec<usize> {
            (0..llcs as usize)
                .map(|i| (i + llc_offset) % num_host_llcs)
                .collect()
        };
        // No NUMA awareness possible or requested.
        if numa_nodes == 0 || numa_nodes == 1 || self.cpu_to_node.is_empty() {
            return sequential_fallback();
        }
        // Fewer guest LLCs than guest nodes: can't give each node an LLC.
        if llcs < numa_nodes {
            return sequential_fallback();
        }
        let base_per_node = (llcs / numa_nodes) as usize;
        let remainder = (llcs % numa_nodes) as usize;
        let max_per_node = base_per_node + if remainder > 0 { 1 } else { 0 };
        // Only host nodes with enough LLCs for the largest guest-node share
        // are eligible; if too few qualify, fall back.
        let eligible_nodes = self.numa_nodes_with_capacity(max_per_node);
        if eligible_nodes.len() < numa_nodes as usize {
            return sequential_fallback();
        }
        let mut order = Vec::with_capacity(llcs as usize);
        // llc_offset rotates both which host node a guest node lands on and
        // which LLCs within that node are picked, so concurrent instances
        // with different offsets spread out.
        let node_offset = llc_offset / max_per_node.max(1);
        for guest_node in 0..numa_nodes as usize {
            let host_idx = (guest_node + node_offset) % eligible_nodes.len();
            let (_, host_llcs) = &eligible_nodes[host_idx];
            let within_offset = llc_offset % host_llcs.len();
            let count = if guest_node < remainder {
                base_per_node + 1
            } else {
                base_per_node
            };
            for i in 0..count {
                let llc_idx = host_llcs[(i + within_offset) % host_llcs.len()];
                order.push(llc_idx);
            }
        }
        order
    }
}
/// How the per-LLC lock files should be taken.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlcLockMode {
    /// Sole ownership: no other holder (shared or exclusive) on the LLC.
    Exclusive,
    /// Cooperative ownership: multiple shared holders may coexist on one LLC;
    /// individual CPUs are then arbitrated via per-CPU exclusive locks
    /// (see `try_acquire_all`).
    #[allow(dead_code)]
    Shared,
}
/// Result of a best-effort lock acquisition attempt.
#[derive(Debug)]
pub enum LockOutcome {
    /// All requested locks were taken; `locks` must stay alive for as long as
    /// the reservation is needed (dropping releases them).
    Acquired {
        // First locked LLC index (0 if none were requested).
        #[allow(dead_code)]
        llc_offset: usize,
        locks: Vec<std::os::fd::OwnedFd>,
    },
    /// Some lock was held elsewhere; the String says which resource was busy.
    Unavailable(#[allow(dead_code)] String),
}
/// Attempts to lock every resource a pinning plan needs. Contention is not an
/// error: it is reported as `LockOutcome::Unavailable` so the caller can
/// retry or degrade; only real I/O failures inside `try_acquire_all` would
/// surface (and those are folded into `Unavailable` too).
pub fn acquire_resource_locks(
    plan: &PinningPlan,
    llc_indices: &[usize],
    llc_mode: LlcLockMode,
) -> Result<LockOutcome> {
    let outcome = match try_acquire_all(plan, llc_indices, llc_mode) {
        Ok(locks) => {
            let llc_offset = llc_indices.first().copied().unwrap_or(0);
            LockOutcome::Acquired { llc_offset, locks }
        }
        Err(reason) => LockOutcome::Unavailable(reason),
    };
    Ok(outcome)
}
// Lock files live under /tmp so they are visible to every ktstr process on
// the host and (on typical tmpfs setups) vanish on reboot.
const LLC_LOCK_PREFIX: &str = "/tmp/ktstr-llc-";
const CPU_LOCK_PREFIX: &str = "/tmp/ktstr-cpu-";
// Test-only prefix overrides so unit tests can redirect lock files to a
// private temp directory instead of fighting over the shared /tmp paths.
// thread_local keeps parallel test threads isolated from each other.
#[cfg(test)]
thread_local! {
    static LLC_LOCK_PREFIX_OVERRIDE: std::cell::RefCell<Option<String>> =
        const { std::cell::RefCell::new(None) };
    static CPU_LOCK_PREFIX_OVERRIDE: std::cell::RefCell<Option<String>> =
        const { std::cell::RefCell::new(None) };
}
/// Path of the lock file for LLC `llc_idx`; honors the test override prefix
/// when set on this thread.
fn llc_lock_path(llc_idx: usize) -> String {
    #[cfg(test)]
    {
        if let Some(p) = LLC_LOCK_PREFIX_OVERRIDE.with(|p| p.borrow().clone()) {
            return format!("{p}{llc_idx}.lock");
        }
    }
    format!("{LLC_LOCK_PREFIX}{llc_idx}.lock")
}
/// Path of the lock file for host CPU `cpu`; honors the test override prefix
/// when set on this thread.
fn cpu_lock_path(cpu: usize) -> String {
    #[cfg(test)]
    {
        if let Some(p) = CPU_LOCK_PREFIX_OVERRIDE.with(|p| p.borrow().clone()) {
            return format!("{p}{cpu}.lock");
        }
    }
    format!("{CPU_LOCK_PREFIX}{cpu}.lock")
}
/// Non-blocking acquisition of every lock a plan needs: first the per-LLC
/// locks (in `llc_indices` order), then — only in Shared LLC mode — an
/// exclusive per-CPU lock for each assigned vCPU CPU and the service CPU.
/// An exclusive LLC lock already excludes all other holders of that LLC, so
/// per-CPU locks are skipped in that mode.
///
/// On any busy lock or I/O error, returns Err with a reason; the partially
/// filled `locks` Vec is dropped, which closes the fds and releases
/// everything taken so far.
// NOTE(review): in Exclusive mode the service CPU may lie outside the locked
// LLCs (compute_pinning picks any free online CPU) and is then not protected
// by any lock — confirm that is intended.
fn try_acquire_all(
    plan: &PinningPlan,
    llc_indices: &[usize],
    llc_mode: LlcLockMode,
) -> std::result::Result<Vec<std::os::fd::OwnedFd>, String> {
    let flock_mode = match llc_mode {
        LlcLockMode::Exclusive => FlockMode::Exclusive,
        LlcLockMode::Shared => FlockMode::Shared,
    };
    let mut locks = Vec::new();
    for &llc_idx in llc_indices {
        let path = llc_lock_path(llc_idx);
        match try_flock(&path, flock_mode) {
            Ok(Some(fd)) => locks.push(fd),
            Ok(None) => return Err(format!("LLC {llc_idx} busy")),
            Err(e) => return Err(format!("LLC {llc_idx}: {e}")),
        }
    }
    // Shared LLC mode: arbitrate individual CPUs with exclusive locks.
    if llc_mode != LlcLockMode::Exclusive {
        for &(_vcpu, host_cpu) in &plan.assignments {
            let path = cpu_lock_path(host_cpu);
            match try_flock(&path, FlockMode::Exclusive) {
                Ok(Some(fd)) => locks.push(fd),
                Ok(None) => return Err(format!("CPU {host_cpu} busy")),
                Err(e) => return Err(format!("CPU {host_cpu}: {e}")),
            }
        }
        if let Some(cpu) = plan.service_cpu {
            let path = cpu_lock_path(cpu);
            match try_flock(&path, FlockMode::Exclusive) {
                Ok(Some(fd)) => locks.push(fd),
                Ok(None) => return Err(format!("service CPU {cpu} busy")),
                Err(e) => return Err(format!("service CPU {cpu}: {e}")),
            }
        }
    }
    Ok(locks)
}
/// Reserves `count` consecutive host CPUs (ids `offset..offset+count`) by
/// sliding a window from CPU 0 upward until a fully free window is found.
/// When a `HostTopology` is supplied, shared locks on the LLCs covering the
/// window are taken as well, so an exclusive-LLC holder blocks the window.
///
/// Any failure inside a window releases its partial locks (by dropping them)
/// and slides the window by one CPU. Exhausting all windows yields a
/// `ResourceContention` error with a user-facing hint.
// NOTE(review): CPU ids are assumed to be contiguous from 0 up to
// total_host_cpus — confirm this holds on hosts with offlined CPUs.
pub fn acquire_cpu_locks(
    count: usize,
    total_host_cpus: usize,
    host_topo: Option<&HostTopology>,
) -> Result<Vec<std::os::fd::OwnedFd>> {
    if count == 0 {
        return Ok(Vec::new());
    }
    let mut offset = 0;
    while offset + count <= total_host_cpus {
        match try_acquire_cpu_window(offset, count) {
            Ok(mut locks) => {
                if let Some(topo) = host_topo {
                    let cpus: Vec<usize> = (offset..offset + count).collect();
                    match acquire_llc_shared_locks(topo, &cpus) {
                        Ok(llc_locks) => locks.extend(llc_locks),
                        Err(_) => {
                            // Release the CPU locks before retrying elsewhere.
                            drop(locks);
                            offset += 1;
                            continue;
                        }
                    }
                }
                return Ok(locks);
            }
            Err(_) => {
                offset += 1;
            }
        }
    }
    Err(anyhow::Error::new(ResourceContention {
        reason: format!(
            "no {count} consecutive CPUs available\n \
hint: pass --no-perf-mode or set KTSTR_NO_PERF_MODE=1 to run without CPU reservation"
        ),
    }))
}
/// CPUs this process is allowed to run on. Tries, in order: the test
/// override, sched_getaffinity (via `host_state::read_affinity`), and the
/// `Cpus_allowed_list:` line of /proc/self/status. Returns an empty Vec if
/// every source fails — callers treat that as "unknown".
pub(crate) fn host_allowed_cpus() -> Vec<usize> {
    #[cfg(test)]
    {
        if let Some(override_set) = ALLOWED_CPUS_OVERRIDE.with(|p| p.borrow().clone()) {
            return override_set;
        }
    }
    if let Some(cpus) = crate::host_state::read_affinity(0) {
        return cpus.into_iter().map(|c| c as usize).collect();
    }
    // Fallback: parse the textual affinity mask from procfs.
    if let Ok(raw) = std::fs::read_to_string("/proc/self/status") {
        for line in raw.lines() {
            if let Some(v) = line.strip_prefix("Cpus_allowed_list:")
                && let Some(parsed) = crate::host_state::parse_cpu_list(v.trim())
            {
                return parsed.into_iter().map(|c| c as usize).collect();
            }
        }
    }
    Vec::new()
}
// Test-only override for `host_allowed_cpus`; thread_local so parallel test
// threads can each fake a different allowed set.
#[cfg(test)]
thread_local! {
    pub(crate) static ALLOWED_CPUS_OVERRIDE: std::cell::RefCell<Option<Vec<usize>>> =
        const { std::cell::RefCell::new(None) };
}
/// Default CPU budget when no cap is given: 30% of the allowed set, rounded
/// up, and never below one CPU. Saturating multiply guards huge inputs.
fn default_cpu_budget(allowed_cpus: usize) -> usize {
    let thirty_pct = allowed_cpus.saturating_mul(30);
    let budget = thirty_pct.div_ceil(100);
    if budget == 0 { 1 } else { budget }
}
/// User-requested upper bound on the number of CPUs to reserve.
/// `NonZeroUsize` makes a zero cap unrepresentable (rejected in `new`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CpuCap {
    n: std::num::NonZeroUsize,
}
impl CpuCap {
pub fn new(n: usize) -> Result<Self> {
std::num::NonZeroUsize::new(n)
.map(|n| CpuCap { n })
.ok_or_else(|| anyhow::anyhow!("--cpu-cap must be ≥ 1 CPU (got 0)"))
}
pub fn resolve(cli_flag: Option<usize>) -> Result<Option<CpuCap>> {
if let Some(n) = cli_flag {
return Ok(Some(CpuCap::new(n)?));
}
match std::env::var("KTSTR_CPU_CAP") {
Ok(s) if s.is_empty() => Ok(None),
Ok(s) => {
let n: usize = s
.parse()
.with_context(|| format!("KTSTR_CPU_CAP is not a valid integer: {s:?}"))?;
Ok(Some(CpuCap::new(n)?))
}
Err(std::env::VarError::NotPresent) => Ok(None),
Err(std::env::VarError::NotUnicode(raw)) => {
anyhow::bail!(
"KTSTR_CPU_CAP contains non-UTF-8 bytes ({} bytes): {raw:?}. \
Set an integer value or unset.",
raw.len(),
)
}
}
}
pub fn effective_count(&self, allowed_cpus: usize) -> Result<usize> {
let n = self.n.get();
if n > allowed_cpus {
return Err(anyhow::Error::new(ResourceContention {
reason: format!(
"--cpu-cap N = {n} exceeds the {allowed_cpus} CPUs this \
process is allowed on (from sched_getaffinity / \
Cpus_allowed_list). Pick a cap ≤ {allowed_cpus}, release \
the cgroup/taskset constraint restricting this process, \
or omit --cpu-cap to use the 30% default of the allowed \
set."
),
}));
}
Ok(n)
}
}
/// Point-in-time view of one LLC's lock file, captured during planning.
#[derive(Debug, Clone)]
pub(crate) struct LlcSnapshot {
    pub(crate) llc_idx: usize,
    pub(crate) lockfile_path: std::path::PathBuf,
    // Processes currently holding the lock (best effort; may be empty even
    // if holder_count was observed earlier).
    pub(crate) holders: Vec<crate::flock::HolderInfo>,
    // Cached holders.len() at snapshot time.
    pub(crate) holder_count: usize,
}
/// A committed LLC reservation: which LLCs were locked, which of their CPUs
/// the caller may use, and which NUMA nodes back that memory.
#[derive(Debug)]
pub struct LlcPlan {
    // Sorted host LLC indices whose shared locks we hold.
    pub locked_llcs: Vec<usize>,
    // Allowed CPUs drawn from those LLCs, capped at the target budget.
    pub cpus: Vec<usize>,
    // NUMA nodes of the selected CPUs (for memory binding).
    pub mems: std::collections::BTreeSet<usize>,
    // Snapshot used to build the plan, kept for diagnostics.
    #[allow(dead_code)]
    pub(crate) snapshot: Vec<LlcSnapshot>,
    // Held lock fds; dropping the plan releases the flock(2) locks.
    #[allow(dead_code)] pub(crate) locks: Vec<std::os::fd::OwnedFd>,
}
// Extra attempts when a planned lock is stolen between snapshotting and
// acquiring (TOCTOU race); total attempts = this + 1.
const ACQUIRE_MAX_TOCTOU_RETRIES: u32 = 1;
/// Snapshots every host LLC's lock file: ensures the file exists, then
/// records who currently holds it (best effort — unreadable holder info
/// degrades to an empty list rather than an error).
fn discover_llc_snapshots(topo: &HostTopology, mountinfo: &str) -> Result<Vec<LlcSnapshot>> {
    (0..topo.llc_groups.len())
        .map(|llc_idx| {
            let path = std::path::PathBuf::from(llc_lock_path(llc_idx));
            crate::flock::materialize(&path)?;
            let holders =
                crate::flock::read_holders_with_mountinfo(&path, mountinfo).unwrap_or_default();
            Ok(LlcSnapshot {
                llc_idx,
                lockfile_path: path,
                holder_count: holders.len(),
                holders,
            })
        })
        .collect()
}
/// Picks which LLCs to reserve so their allowed-CPU total covers
/// `target_cpus`. Only CPUs in `allowed` (the process affinity set) count.
///
/// Selection policy:
/// 1. If the budget meets or exceeds everything available, take every LLC
///    with at least one allowed CPU.
/// 2. Otherwise rank LLCs: already-held ("consolidation") LLCs first, most
///    holders first (ties by ascending index), then untouched LLCs by index.
///    Packing onto busy LLCs keeps fresh LLCs free for exclusive users.
/// 3. Starting from the top-ranked LLC's home node, walk NUMA nodes in
///    distance order and greedily take ranked LLCs on each node until the
///    CPU budget is met.
///
/// Returns the selected LLC indices, sorted ascending (possibly empty).
fn plan_from_snapshots(
    snapshots: &[LlcSnapshot],
    target_cpus: usize,
    topo: &HostTopology,
    allowed: &std::collections::BTreeSet<usize>,
    distance_fn: impl Fn(usize, usize) -> u8,
) -> Vec<usize> {
    if target_cpus == 0 {
        return Vec::new();
    }
    // How many of an LLC's CPUs the process may actually use.
    let llc_allowed_cpus = |idx: usize| -> usize {
        topo.llc_groups[idx]
            .cpus
            .iter()
            .filter(|c| allowed.contains(c))
            .count()
    };
    let total_allowed_in_llcs: usize = (0..snapshots.len()).map(llc_allowed_cpus).sum();
    // Budget covers everything: take all usable LLCs, no ranking needed.
    if target_cpus >= total_allowed_in_llcs {
        let mut all: Vec<usize> = (0..snapshots.len())
            .filter(|&idx| llc_allowed_cpus(idx) > 0)
            .collect();
        all.sort_unstable();
        return all;
    }
    let eligible = |s: &&LlcSnapshot| -> bool { llc_allowed_cpus(s.llc_idx) > 0 };
    // Rank: busy LLCs (consolidation) ahead of untouched ones.
    let mut consolidation: Vec<&LlcSnapshot> = snapshots
        .iter()
        .filter(|s| s.holder_count > 0)
        .filter(eligible)
        .collect();
    let mut fresh: Vec<&LlcSnapshot> = snapshots
        .iter()
        .filter(|s| s.holder_count == 0)
        .filter(eligible)
        .collect();
    consolidation.sort_by(|a, b| {
        b.holder_count
            .cmp(&a.holder_count)
            .then(a.llc_idx.cmp(&b.llc_idx))
    });
    fresh.sort_by_key(|s| s.llc_idx);
    let ranked: Vec<&LlcSnapshot> = consolidation.into_iter().chain(fresh).collect();
    if ranked.is_empty() {
        return Vec::new();
    }
    // Anchor NUMA expansion at the top-ranked LLC's home node.
    let seed = ranked[0];
    let seed_node = topo.llc_numa_node(seed.llc_idx);
    let node_order = topo.numa_nodes_sorted_by_distance(seed_node, distance_fn);
    let mut selected: Vec<usize> = Vec::new();
    let mut picked: std::collections::HashSet<usize> = std::collections::HashSet::new();
    let mut accumulated: usize = 0;
    // Greedy fill: nearest nodes first, best-ranked LLCs within each node.
    for node in node_order {
        if accumulated >= target_cpus {
            break;
        }
        for snap in &ranked {
            if accumulated >= target_cpus {
                break;
            }
            if picked.contains(&snap.llc_idx) {
                continue;
            }
            if topo.llc_numa_node(snap.llc_idx) != node {
                continue;
            }
            selected.push(snap.llc_idx);
            picked.insert(snap.llc_idx);
            accumulated += llc_allowed_cpus(snap.llc_idx);
        }
    }
    selected.sort_unstable();
    selected
}
/// Takes a shared flock on every selected LLC. If any one is unavailable,
/// releases everything acquired so far (by dropping the fds) and reports
/// "retry" via `Ok(None)`; hard I/O errors propagate as `Err`.
fn try_acquire_llc_plan_locks(
    selected: &[usize],
    snapshots: &[LlcSnapshot],
) -> Result<Option<Vec<std::os::fd::OwnedFd>>> {
    let mut locks: Vec<std::os::fd::OwnedFd> = Vec::with_capacity(selected.len());
    for &idx in selected {
        let snap = snapshots
            .iter()
            .find(|s| s.llc_idx == idx)
            .expect("selected index must come from snapshots — plan invariant");
        let Some(fd) = crate::flock::try_flock(&snap.lockfile_path, FlockMode::Shared)? else {
            drop(locks);
            return Ok(None);
        };
        locks.push(fd);
    }
    Ok(Some(locks))
}
/// Public entry point: plans and locks an LLC reservation using the real
/// flock-based acquire function. `test_topo` supplies NUMA distances.
pub fn acquire_llc_plan(
    topo: &HostTopology,
    test_topo: &crate::topology::TestTopology,
    cpu_cap: Option<CpuCap>,
) -> Result<LlcPlan> {
    acquire_llc_plan_with_acquire_fn(topo, test_topo, cpu_cap, try_acquire_llc_plan_locks)
}
/// Core of `acquire_llc_plan`, with the lock-acquisition step injected so
/// tests can simulate contention.
///
/// Flow: resolve the CPU budget (explicit cap or 30% default of the allowed
/// set), snapshot all LLC lock files, pick a set of LLCs
/// (`plan_from_snapshots`), then try to lock them. If another process stole
/// a lock between snapshot and acquisition (TOCTOU), re-snapshot and retry
/// up to ACQUIRE_MAX_TOCTOU_RETRIES times before failing with a
/// `ResourceContention` error listing current holders.
fn acquire_llc_plan_with_acquire_fn<F>(
    topo: &HostTopology,
    test_topo: &crate::topology::TestTopology,
    cpu_cap: Option<CpuCap>,
    mut acquire_fn: F,
) -> Result<LlcPlan>
where
    F: FnMut(&[usize], &[LlcSnapshot]) -> Result<Option<Vec<std::os::fd::OwnedFd>>>,
{
    let allowed_vec = host_allowed_cpus();
    if allowed_vec.is_empty() {
        anyhow::bail!(
            "acquire_llc_plan: could not determine the calling process's \
             allowed CPU set (both sched_getaffinity and \
             /proc/self/status Cpus_allowed_list failed). Cannot plan a \
             reservation without knowing which CPUs are schedulable."
        );
    }
    let allowed: std::collections::BTreeSet<usize> = allowed_vec.iter().copied().collect();
    let allowed_cpus = allowed.len();
    let target_cpus = match cpu_cap {
        Some(cap) => cap.effective_count(allowed_cpus)?,
        None => default_cpu_budget(allowed_cpus),
    };
    if target_cpus == 0 {
        anyhow::bail!("acquire_llc_plan: CPU budget resolved to zero");
    }
    // Mountinfo is read once and reused across snapshots/retries.
    let mountinfo = crate::flock::read_mountinfo()?;
    let mut attempt: u32 = 0;
    loop {
        let snapshots = discover_llc_snapshots(topo, &mountinfo)?;
        let selected = plan_from_snapshots(&snapshots, target_cpus, topo, &allowed, |from, to| {
            test_topo.numa_distance(from, to)
        });
        if selected.is_empty() {
            anyhow::bail!(
                "acquire_llc_plan: no host LLC overlaps the process's \
                 {allowed_cpus}-CPU allowed set — sysfs LLC groups and \
                 sched_getaffinity disagree. Check for a stale \
                 /sys/devices/system/cpu view or a cgroup cpuset that \
                 excludes every LLC."
            );
        }
        match acquire_fn(&selected, &snapshots)? {
            Some(locks) => {
                // Locks held: collect up to target_cpus allowed CPUs from the
                // selected LLCs (in sorted LLC order) and their NUMA nodes.
                let mut cpus: Vec<usize> = Vec::new();
                let mut mems: std::collections::BTreeSet<usize> = std::collections::BTreeSet::new();
                'outer: for &idx in &selected {
                    let group = &topo.llc_groups[idx];
                    for &cpu in &group.cpus {
                        if !allowed.contains(&cpu) {
                            continue;
                        }
                        if cpus.len() >= target_cpus {
                            break 'outer;
                        }
                        cpus.push(cpu);
                        let node = topo.cpu_to_node.get(&cpu).copied().unwrap_or(0);
                        mems.insert(node);
                    }
                }
                return Ok(LlcPlan {
                    locked_llcs: selected,
                    cpus,
                    mems,
                    snapshot: snapshots,
                    locks,
                });
            }
            None => {
                // Lost a race for one of the planned locks.
                if attempt >= ACQUIRE_MAX_TOCTOU_RETRIES {
                    // Give up: re-snapshot for an accurate holder report.
                    let final_snapshots = discover_llc_snapshots(topo, &mountinfo)?;
                    let holders: Vec<String> = final_snapshots
                        .iter()
                        .filter(|s| !s.holders.is_empty())
                        .map(|s| {
                            format!(
                                "LLC {}: {}",
                                s.llc_idx,
                                crate::flock::format_holder_list(&s.holders)
                            )
                        })
                        .collect();
                    let holder_text = if holders.is_empty() {
                        "<none recorded>".to_string()
                    } else {
                        holders.join("; ")
                    };
                    return Err(anyhow::Error::new(ResourceContention {
                        reason: format!(
                            "acquire_llc_plan: could not reserve {target_cpus} \
                             CPU(s) after {retries} TOCTOU retry; holders: \
                             {holder_text}. Run `ktstr locks --json` to see \
                             every ktstr lock on this host.",
                            retries = ACQUIRE_MAX_TOCTOU_RETRIES + 1,
                        ),
                    }));
                }
                attempt += 1;
            }
        }
    }
}
/// Parallel-job count for a plan: one job per reserved CPU, never below one.
pub fn make_jobs_for_plan(plan: &LlcPlan) -> usize {
    match plan.cpus.len() {
        0 => 1,
        n => n,
    }
}
/// Renders a locked-LLC list like `[0 (node 0), 2 (node 1)]`; without NUMA
/// information (empty cpu_to_node map) the node annotations are omitted.
pub fn format_llc_list(locked: &[usize], topo: &HostTopology) -> String {
    let have_numa = !topo.cpu_to_node.is_empty();
    let mut parts: Vec<String> = Vec::with_capacity(locked.len());
    for &idx in locked {
        if have_numa {
            let node = topo.llc_numa_node(idx);
            parts.push(format!("{idx} (node {node})"));
        } else {
            parts.push(idx.to_string());
        }
    }
    format!("[{}]", parts.join(", "))
}
/// Prints a stderr warning when the reservation spans more than one NUMA
/// node. Purely informational — the build proceeds either way.
pub fn warn_if_cross_node_spill(plan: &LlcPlan, topo: &HostTopology) {
    if should_warn_cross_node(&plan.mems) {
        eprintln!(
            "ktstr: reserving LLCs {list} across {n} NUMA nodes \
             (preferred single-node contiguous unavailable). Build \
             will run; memory-access latency may be higher.",
            list = format_llc_list(&plan.locked_llcs, topo),
            n = plan.mems.len(),
        );
    }
}
/// A cross-node spill exists once the reservation touches at least two
/// NUMA nodes.
fn should_warn_cross_node(mems: &std::collections::BTreeSet<usize>) -> bool {
    mems.len() >= 2
}
/// Takes a shared lock on every LLC containing any of `cpus`. Shared locks
/// coexist with other shared holders but are refused while an exclusive
/// owner holds the LLC. Failure reports the busy LLC; dropping the returned
/// Vec (on error or later) releases whatever was taken.
fn acquire_llc_shared_locks(
    topo: &HostTopology,
    cpus: &[usize],
) -> std::result::Result<Vec<std::os::fd::OwnedFd>, String> {
    // Gather the covering LLC indices, deduplicated, preserving the order in
    // which the CPUs first touch them.
    let mut llc_indices: Vec<usize> = Vec::new();
    for &cpu in cpus {
        for (idx, group) in topo.llc_groups.iter().enumerate() {
            if !group.cpus.contains(&cpu) {
                continue;
            }
            if !llc_indices.contains(&idx) {
                llc_indices.push(idx);
            }
        }
    }
    let mut locks = Vec::new();
    for &llc_idx in &llc_indices {
        let path = llc_lock_path(llc_idx);
        match try_flock(&path, FlockMode::Shared) {
            Ok(Some(fd)) => locks.push(fd),
            Ok(None) => return Err(format!("LLC {llc_idx} exclusively held")),
            Err(e) => return Err(format!("LLC {llc_idx}: {e}")),
        }
    }
    Ok(locks)
}
/// Exclusively locks the contiguous CPU range `[offset, offset + count)`.
/// The first busy CPU or I/O error aborts; dropping the partially filled
/// Vec releases what was already taken.
fn try_acquire_cpu_window(
    offset: usize,
    count: usize,
) -> std::result::Result<Vec<std::os::fd::OwnedFd>, String> {
    let mut locks = Vec::with_capacity(count);
    for cpu in offset..offset + count {
        let path = cpu_lock_path(cpu);
        let fd = match try_flock(&path, FlockMode::Exclusive) {
            Ok(Some(fd)) => fd,
            Ok(None) => return Err(format!("CPU {cpu} busy")),
            Err(e) => return Err(format!("CPU {cpu}: {e}")),
        };
        locks.push(fd);
    }
    Ok(locks)
}
/// Best-effort binding of the memory range `[addr, addr + len)` to the given
/// NUMA nodes via the raw mbind(2) syscall with MPOL_BIND. Success and
/// failure are both reported on stderr; the caller is never aborted.
pub fn mbind_to_nodes(addr: *mut u8, len: usize, nodes: &[usize]) {
    // Nothing to bind.
    if nodes.is_empty() || len == 0 {
        return;
    }
    let node_set: std::collections::BTreeSet<usize> = nodes.iter().copied().collect();
    let (nodemask, maxnode) = crate::workload::build_nodemask(&node_set);
    // SAFETY: the kernel validates addr/len and fails with an errno rather
    // than faulting on a bad range; nodemask stays alive across the call.
    // Assumes build_nodemask returns a mask valid for `maxnode` bits — TODO
    // confirm against crate::workload.
    let rc = unsafe {
        libc::syscall(
            libc::SYS_mbind,
            addr as *mut libc::c_void,
            len,
            libc::MPOL_BIND,
            nodemask.as_ptr(),
            maxnode,
            // flags = 0. NOTE(review): mbind's flags argument is an unsigned
            // long; 0usize would match the ABI width more precisely.
            0u32,
        )
    };
    if rc == 0 {
        eprintln!(
            "performance_mode: mbind {} MB to NUMA node(s) {:?}",
            len >> 20,
            nodes,
        );
    } else {
        let err = std::io::Error::last_os_error();
        eprintln!(
            "performance_mode: WARNING: mbind to node(s) {:?} failed: {err}",
            nodes,
        );
    }
}
use crate::topology::parse_cpu_list_lenient;
/// Number of free 2 MB hugepages reported by sysfs. A missing file or an
/// unparseable value (e.g. no hugepage support) reads as zero.
pub fn hugepages_free() -> u64 {
    let path = "/sys/kernel/mm/hugepages/hugepages-2048kB/free_hugepages";
    match std::fs::read_to_string(path) {
        Ok(raw) => raw.trim().parse::<u64>().unwrap_or(0),
        Err(_) => 0,
    }
}
/// Number of 2 MB hugepages needed to back `memory_mb` megabytes, rounding
/// up so odd sizes still fit. Widening to u64 first makes overflow impossible.
pub fn hugepages_needed(memory_mb: u32) -> u64 {
    let mb = u64::from(memory_mb);
    (mb + 1) / 2
}
/// Rough host-load estimate: (currently runnable tasks, online CPU count),
/// from /proc/stat's `procs_running` line and the sysfs online-CPU list.
/// Returns None if either source is unavailable or unparseable.
pub fn host_load_estimate() -> Option<(usize, usize)> {
    let stat = std::fs::read_to_string("/proc/stat").ok()?;
    let procs_running = stat
        .lines()
        .find(|l| l.starts_with("procs_running "))?
        .split_whitespace()
        .nth(1)?
        .parse::<usize>()
        .ok()?;
    let online = std::fs::read_to_string("/sys/devices/system/cpu/online").ok()?;
    let total = parse_cpu_list_lenient(&online).len();
    Some((procs_running, total))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::vmm::topology::Topology;
    // Test helper: set of NUMA nodes covering `cpus` (unmapped CPUs -> node 0).
    fn numa_nodes_for_cpus(
        topo: &HostTopology,
        cpus: &[usize],
    ) -> std::collections::BTreeSet<usize> {
        cpus.iter()
            .map(|c| topo.cpu_to_node.get(c).copied().unwrap_or(0))
            .collect()
    }
#[test]
fn parse_cpu_list_range() {
assert_eq!(parse_cpu_list_lenient("0-3"), vec![0, 1, 2, 3]);
}
#[test]
fn parse_cpu_list_single() {
assert_eq!(parse_cpu_list_lenient("5"), vec![5]);
}
#[test]
fn parse_cpu_list_mixed() {
assert_eq!(
parse_cpu_list_lenient("0-2,5,7-9"),
vec![0, 1, 2, 5, 7, 8, 9]
);
}
#[test]
fn parse_cpu_list_empty() {
assert!(parse_cpu_list_lenient("").is_empty());
}
#[test]
fn parse_cpu_list_whitespace() {
assert_eq!(parse_cpu_list_lenient("0-3\n"), vec![0, 1, 2, 3]);
}
#[test]
fn host_topology_from_sysfs() {
let topo = HostTopology::from_sysfs();
assert!(topo.is_ok(), "should read host topology: {:?}", topo.err());
let topo = topo.unwrap();
assert!(!topo.online_cpus.is_empty());
assert!(!topo.llc_groups.is_empty());
}
#[test]
fn pinning_plan_simple() {
let topo = HostTopology::from_sysfs().unwrap();
if topo.total_cpus() < 2 {
return; }
let plan = topo.compute_pinning(&Topology::new(1, 1, 2, 1), false, 0);
assert!(plan.is_ok(), "pinning should succeed: {:?}", plan.err());
let plan = plan.unwrap();
assert_eq!(plan.assignments.len(), 2);
let cpus: Vec<usize> = plan.assignments.iter().map(|a| a.1).collect();
let unique: std::collections::HashSet<usize> = cpus.iter().copied().collect();
assert_eq!(cpus.len(), unique.len());
}
#[test]
fn pinning_plan_oversubscribed() {
let topo = HostTopology::from_sysfs().unwrap();
let too_many = topo.total_cpus() as u32 + 1;
let plan = topo.compute_pinning(&Topology::new(1, 1, too_many, 1), false, 0);
assert!(plan.is_err());
}
#[test]
fn hugepages_needed_values() {
assert_eq!(hugepages_needed(2), 1);
assert_eq!(hugepages_needed(4), 2);
assert_eq!(hugepages_needed(2048), 1024);
assert_eq!(hugepages_needed(3), 2);
}
#[test]
fn hugepages_free_runs() {
let _ = hugepages_free();
}
#[test]
fn host_load_estimate_runs() {
let result = host_load_estimate();
assert!(result.is_some());
let (running, total) = result.unwrap();
assert!(total > 0);
assert!(running >= 1);
}
    // --- parse_cpu_list_lenient: lenient edge cases --------------------------

    #[test]
    fn parse_cpu_list_trailing_comma() {
        assert_eq!(parse_cpu_list_lenient("0,1,2,"), vec![0, 1, 2]);
    }
    #[test]
    fn parse_cpu_list_leading_comma() {
        assert_eq!(parse_cpu_list_lenient(",0,1"), vec![0, 1]);
    }
    #[test]
    fn parse_cpu_list_single_zero() {
        assert_eq!(parse_cpu_list_lenient("0"), vec![0]);
    }
    #[test]
    fn parse_cpu_list_large_ids() {
        assert_eq!(parse_cpu_list_lenient("127,255"), vec![127, 255]);
    }
    #[test]
    fn parse_cpu_list_reversed_range() {
        assert!(parse_cpu_list_lenient("5-3").is_empty());
    }
    #[test]
    fn parse_cpu_list_non_numeric() {
        assert!(parse_cpu_list_lenient("abc").is_empty());
    }

    // Builds a HostTopology where group i's CPUs all live on NUMA node i.
    fn synthetic_topo(groups: Vec<Vec<usize>>) -> HostTopology {
        let tagged: Vec<(Vec<usize>, usize)> = groups
            .into_iter()
            .enumerate()
            .map(|(node, cpus)| (cpus, node))
            .collect();
        HostTopology::new_for_tests(&tagged)
    }
#[test]
fn compute_pinning_single_llc() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3]]);
let plan = topo
.compute_pinning(&Topology::new(1, 1, 2, 1), false, 0)
.unwrap();
assert_eq!(plan.assignments.len(), 2);
assert_eq!(plan.assignments[0], (0, 0));
assert_eq!(plan.assignments[1], (1, 1));
}
#[test]
fn compute_pinning_two_llcs() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3], vec![4, 5, 6, 7]]);
let plan = topo
.compute_pinning(&Topology::new(1, 2, 2, 1), false, 0)
.unwrap();
assert_eq!(plan.assignments.len(), 4);
assert_eq!(plan.assignments[0], (0, 0));
assert_eq!(plan.assignments[1], (1, 1));
assert_eq!(plan.assignments[2], (2, 4));
assert_eq!(plan.assignments[3], (3, 5));
}
#[test]
fn compute_pinning_with_smt() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3, 4, 5, 6, 7]]);
let plan = topo
.compute_pinning(&Topology::new(1, 1, 2, 2), false, 0)
.unwrap();
assert_eq!(plan.assignments.len(), 4);
let cpus: Vec<usize> = plan.assignments.iter().map(|a| a.1).collect();
let unique: std::collections::HashSet<usize> = cpus.iter().copied().collect();
assert_eq!(cpus.len(), unique.len());
}
#[test]
fn compute_pinning_exact_fit() {
let topo = synthetic_topo(vec![vec![0, 1], vec![2, 3]]);
let plan = topo
.compute_pinning(&Topology::new(1, 2, 2, 1), false, 0)
.unwrap();
assert_eq!(plan.assignments.len(), 4);
let assigned: std::collections::HashSet<usize> =
plan.assignments.iter().map(|a| a.1).collect();
let all_cpus: std::collections::HashSet<usize> = topo.online_cpus.iter().copied().collect();
assert_eq!(assigned, all_cpus, "exact fit must consume all host CPUs");
assert_eq!(
assigned.len(),
plan.assignments.len(),
"all assignments must be unique",
);
}
#[test]
fn compute_pinning_error_too_many_vcpus() {
let topo = synthetic_topo(vec![vec![0, 1]]);
let err = topo
.compute_pinning(&Topology::new(1, 1, 4, 1), false, 0)
.unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("4 vCPUs") && msg.contains("2 host CPUs"),
"error should mention CPU counts: {msg}",
);
}
#[test]
fn compute_pinning_error_too_many_llcs() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3]]);
let err = topo
.compute_pinning(&Topology::new(1, 2, 1, 1), false, 0)
.unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("2 LLCs") && msg.contains("1 LLC groups"),
"error should mention LLC count mismatch: {msg}",
);
}
#[test]
fn compute_pinning_error_llc_too_small() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3], vec![4]]);
let err = topo
.compute_pinning(&Topology::new(1, 2, 2, 1), false, 0)
.unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("LLC group 1") && msg.contains("1 available"),
"error should identify the undersized LLC: {msg}",
);
}
#[test]
fn compute_pinning_no_cross_llc_sharing() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3], vec![4, 5, 6, 7], vec![8, 9, 10, 11]]);
let plan = topo
.compute_pinning(&Topology::new(1, 3, 2, 1), false, 0)
.unwrap();
for (vcpu_id, host_cpu) in &plan.assignments {
let llc_idx = vcpu_id / 2; let llc_start = llc_idx as usize * 4;
let llc_end = llc_start + 3;
assert!(
*host_cpu >= llc_start && *host_cpu <= llc_end,
"vCPU {vcpu_id} (LLC {llc_idx}) pinned to CPU {host_cpu}, \
expected range {llc_start}..={llc_end}",
);
}
}
#[test]
fn compute_pinning_all_assignments_unique() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3], vec![4, 5, 6, 7]]);
let plan = topo
.compute_pinning(&Topology::new(1, 2, 4, 1), false, 0)
.unwrap();
let cpus: Vec<usize> = plan.assignments.iter().map(|a| a.1).collect();
let unique: std::collections::HashSet<usize> = cpus.iter().copied().collect();
assert_eq!(
cpus.len(),
unique.len(),
"all host CPU assignments must be unique: {:?}",
cpus,
);
}
#[test]
fn compute_pinning_vcpu_ids_sequential() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3]]);
let plan = topo
.compute_pinning(&Topology::new(1, 1, 4, 1), false, 0)
.unwrap();
let vcpu_ids: Vec<u32> = plan.assignments.iter().map(|a| a.0).collect();
assert_eq!(vcpu_ids, vec![0, 1, 2, 3]);
}
#[test]
fn compute_pinning_single_vcpu() {
let topo = synthetic_topo(vec![vec![42]]);
let plan = topo
.compute_pinning(&Topology::new(1, 1, 1, 1), false, 0)
.unwrap();
assert_eq!(plan.assignments.len(), 1);
assert_eq!(plan.assignments[0], (0, 42));
}
#[test]
fn sysfs_llc_groups_cover_all_cpus() {
let topo = HostTopology::from_sysfs().unwrap();
let llc_cpus: Vec<usize> = topo
.llc_groups
.iter()
.flat_map(|g| g.cpus.iter().copied())
.collect();
for cpu in &topo.online_cpus {
assert!(
llc_cpus.contains(cpu),
"CPU {} is online but not in any LLC group",
cpu,
);
}
}
#[test]
fn sysfs_llc_groups_nonempty() {
let topo = HostTopology::from_sysfs().unwrap();
for (i, group) in topo.llc_groups.iter().enumerate() {
assert!(
!group.cpus.is_empty(),
"LLC group {} should have at least one CPU",
i,
);
}
}
#[test]
fn sysfs_pinning_respects_llc_boundaries() {
let topo = HostTopology::from_sysfs().unwrap();
if topo.llc_groups.len() < 2 || topo.total_cpus() < 4 {
return; }
let min_llc_size = topo.llc_groups.iter().map(|g| g.cpus.len()).min().unwrap();
if min_llc_size < 2 {
return;
}
let plan = topo
.compute_pinning(&Topology::new(1, 2, 2, 1), false, 0)
.unwrap();
for (vcpu_id, host_cpu) in &plan.assignments {
let llc_idx = vcpu_id / 2;
let group = &topo.llc_groups[llc_idx as usize];
assert!(
group.cpus.contains(host_cpu),
"vCPU {} mapped to CPU {} which is not in LLC group {}",
vcpu_id,
host_cpu,
llc_idx,
);
}
}
#[test]
fn hugepages_needed_boundary() {
assert_eq!(hugepages_needed(1), 1); assert_eq!(hugepages_needed(0), 0);
}
#[test]
fn hugepages_needed_exact_multiple() {
assert_eq!(hugepages_needed(1024), 512);
}
#[test]
// With service_cpu=true on a 4-CPU LLC, the service CPU must be one of the
// CPUs NOT assigned to any vCPU.
fn compute_pinning_service_cpu_picks_unpinned() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3]]);
let plan = topo
.compute_pinning(&Topology::new(1, 1, 2, 1), true, 0)
.unwrap();
assert_eq!(plan.assignments.len(), 2);
let service = plan.service_cpu.expect("service_cpu should be set");
let vcpu_cpus: std::collections::HashSet<usize> =
plan.assignments.iter().map(|a| a.1).collect();
assert!(
!vcpu_cpus.contains(&service),
"service CPU {service} must not be assigned to a vCPU",
);
}
#[test]
// service_cpu=false must leave the plan's service_cpu unset.
fn compute_pinning_service_cpu_false_returns_none() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3]]);
let plan = topo
.compute_pinning(&Topology::new(1, 1, 2, 1), false, 0)
.unwrap();
assert!(plan.service_cpu.is_none());
}
#[test]
// Exactly 3 host CPUs for 2 vCPUs + 1 service CPU: the single leftover CPU
// (2) must become the service CPU.
fn compute_pinning_service_cpu_exact_fit() {
let topo = synthetic_topo(vec![vec![0, 1, 2]]);
let plan = topo
.compute_pinning(&Topology::new(1, 1, 2, 1), true, 0)
.unwrap();
let service = plan.service_cpu.expect("service_cpu should be set");
assert_eq!(service, 2, "service CPU should be the only remaining CPU");
let vcpu_cpus: std::collections::HashSet<usize> =
plan.assignments.iter().map(|a| a.1).collect();
assert!(
!vcpu_cpus.contains(&service),
"service CPU {service} must not overlap vCPU assignments",
);
}
#[test]
// 2 vCPUs + service CPU need 3 CPUs but only 2 exist; the error message
// must spell out both the demand and the supply.
fn compute_pinning_service_cpu_insufficient_fails() {
let topo = synthetic_topo(vec![vec![0, 1]]);
let err = topo
.compute_pinning(&Topology::new(1, 1, 2, 1), true, 0)
.unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains("3 CPUs") && msg.contains("2 host CPUs"),
"error should mention CPU shortage: {msg}",
);
}
#[test]
// Service CPU selection must also avoid vCPU CPUs when the plan spans
// multiple LLCs.
fn compute_pinning_service_cpu_multi_llc() {
let topo = synthetic_topo(vec![vec![0, 1, 2], vec![3, 4, 5]]);
let plan = topo
.compute_pinning(&Topology::new(1, 2, 2, 1), true, 0)
.unwrap();
let service = plan.service_cpu.unwrap();
let vcpu_cpus: std::collections::HashSet<usize> =
plan.assignments.iter().map(|a| a.1).collect();
assert!(!vcpu_cpus.contains(&service));
}
#[test]
// Whatever NUMA mapping sysfs provides must be internally consistent:
// mapped CPUs are online and node ids are sane. Iterating an empty map is a
// no-op, so hosts without NUMA data pass trivially (the original's explicit
// emptiness guard was redundant).
fn sysfs_cpu_to_node_populated() {
    let topo = HostTopology::from_sysfs().unwrap();
    for (&cpu, &node) in &topo.cpu_to_node {
        assert!(
            topo.online_cpus.contains(&cpu),
            "NUMA mapping for CPU {cpu} but not in online set",
        );
        assert!(node < 1024, "unexpected NUMA node ID {node} for CPU {cpu}");
    }
}
#[test]
// With asymmetric LLCs (4 CPUs vs 2), the maximum must be reported.
fn max_cores_per_llc_synthetic() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3], vec![4, 5]]);
assert_eq!(topo.max_cores_per_llc(), 4);
}
#[test]
// Uniform LLCs: the max equals every group's size.
fn max_cores_per_llc_uniform() {
let topo = synthetic_topo(vec![vec![0, 1, 2], vec![3, 4, 5]]);
assert_eq!(topo.max_cores_per_llc(), 3);
}
#[test]
// An empty node list must make mbind_to_nodes a no-op: it must not touch
// the (null) address or length at all, otherwise these calls would fault.
fn mbind_to_nodes_empty_is_noop() {
mbind_to_nodes(std::ptr::null_mut(), 0, &[]);
mbind_to_nodes(std::ptr::null_mut(), 4096, &[]);
}
/// Build a `HostTopology` from `(numa_node, cpus)` pairs.
///
/// Convenience wrapper over `HostTopology::new_for_tests`, which expects the
/// tuple the other way round (`(cpus, node)`).
fn synthetic_topo_numa(groups: Vec<(usize, Vec<usize>)>) -> HostTopology {
    let flipped: Vec<(Vec<usize>, usize)> = groups
        .into_iter()
        .map(|(node, cpus)| (cpus, node))
        .collect();
    HostTopology::new_for_tests(&flipped)
}
#[test]
// llc_numa_node must report the node each LLC was declared on: LLC indices
// 0/1 were placed on node 0, indices 2/3 on node 1.
fn llc_numa_node_synthetic() {
let topo = synthetic_topo_numa(vec![
(0, vec![0, 1]),
(0, vec![2, 3]),
(1, vec![4, 5]),
(1, vec![6, 7]),
]);
assert_eq!(topo.llc_numa_node(0), 0);
assert_eq!(topo.llc_numa_node(1), 0);
assert_eq!(topo.llc_numa_node(2), 1);
assert_eq!(topo.llc_numa_node(3), 1);
}
#[test]
// Two guest NUMA nodes on a host with four LLCs split across two host
// nodes: each guest node's vCPUs must land entirely on one host node, and
// the two guest nodes must not share a host node.
fn compute_pinning_numa_two_nodes() {
    let topo = synthetic_topo_numa(vec![
        (0, vec![0, 1, 2, 3]),
        (0, vec![4, 5, 6, 7]),
        (1, vec![8, 9, 10, 11]),
        (1, vec![12, 13, 14, 15]),
    ]);
    let plan = topo
        .compute_pinning(&Topology::new(2, 4, 2, 1), false, 0)
        .unwrap();
    assert_eq!(plan.assignments.len(), 8);
    // Host CPUs backing the half-open vCPU range [lo, hi).
    let cpus_for = |lo: u32, hi: u32| -> Vec<usize> {
        plan.assignments
            .iter()
            .filter(|(vcpu, _)| *vcpu >= lo && *vcpu < hi)
            .map(|(_, cpu)| *cpu)
            .collect()
    };
    let node_0_host_nodes = numa_nodes_for_cpus(&topo, &cpus_for(0, 4));
    assert_eq!(
        node_0_host_nodes.len(),
        1,
        "guest NUMA 0 LLCs should all be on one host NUMA node, got {:?}",
        node_0_host_nodes,
    );
    let node_1_host_nodes = numa_nodes_for_cpus(&topo, &cpus_for(4, 8));
    assert_eq!(
        node_1_host_nodes.len(),
        1,
        "guest NUMA 1 LLCs should all be on one host NUMA node, got {:?}",
        node_1_host_nodes,
    );
    assert_ne!(
        node_0_host_nodes.iter().next(),
        node_1_host_nodes.iter().next(),
        "guest NUMA nodes should map to different host NUMA nodes",
    );
}
#[test]
// 5 LLCs across 2 guest nodes cannot split evenly (3 + 2); the ordering
// must keep all 5 LLCs, with each contiguous run on a single host node and
// the two runs on distinct nodes.
fn numa_aware_llc_order_uneven_llcs_preserves_remainder() {
let topo = synthetic_topo_numa(vec![
(0, vec![0, 1]),
(0, vec![2, 3]),
(0, vec![4, 5]),
(1, vec![6, 7]),
(1, vec![8, 9]),
(1, vec![10, 11]),
]);
let order = topo.numa_aware_llc_order(2, 5, 0);
assert_eq!(
order.len(),
5,
"uneven llc distribution must preserve all LLCs, got {order:?}"
);
let first_three_nodes: std::collections::BTreeSet<usize> = order[..3]
.iter()
.map(|&idx| topo.llc_numa_node(idx))
.collect();
let last_two_nodes: std::collections::BTreeSet<usize> = order[3..]
.iter()
.map(|&idx| topo.llc_numa_node(idx))
.collect();
assert_eq!(
first_three_nodes.len(),
1,
"first 3 LLCs must share a host node"
);
assert_eq!(
last_two_nodes.len(),
1,
"last 2 LLCs must share a host node"
);
assert_ne!(
first_three_nodes, last_two_nodes,
"two guest NUMA nodes must map to distinct host nodes"
);
}
#[test]
// A degenerate request of zero guest NUMA nodes must not panic or lose
// LLCs; it falls back to a plain order.
fn numa_aware_llc_order_zero_numa_nodes_is_safe() {
let topo = synthetic_topo_numa(vec![(0, vec![0, 1]), (0, vec![2, 3])]);
let order = topo.numa_aware_llc_order(0, 2, 0);
assert_eq!(
order.len(),
2,
"zero-numa fallback must still produce an order"
);
}
#[test]
// More guest nodes (4) than requested LLCs (2): NUMA grouping is impossible
// but the fallback must still return all requested LLCs.
fn numa_aware_llc_order_fewer_llcs_than_nodes_falls_back() {
let topo = synthetic_topo_numa(vec![
(0, vec![0, 1]),
(0, vec![2, 3]),
(1, vec![4, 5]),
(1, vec![6, 7]),
]);
let order = topo.numa_aware_llc_order(4, 2, 0);
assert_eq!(
order.len(),
2,
"fewer-llcs-than-nodes fallback must still produce 2 entries"
);
}
#[test]
// A 2-node guest on a single-node host cannot be NUMA-separated; the plan
// must still pin all 8 vCPUs to 8 distinct host CPUs.
fn compute_pinning_numa_fallback_insufficient_nodes() {
let topo = synthetic_topo_numa(vec![
(0, vec![0, 1]),
(0, vec![2, 3]),
(0, vec![4, 5]),
(0, vec![6, 7]),
]);
let plan = topo
.compute_pinning(&Topology::new(2, 4, 2, 1), false, 0)
.unwrap();
assert_eq!(plan.assignments.len(), 8);
let cpus: Vec<usize> = plan.assignments.iter().map(|a| a.1).collect();
let unique: std::collections::HashSet<usize> = cpus.iter().copied().collect();
assert_eq!(cpus.len(), unique.len());
}
#[test]
// Single guest node: NUMA awareness must not perturb the plain LLC-ordered
// assignment (first two CPUs of each LLC in index order).
fn compute_pinning_numa_single_node_unchanged() {
let topo = synthetic_topo_numa(vec![(0, vec![0, 1, 2, 3]), (1, vec![4, 5, 6, 7])]);
let plan = topo
.compute_pinning(&Topology::new(1, 2, 2, 1), false, 0)
.unwrap();
assert_eq!(plan.assignments.len(), 4);
assert_eq!(plan.assignments[0], (0, 0));
assert_eq!(plan.assignments[1], (1, 1));
assert_eq!(plan.assignments[2], (2, 4));
assert_eq!(plan.assignments[3], (3, 5));
}
#[test]
// Three guest nodes onto three host nodes (2 LLCs each): each guest node's
// pair of vCPUs must resolve to exactly one host node.
fn compute_pinning_numa_three_nodes() {
let topo = synthetic_topo_numa(vec![
(0, vec![0, 1]),
(0, vec![2, 3]),
(1, vec![4, 5]),
(1, vec![6, 7]),
(2, vec![8, 9]),
(2, vec![10, 11]),
]);
let plan = topo
.compute_pinning(&Topology::new(3, 6, 1, 1), false, 0)
.unwrap();
assert_eq!(plan.assignments.len(), 6);
for guest_node in 0..3u32 {
// Each guest node owns two consecutive vCPU ids.
let start = guest_node * 2;
let end = start + 2;
let cpus: Vec<usize> = plan
.assignments
.iter()
.filter(|(vcpu, _)| *vcpu >= start && *vcpu < end)
.map(|(_, cpu)| *cpu)
.collect();
let nodes = numa_nodes_for_cpus(&topo, &cpus);
assert_eq!(
nodes.len(),
1,
"guest NUMA {} should be on one host NUMA node, got {:?}",
guest_node,
nodes,
);
}
}
#[test]
// NUMA-aware placement combined with a service CPU: the service CPU still
// must not overlap any vCPU assignment.
fn compute_pinning_numa_with_service_cpu() {
let topo = synthetic_topo_numa(vec![
(0, vec![0, 1, 2, 3]),
(0, vec![4, 5, 6, 7]),
(1, vec![8, 9, 10, 11]),
(1, vec![12, 13, 14, 15]),
]);
let plan = topo
.compute_pinning(&Topology::new(2, 4, 2, 1), true, 0)
.unwrap();
assert_eq!(plan.assignments.len(), 8);
let service = plan.service_cpu.expect("service_cpu should be set");
let vcpu_cpus: std::collections::HashSet<usize> =
plan.assignments.iter().map(|a| a.1).collect();
assert!(
!vcpu_cpus.contains(&service),
"service CPU {service} must not overlap vCPU assignments",
);
}
#[test]
// With NUMA bookkeeping stripped out, llc_numa_node must fall back to node
// 0 rather than panic on a missing entry.
fn llc_numa_node_empty_map() {
let mut topo = HostTopology::new_for_tests(&[(vec![0, 1], 0)]);
topo.cpu_to_node.clear();
topo.host_node_llcs.clear();
assert_eq!(topo.llc_numa_node(0), 0);
}
// The "offset" parameter of compute_pinning rotates which LLC each guest
// LLC starts from; the tests below pin down the exact rotation arithmetic.
#[test]
// One host LLC: any offset wraps back to the only group, so the plan is
// identical to offset 0.
fn compute_pinning_offset_single_llc_wraps() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3]]);
let plan = topo
.compute_pinning(&Topology::new(1, 1, 2, 1), false, 1)
.unwrap();
assert_eq!(plan.assignments.len(), 2);
assert_eq!(plan.assignments[0], (0, 0));
assert_eq!(plan.assignments[1], (1, 1));
}
#[test]
// Offset 1 with two LLCs swaps which LLC serves which guest LLC.
fn compute_pinning_offset_two_llcs_shifts() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3], vec![4, 5, 6, 7]]);
let plan = topo
.compute_pinning(&Topology::new(1, 2, 2, 1), false, 1)
.unwrap();
assert_eq!(plan.assignments.len(), 4);
assert_eq!(plan.assignments[0], (0, 4));
assert_eq!(plan.assignments[1], (1, 5));
assert_eq!(plan.assignments[2], (2, 0));
assert_eq!(plan.assignments[3], (3, 1));
}
#[test]
// Offset 2 with two LLCs is congruent to 0 (mod 2): back to the identity.
fn compute_pinning_offset_wraps_modulo() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3], vec![4, 5, 6, 7]]);
let plan = topo
.compute_pinning(&Topology::new(1, 2, 2, 1), false, 2)
.unwrap();
assert_eq!(plan.assignments.len(), 4);
assert_eq!(plan.assignments[0], (0, 0));
assert_eq!(plan.assignments[1], (1, 1));
assert_eq!(plan.assignments[2], (2, 4));
assert_eq!(plan.assignments[3], (3, 5));
}
#[test]
// Three host LLCs, two guest LLCs, offset 1: the plan uses LLCs 1 and 2,
// leaving LLC 0 untouched.
fn compute_pinning_offset_three_llcs_partial() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3], vec![4, 5, 6, 7], vec![8, 9, 10, 11]]);
let plan = topo
.compute_pinning(&Topology::new(1, 2, 2, 1), false, 1)
.unwrap();
assert_eq!(plan.assignments.len(), 4);
assert_eq!(plan.assignments[0], (0, 4));
assert_eq!(plan.assignments[1], (1, 5));
assert_eq!(plan.assignments[2], (2, 8));
assert_eq!(plan.assignments[3], (3, 9));
}
#[test]
// Offset 5 with three LLCs reduces to 5 mod 3 == 2, selecting LLC index 2.
fn compute_pinning_offset_large_wraps() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3], vec![4, 5, 6, 7], vec![8, 9, 10, 11]]);
let plan = topo
.compute_pinning(&Topology::new(1, 1, 2, 1), false, 5)
.unwrap();
assert_eq!(plan.assignments.len(), 2);
assert_eq!(plan.assignments[0], (0, 8));
assert_eq!(plan.assignments[1], (1, 9));
}
#[test]
// NUMA placement with offset 1: the rotation happens WITHIN each host
// node's pair of LLCs, so NUMA separation is preserved.
fn compute_pinning_offset_numa_within_rotation() {
let topo = synthetic_topo_numa(vec![
(0, vec![0, 1, 2, 3]),
(0, vec![4, 5, 6, 7]),
(1, vec![8, 9, 10, 11]),
(1, vec![12, 13, 14, 15]),
]);
let plan = topo
.compute_pinning(&Topology::new(2, 4, 2, 1), false, 1)
.unwrap();
assert_eq!(plan.assignments.len(), 8);
assert_eq!(plan.assignments[0], (0, 4));
assert_eq!(plan.assignments[1], (1, 5));
assert_eq!(plan.assignments[2], (2, 0));
assert_eq!(plan.assignments[3], (3, 1));
assert_eq!(plan.assignments[4], (4, 12));
assert_eq!(plan.assignments[5], (5, 13));
assert_eq!(plan.assignments[6], (6, 8));
assert_eq!(plan.assignments[7], (7, 9));
}
#[test]
// Offset 2 (== LLCs per node) rotates whole NODES: guest node 0 now lands
// on host node 1 and vice versa, with in-node LLC order unchanged.
fn compute_pinning_offset_numa_node_rotation() {
let topo = synthetic_topo_numa(vec![
(0, vec![0, 1, 2, 3]),
(0, vec![4, 5, 6, 7]),
(1, vec![8, 9, 10, 11]),
(1, vec![12, 13, 14, 15]),
]);
let plan = topo
.compute_pinning(&Topology::new(2, 4, 2, 1), false, 2)
.unwrap();
assert_eq!(plan.assignments.len(), 8);
assert_eq!(plan.assignments[0], (0, 8));
assert_eq!(plan.assignments[1], (1, 9));
assert_eq!(plan.assignments[2], (2, 12));
assert_eq!(plan.assignments[3], (3, 13));
assert_eq!(plan.assignments[4], (4, 0));
assert_eq!(plan.assignments[5], (5, 1));
assert_eq!(plan.assignments[6], (6, 4));
assert_eq!(plan.assignments[7], (7, 5));
}
#[test]
// Rotation must not confuse service-CPU selection: CPU 2 (unused by any
// vCPU after the shift) becomes the service CPU.
fn compute_pinning_offset_with_service_cpu() {
let topo = synthetic_topo(vec![vec![0, 1, 2, 3], vec![4, 5, 6, 7]]);
let plan = topo
.compute_pinning(&Topology::new(1, 2, 2, 1), true, 1)
.unwrap();
assert_eq!(plan.assignments.len(), 4);
assert_eq!(plan.assignments[0], (0, 4));
assert_eq!(plan.assignments[1], (1, 5));
assert_eq!(plan.assignments[2], (2, 0));
assert_eq!(plan.assignments[3], (3, 1));
let service = plan.service_cpu.expect("service_cpu should be set");
assert_eq!(service, 2);
let vcpu_cpus: std::collections::HashSet<usize> =
plan.assignments.iter().map(|a| a.1).collect();
assert!(!vcpu_cpus.contains(&service));
}
#[test]
// Offset 3 combines both effects: node rotation (offset 2) plus in-node
// LLC rotation (offset 1).
fn compute_pinning_offset_numa_combined_rotation() {
let topo = synthetic_topo_numa(vec![
(0, vec![0, 1, 2, 3]),
(0, vec![4, 5, 6, 7]),
(1, vec![8, 9, 10, 11]),
(1, vec![12, 13, 14, 15]),
]);
let plan = topo
.compute_pinning(&Topology::new(2, 4, 2, 1), false, 3)
.unwrap();
assert_eq!(plan.assignments.len(), 8);
assert_eq!(plan.assignments[0], (0, 12));
assert_eq!(plan.assignments[1], (1, 13));
assert_eq!(plan.assignments[2], (2, 8));
assert_eq!(plan.assignments[3], (3, 9));
assert_eq!(plan.assignments[4], (4, 4));
assert_eq!(plan.assignments[5], (5, 5));
assert_eq!(plan.assignments[6], (6, 0));
assert_eq!(plan.assignments[7], (7, 1));
}
/// Best-effort removal of a lock file left over from a previous test run.
/// A missing file is the common case and is not an error.
fn cleanup_lock(path: &str) {
    // Deliberately swallow every outcome: the file may not exist yet.
    match std::fs::remove_file(path) {
        Ok(()) | Err(_) => {}
    }
}
// flock semantics tests: each test uses its own fixed /tmp path so parallel
// test runs within this binary do not contend with each other.
#[test]
// A fresh file must accept an exclusive lock.
fn resource_lock_exclusive_acquires() {
let path = "/tmp/ktstr-test-flock-excl-acquires.lock";
cleanup_lock(path);
let fd = try_flock(path, FlockMode::Exclusive).expect("open should succeed");
assert!(fd.is_some(), "exclusive lock on fresh file should succeed");
cleanup_lock(path);
}
#[test]
// A fresh file must accept a shared lock.
fn resource_lock_shared_acquires() {
let path = "/tmp/ktstr-test-flock-shared-acquires.lock";
cleanup_lock(path);
let fd = try_flock(path, FlockMode::Shared).expect("open should succeed");
assert!(fd.is_some(), "shared lock on fresh file should succeed");
cleanup_lock(path);
}
#[test]
// try_flock is non-blocking: a second exclusive attempt while the first
// holder is alive must report None, not block or error.
fn resource_lock_exclusive_contention() {
let path = "/tmp/ktstr-test-flock-excl-contention.lock";
cleanup_lock(path);
let holder = try_flock(path, FlockMode::Exclusive)
.expect("open should succeed")
.expect("first lock should succeed");
let second = try_flock(path, FlockMode::Exclusive).expect("open should succeed");
assert!(
second.is_none(),
"second exclusive lock while held should return None",
);
drop(holder);
cleanup_lock(path);
}
#[test]
// Two shared holders may coexist on the same file.
fn resource_lock_shared_coexist() {
let path = "/tmp/ktstr-test-flock-shared-coexist.lock";
cleanup_lock(path);
let h1 = try_flock(path, FlockMode::Shared)
.expect("open should succeed")
.expect("first shared lock should succeed");
let h2 = try_flock(path, FlockMode::Shared)
.expect("open should succeed")
.expect("second shared lock should succeed");
drop(h1);
drop(h2);
cleanup_lock(path);
}
#[test]
// An exclusive holder excludes shared lockers too.
fn resource_lock_exclusive_blocks_shared() {
let path = "/tmp/ktstr-test-flock-excl-blocks-sh.lock";
cleanup_lock(path);
let holder = try_flock(path, FlockMode::Exclusive)
.expect("open should succeed")
.expect("exclusive lock should succeed");
let shared = try_flock(path, FlockMode::Shared).expect("open should succeed");
assert!(
shared.is_none(),
"shared lock should fail while exclusive is held",
);
drop(holder);
cleanup_lock(path);
}
#[test]
// Symmetrically, a shared holder blocks an exclusive attempt.
fn resource_lock_shared_blocks_exclusive() {
let path = "/tmp/ktstr-test-flock-sh-blocks-excl.lock";
cleanup_lock(path);
let holder = try_flock(path, FlockMode::Shared)
.expect("open should succeed")
.expect("shared lock should succeed");
let excl = try_flock(path, FlockMode::Exclusive).expect("open should succeed");
assert!(
excl.is_none(),
"exclusive lock should fail while shared is held",
);
drop(holder);
cleanup_lock(path);
}
#[test]
// Dropping the returned fd must release the flock, making the file
// immediately lockable again.
fn resource_lock_release_on_drop() {
let path = "/tmp/ktstr-test-flock-release-drop.lock";
cleanup_lock(path);
{
let _holder = try_flock(path, FlockMode::Exclusive)
.expect("open should succeed")
.expect("lock should succeed");
}
let fd = try_flock(path, FlockMode::Exclusive)
.expect("open should succeed")
.expect("lock should be available after drop");
drop(fd);
cleanup_lock(path);
}
// acquire_resource_locks tests use CPU/LLC indices in the 90xxx range,
// presumably chosen so lock paths never collide with real host resources —
// TODO confirm against the lock-path scheme.
#[test]
// Exclusive mode on one free LLC: one lock fd, llc_offset echoing the LLC.
fn resource_lock_exclusive_success() {
let plan = PinningPlan {
assignments: vec![(0, 90100), (1, 90101)],
service_cpu: None,
llc_indices: vec![90100],
locks: Vec::new(),
};
let llc_indices = &[90100usize];
cleanup_lock("/tmp/ktstr-llc-90100.lock");
let outcome = acquire_resource_locks(&plan, llc_indices, LlcLockMode::Exclusive).unwrap();
match outcome {
LockOutcome::Acquired { llc_offset, locks } => {
assert_eq!(llc_offset, 90100);
assert_eq!(locks.len(), 1);
}
LockOutcome::Unavailable(reason) => {
panic!("expected Acquired, got Unavailable: {reason}");
}
}
cleanup_lock("/tmp/ktstr-llc-90100.lock");
}
#[test]
// Shared mode also takes per-CPU locks: 1 LLC + 2 vCPU CPUs = 3 fds.
fn resource_lock_shared_includes_cpu_locks() {
let plan = PinningPlan {
assignments: vec![(0, 90200), (1, 90201)],
service_cpu: None,
llc_indices: vec![90200],
locks: Vec::new(),
};
let llc_indices = &[90200usize];
cleanup_lock("/tmp/ktstr-llc-90200.lock");
cleanup_lock("/tmp/ktstr-cpu-90200.lock");
cleanup_lock("/tmp/ktstr-cpu-90201.lock");
let outcome = acquire_resource_locks(&plan, llc_indices, LlcLockMode::Shared).unwrap();
match outcome {
LockOutcome::Acquired { locks, .. } => {
assert_eq!(locks.len(), 3);
}
LockOutcome::Unavailable(reason) => {
panic!("expected Acquired, got Unavailable: {reason}");
}
}
cleanup_lock("/tmp/ktstr-llc-90200.lock");
cleanup_lock("/tmp/ktstr-cpu-90200.lock");
cleanup_lock("/tmp/ktstr-cpu-90201.lock");
}
#[test]
// The service CPU counts toward CPU locks in shared mode:
// 1 LLC + 1 vCPU CPU + 1 service CPU = 3 fds.
fn resource_lock_shared_with_service_cpu() {
let plan = PinningPlan {
assignments: vec![(0, 90300)],
service_cpu: Some(90301),
llc_indices: vec![90300],
locks: Vec::new(),
};
let llc_indices = &[90300usize];
cleanup_lock("/tmp/ktstr-llc-90300.lock");
cleanup_lock("/tmp/ktstr-cpu-90300.lock");
cleanup_lock("/tmp/ktstr-cpu-90301.lock");
let outcome = acquire_resource_locks(&plan, llc_indices, LlcLockMode::Shared).unwrap();
match outcome {
LockOutcome::Acquired { locks, .. } => {
assert_eq!(locks.len(), 3);
}
LockOutcome::Unavailable(reason) => {
panic!("expected Acquired, got Unavailable: {reason}");
}
}
cleanup_lock("/tmp/ktstr-llc-90300.lock");
cleanup_lock("/tmp/ktstr-cpu-90300.lock");
cleanup_lock("/tmp/ktstr-cpu-90301.lock");
}
#[test]
// Exclusive mode owns whole LLCs, so no per-CPU locks are taken even when
// vCPUs and a service CPU exist: exactly 1 fd.
fn resource_lock_exclusive_skips_cpu_locks() {
let plan = PinningPlan {
assignments: vec![(0, 90400), (1, 90401)],
service_cpu: Some(90402),
llc_indices: vec![90400],
locks: Vec::new(),
};
let llc_indices = &[90400usize];
cleanup_lock("/tmp/ktstr-llc-90400.lock");
let outcome = acquire_resource_locks(&plan, llc_indices, LlcLockMode::Exclusive).unwrap();
match outcome {
LockOutcome::Acquired { locks, .. } => {
assert_eq!(locks.len(), 1);
}
LockOutcome::Unavailable(reason) => {
panic!("expected Acquired, got Unavailable: {reason}");
}
}
cleanup_lock("/tmp/ktstr-llc-90400.lock");
}
#[test]
// A held LLC lock must surface as Unavailable (not an Err), and the reason
// string must name the busy LLC so the failure is traceable.
fn resource_lock_contention_returns_unavailable() {
let plan = PinningPlan {
assignments: vec![(0, 90500)],
service_cpu: None,
llc_indices: vec![90500],
locks: Vec::new(),
};
let llc_indices = &[90500usize];
cleanup_lock("/tmp/ktstr-llc-90500.lock");
let holder = try_flock("/tmp/ktstr-llc-90500.lock", FlockMode::Exclusive)
.unwrap()
.unwrap();
let outcome = acquire_resource_locks(&plan, llc_indices, LlcLockMode::Exclusive).unwrap();
match outcome {
LockOutcome::Unavailable(reason) => {
assert!(
reason.contains("90500"),
"reason should identify the busy LLC: {reason}",
);
}
LockOutcome::Acquired { .. } => {
panic!("expected Unavailable while lock is held");
}
}
drop(holder);
cleanup_lock("/tmp/ktstr-llc-90500.lock");
}
#[test]
// All-or-nothing: if the second LLC is busy, the first LLC's lock (already
// taken) must be released, proven by re-acquiring it exclusively.
fn resource_lock_all_or_nothing() {
let plan = PinningPlan {
assignments: vec![(0, 90600), (1, 90601)],
service_cpu: None,
llc_indices: vec![90600, 90601],
locks: Vec::new(),
};
let llc_indices = &[90600usize, 90601];
cleanup_lock("/tmp/ktstr-llc-90600.lock");
cleanup_lock("/tmp/ktstr-llc-90601.lock");
let holder = try_flock("/tmp/ktstr-llc-90601.lock", FlockMode::Exclusive)
.unwrap()
.unwrap();
let outcome = acquire_resource_locks(&plan, llc_indices, LlcLockMode::Exclusive).unwrap();
assert!(
matches!(outcome, LockOutcome::Unavailable(_)),
"should fail when second LLC is busy",
);
let reacquire = try_flock("/tmp/ktstr-llc-90600.lock", FlockMode::Exclusive)
.unwrap()
.expect("LLC 90600 should be released after all-or-nothing failure");
drop(reacquire);
drop(holder);
cleanup_lock("/tmp/ktstr-llc-90600.lock");
cleanup_lock("/tmp/ktstr-llc-90601.lock");
}
#[test]
// Shared mode: a held per-CPU lock blocks the whole acquisition, and the
// already-taken LLC lock must be rolled back (re-acquirable afterwards).
fn resource_lock_shared_cpu_contention() {
let plan = PinningPlan {
assignments: vec![(0, 90700)],
service_cpu: None,
llc_indices: vec![90700],
locks: Vec::new(),
};
let llc_indices = &[90700usize];
cleanup_lock("/tmp/ktstr-llc-90700.lock");
cleanup_lock("/tmp/ktstr-cpu-90700.lock");
let holder = try_flock("/tmp/ktstr-cpu-90700.lock", FlockMode::Exclusive)
.unwrap()
.unwrap();
let outcome = acquire_resource_locks(&plan, llc_indices, LlcLockMode::Shared).unwrap();
assert!(
matches!(outcome, LockOutcome::Unavailable(_)),
"should fail when CPU lock is held",
);
let reacquire = try_flock("/tmp/ktstr-llc-90700.lock", FlockMode::Shared)
.unwrap()
.expect("LLC 90700 should be released after CPU contention");
drop(reacquire);
drop(holder);
cleanup_lock("/tmp/ktstr-llc-90700.lock");
cleanup_lock("/tmp/ktstr-cpu-90700.lock");
}
#[test]
// No LLC indices at all: acquisition trivially succeeds with zero locks
// and a zero llc_offset.
fn resource_lock_empty_llc_indices() {
let plan = PinningPlan {
assignments: vec![(0, 90800)],
service_cpu: None,
llc_indices: vec![],
locks: Vec::new(),
};
let outcome = acquire_resource_locks(&plan, &[], LlcLockMode::Exclusive).unwrap();
match outcome {
LockOutcome::Acquired { llc_offset, locks } => {
assert_eq!(llc_offset, 0);
assert!(locks.is_empty());
}
LockOutcome::Unavailable(reason) => {
panic!("expected Acquired, got Unavailable: {reason}");
}
}
}
#[test]
// Contention specifically on the SERVICE CPU lock: the reason must call it
// out, and both the LLC and vCPU-CPU locks taken earlier must be released.
fn resource_lock_service_cpu_contention() {
let plan = PinningPlan {
assignments: vec![(0, 90900)],
service_cpu: Some(90901),
llc_indices: vec![90850],
locks: Vec::new(),
};
let llc_indices = &[90850usize];
cleanup_lock("/tmp/ktstr-llc-90850.lock");
cleanup_lock("/tmp/ktstr-cpu-90900.lock");
cleanup_lock("/tmp/ktstr-cpu-90901.lock");
let holder = try_flock("/tmp/ktstr-cpu-90901.lock", FlockMode::Exclusive)
.unwrap()
.unwrap();
let outcome = acquire_resource_locks(&plan, llc_indices, LlcLockMode::Shared).unwrap();
match &outcome {
LockOutcome::Unavailable(reason) => {
assert!(
reason.contains("service CPU") && reason.contains("90901"),
"reason should mention service CPU 90901: {reason}",
);
}
LockOutcome::Acquired { .. } => {
panic!("expected Unavailable when service CPU is held");
}
}
let reacquire_llc = try_flock("/tmp/ktstr-llc-90850.lock", FlockMode::Shared)
.unwrap()
.expect("LLC 90850 should be released after service CPU contention");
let reacquire_cpu = try_flock("/tmp/ktstr-cpu-90900.lock", FlockMode::Exclusive)
.unwrap()
.expect("CPU 90900 should be released after service CPU contention");
drop(reacquire_llc);
drop(reacquire_cpu);
drop(holder);
cleanup_lock("/tmp/ktstr-llc-90850.lock");
cleanup_lock("/tmp/ktstr-cpu-90900.lock");
cleanup_lock("/tmp/ktstr-cpu-90901.lock");
}
#[test]
// A clean window of 3 consecutive CPU ids yields one lock fd per CPU.
fn cpu_lock_window_success() {
for c in 91300..91303 {
cleanup_lock(&format!("/tmp/ktstr-cpu-{c}.lock"));
}
let locks = try_acquire_cpu_window(91300, 3).unwrap();
assert_eq!(locks.len(), 3);
for c in 91300..91303 {
cleanup_lock(&format!("/tmp/ktstr-cpu-{c}.lock"));
}
}
#[test]
// A window fails if ANY member is held (first or second alike), and on
// failure the CPUs it did lock are released again.
fn cpu_lock_window_contention_all_or_nothing() {
cleanup_lock("/tmp/ktstr-cpu-91400.lock");
cleanup_lock("/tmp/ktstr-cpu-91401.lock");
let holder = try_flock("/tmp/ktstr-cpu-91400.lock", FlockMode::Exclusive)
.unwrap()
.unwrap();
let result = try_acquire_cpu_window(91400, 2);
assert!(result.is_err(), "should fail when first CPU is held");
drop(holder);
let holder2 = try_flock("/tmp/ktstr-cpu-91401.lock", FlockMode::Exclusive)
.unwrap()
.unwrap();
let result2 = try_acquire_cpu_window(91400, 2);
assert!(result2.is_err(), "should fail when second CPU is held");
let reacquire = try_flock("/tmp/ktstr-cpu-91400.lock", FlockMode::Exclusive)
.unwrap()
.expect("CPU 91400 should be released after all-or-nothing");
drop(reacquire);
drop(holder2);
cleanup_lock("/tmp/ktstr-cpu-91400.lock");
cleanup_lock("/tmp/ktstr-cpu-91401.lock");
}
#[test]
// Requesting zero CPUs must succeed with an empty lock set.
fn cpu_lock_zero_count() {
let locks = acquire_cpu_locks(0, 4, None).unwrap();
assert!(locks.is_empty());
}
#[test]
// When the window at the held CPU fails, a window starting one CPU later
// must succeed — the basis of the sliding-window search.
fn cpu_lock_contention_slides_window() {
for c in 91500..91503 {
cleanup_lock(&format!("/tmp/ktstr-cpu-{c}.lock"));
}
let holder = try_flock("/tmp/ktstr-cpu-91500.lock", FlockMode::Exclusive)
.unwrap()
.unwrap();
let result = try_acquire_cpu_window(91500, 2);
assert!(result.is_err(), "window starting at held CPU should fail");
let locks = try_acquire_cpu_window(91501, 2).unwrap();
assert_eq!(locks.len(), 2);
drop(locks);
drop(holder);
for c in 91500..91503 {
cleanup_lock(&format!("/tmp/ktstr-cpu-{c}.lock"));
}
}
#[test]
// Happy path: 3 CPU locks out of a 100-CPU pool. ResourceContention is
// matched separately only to render its (shorter) message on failure.
fn cpu_lock_acquire_success() {
let locks = match acquire_cpu_locks(3, 100, None) {
Ok(l) => l,
Err(e) if e.downcast_ref::<ResourceContention>().is_some() => {
panic!("{e}");
}
Err(e) => panic!("{e:#}"),
};
assert_eq!(locks.len(), 3);
}
#[test]
// With CPU 0 held, the search must slide past it and still find 2 CPUs.
// The holder is dropped before panicking so a failure does not leave the
// lock file behind.
fn cpu_lock_acquire_slides_past_held() {
cleanup_lock("/tmp/ktstr-cpu-0.lock");
let holder = try_flock("/tmp/ktstr-cpu-0.lock", FlockMode::Exclusive)
.unwrap()
.unwrap();
let locks = match acquire_cpu_locks(2, 100, None) {
Ok(l) => l,
Err(e) if e.downcast_ref::<ResourceContention>().is_some() => {
drop(holder);
cleanup_lock("/tmp/ktstr-cpu-0.lock");
panic!("{e}");
}
Err(e) => panic!("{e:#}"),
};
assert_eq!(locks.len(), 2);
drop(locks);
drop(holder);
cleanup_lock("/tmp/ktstr-cpu-0.lock");
}
#[test]
// 2 CPUs requested from a 0-CPU pool: no window can fit, and the error
// must be a ResourceContention so callers can route it to a retry.
fn cpu_lock_acquire_no_windows_fit() {
let err = acquire_cpu_locks(2, 0, None).unwrap_err();
assert!(
err.downcast_ref::<ResourceContention>().is_some(),
"error should be ResourceContention: {err}",
);
}
#[test]
// With a topology supplied, acquire_cpu_locks adds a SHARED lock on the
// LLC covering the window: 2 CPU locks + 1 LLC lock = 3 fds. The shared
// LLC lock must coexist with other shared lockers but block exclusive.
// Both lock-path prefixes are redirected into tempdirs so the test never
// touches real /tmp lock files.
fn cpu_lock_acquire_with_llc_shared() {
let _prefix = LlcLockPrefixGuard::new();
let cpu_prefix_dir = tempfile::TempDir::new().expect("tempdir");
let cpu_prefix = format!("{}/cpu-", cpu_prefix_dir.path().display());
CPU_LOCK_PREFIX_OVERRIDE.with(|p| *p.borrow_mut() = Some(cpu_prefix));
struct CpuPrefixGuard;
impl Drop for CpuPrefixGuard {
fn drop(&mut self) {
CPU_LOCK_PREFIX_OVERRIDE.with(|p| *p.borrow_mut() = None);
}
}
let _cpu_prefix = CpuPrefixGuard;
let topo = HostTopology::new_for_tests(&[((0..100).collect(), 0)]);
let locks = match acquire_cpu_locks(2, 100, Some(&topo)) {
Ok(l) => l,
Err(e) if e.downcast_ref::<ResourceContention>().is_some() => {
panic!("{e}");
}
Err(e) => panic!("{e:#}"),
};
assert_eq!(locks.len(), 3);
let llc_path = llc_lock_path(0);
let shared2 = try_flock(&llc_path, FlockMode::Shared)
.unwrap()
.expect("second shared LLC should coexist");
let excl = try_flock(&llc_path, FlockMode::Exclusive).unwrap();
assert!(
excl.is_none(),
"exclusive LLC should fail while shared is held",
);
drop(shared2);
drop(locks);
drop(cpu_prefix_dir);
}
#[test]
// acquire_llc_shared_locks: one shared fd per LLC touched by the CPU set;
// it coexists with other shared lockers and blocks exclusive ones.
fn cpu_lock_llc_shared_protection() {
let _prefix = LlcLockPrefixGuard::new();
let topo = HostTopology::new_for_tests(&[(vec![91200, 91201], 0)]);
let cpus = vec![91200usize, 91201];
let llc_locks = acquire_llc_shared_locks(&topo, &cpus).unwrap();
assert_eq!(llc_locks.len(), 1);
let llc_path = llc_lock_path(0);
let shared2 = try_flock(&llc_path, FlockMode::Shared)
.unwrap()
.expect("second shared LLC should coexist");
let excl = try_flock(&llc_path, FlockMode::Exclusive).unwrap();
assert!(
excl.is_none(),
"exclusive LLC should fail while shared is held",
);
drop(shared2);
drop(llc_locks);
}
// RAII guard that redirects LLC lock files into a private tempdir for the
// duration of a test, and clears the thread-local override (and removes
// the tempdir) on drop.
struct LlcLockPrefixGuard {
// Held only for its Drop: deletes the directory containing the lock files.
_dir: tempfile::TempDir,
}
impl LlcLockPrefixGuard {
fn new() -> Self {
let dir = tempfile::TempDir::new().expect("tempdir");
let prefix = format!("{}/llc-", dir.path().display());
LLC_LOCK_PREFIX_OVERRIDE.with(|p| *p.borrow_mut() = Some(prefix));
LlcLockPrefixGuard { _dir: dir }
}
}
impl Drop for LlcLockPrefixGuard {
fn drop(&mut self) {
LLC_LOCK_PREFIX_OVERRIDE.with(|p| *p.borrow_mut() = None);
}
}
// RAII guard that overrides the set of allowed CPUs (thread-local) for a
// test, restoring the default on drop.
struct AllowedCpusGuard;
impl AllowedCpusGuard {
fn new(cpus: Vec<usize>) -> Self {
ALLOWED_CPUS_OVERRIDE.with(|p| *p.borrow_mut() = Some(cpus));
AllowedCpusGuard
}
}
impl Drop for AllowedCpusGuard {
fn drop(&mut self) {
ALLOWED_CPUS_OVERRIDE.with(|p| *p.borrow_mut() = None);
}
}
#[test]
// With no explicit cap, the plan reserves 30% of the 10 allowed CPUs
// (budget 3). That spans 2 two-CPU LLCs (one fully, one partially); the
// CPU list is truncated to exactly the budget.
fn acquire_llc_plan_none_cap_reserves_thirty_percent_cpus() {
let _prefix = LlcLockPrefixGuard::new();
let _allowed = AllowedCpusGuard::new(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
let topo = HostTopology::new_for_tests(&[
(vec![0, 1], 0),
(vec![2, 3], 0),
(vec![4, 5], 0),
(vec![6, 7], 0),
(vec![8, 9], 0),
]);
let test_topo = crate::topology::TestTopology::synthetic(4, 1);
let plan = acquire_llc_plan(&topo, &test_topo, None)
.expect("clean pool must allow SH on every selected LLC");
assert_eq!(
plan.locked_llcs.len(),
2,
"budget of 3 CPUs flocks 2 LLCs (2 CPUs + 1 partial): {:?}",
plan.locked_llcs,
);
assert_eq!(
plan.cpus.len(),
3,
"plan.cpus is truncated to exactly the budget: {:?}",
plan.cpus,
);
assert_eq!(plan.locks.len(), 2, "one fd per selected LLC");
}
#[test]
// A peer holding the only LLC exclusively must make the plan fail with a
// ResourceContention that names the busy LLC.
fn acquire_llc_plan_bails_on_exclusive_peer() {
let _prefix = LlcLockPrefixGuard::new();
let _allowed = AllowedCpusGuard::new(vec![0]);
let topo = HostTopology::new_for_tests(&[(vec![0], 0)]);
let busy_path = llc_lock_path(0);
let _peer_ex = try_flock(&busy_path, FlockMode::Exclusive)
.unwrap()
.expect("peer EX must acquire on clean pool");
let test_topo = crate::topology::TestTopology::synthetic(4, 1);
let err = acquire_llc_plan(&topo, &test_topo, None)
.expect_err("EX peer must block SH acquisition of the only LLC");
let rendered = format!("{err:#}");
assert!(
rendered.contains("LLC 0"),
"error must name the busy LLC index so fuser can trace: {rendered}",
);
assert!(
err.downcast_ref::<ResourceContention>().is_some(),
"error must downcast to ResourceContention for retry routing: {rendered}",
);
drop(_peer_ex);
}
#[test]
// A peer holding the LLC in SHARED mode must NOT block another shared
// plan; one fd per LLC group is still acquired.
fn acquire_llc_plan_coexists_with_shared_peer() {
let _prefix = LlcLockPrefixGuard::new();
let _allowed = AllowedCpusGuard::new(vec![0]);
let topo = HostTopology::new_for_tests(&[(vec![0], 0)]);
let shared_path = llc_lock_path(0);
let _peer_sh = try_flock(&shared_path, FlockMode::Shared)
.unwrap()
.expect("peer SH must acquire on clean pool");
let test_topo = crate::topology::TestTopology::synthetic(4, 1);
let plan = acquire_llc_plan(&topo, &test_topo, None)
.expect("second SH caller must coexist with the first");
assert_eq!(
plan.locks.len(),
topo.llc_groups.len(),
"second SH caller must acquire one fd per LLC group",
);
}
/// Serialize tests that mutate process-global environment variables.
///
/// A poisoned mutex (an earlier test panicked while holding it) is
/// recovered rather than propagated: the guard protects no invariant of
/// its own, it only sequences env access.
fn env_lock() -> std::sync::MutexGuard<'static, ()> {
    use std::sync::{Mutex, OnceLock};
    static ENV_LOCK: OnceLock<Mutex<()>> = OnceLock::new();
    let lock = ENV_LOCK.get_or_init(|| Mutex::new(()));
    match lock.lock() {
        Ok(guard) => guard,
        Err(poisoned) => poisoned.into_inner(),
    }
}
/// RAII guard over one environment variable: `set` writes a value,
/// `remove` clears it, and Drop always removes it so no test leaks env
/// state into later tests. Callers are expected to hold `env_lock()`,
/// since `set_var`/`remove_var` mutate process-global state (hence the
/// `unsafe` blocks).
struct EnvGuard {
    name: &'static str,
}
impl EnvGuard {
    /// Set `name=value` for the guard's lifetime.
    fn set(name: &'static str, value: &str) -> Self {
        // SAFETY: env mutation is serialized by callers via env_lock().
        unsafe { std::env::set_var(name, value) };
        Self { name }
    }
    /// Ensure `name` is absent for the guard's lifetime.
    fn remove(name: &'static str) -> Self {
        // SAFETY: env mutation is serialized by callers via env_lock().
        unsafe { std::env::remove_var(name) };
        Self { name }
    }
}
impl Drop for EnvGuard {
    fn drop(&mut self) {
        // SAFETY: same serialization argument as in the constructors.
        unsafe { std::env::remove_var(self.name) };
    }
}
#[test]
fn cpu_cap_new_rejects_zero() {
let err = CpuCap::new(0).unwrap_err();
let msg = format!("{err:#}");
assert!(msg.contains("≥ 1"), "msg={msg}");
assert!(msg.contains("got 0"), "msg={msg}");
}
#[test]
fn cpu_cap_new_accepts_one() {
let cap = CpuCap::new(1).expect("cap of 1 must succeed");
assert_eq!(cap.effective_count(4).unwrap(), 1);
}
#[test]
fn cpu_cap_new_accepts_usize_max() {
let cap = CpuCap::new(usize::MAX).expect("MAX accepted at construction");
assert!(cap.effective_count(usize::MAX).is_ok());
}
#[test]
fn cpu_cap_effective_count_fits() {
let cap = CpuCap::new(3).unwrap();
assert_eq!(cap.effective_count(4).unwrap(), 3);
assert_eq!(cap.effective_count(3).unwrap(), 3);
}
#[test]
fn cpu_cap_effective_count_exceeds_host() {
let cap = CpuCap::new(8).unwrap();
let err = cap.effective_count(4).expect_err("8 > 4 must error");
let msg = format!("{err:#}");
assert!(msg.contains("8"), "msg must name requested cap: {msg}");
assert!(msg.contains("4"), "msg must name allowed-CPU count: {msg}");
assert!(
err.downcast_ref::<ResourceContention>().is_some(),
"must be a ResourceContention for retry routing: {msg}",
);
}
#[test]
fn cpu_cap_effective_count_at_host_boundary() {
let cap = CpuCap::new(4).unwrap();
assert_eq!(cap.effective_count(4).unwrap(), 4);
}
#[test]
fn cpu_cap_resolve_cli_wins_over_env() {
let _lock = env_lock();
let _env = EnvGuard::set("KTSTR_CPU_CAP", "99");
let cap = CpuCap::resolve(Some(3)).unwrap().expect("CLI flag set");
assert_eq!(cap.effective_count(4).unwrap(), 3, "CLI wins");
}
#[test]
fn cpu_cap_resolve_no_cli_no_env_returns_none() {
let _lock = env_lock();
let _env = EnvGuard::remove("KTSTR_CPU_CAP");
assert!(CpuCap::resolve(None).unwrap().is_none());
}
#[test]
fn cpu_cap_resolve_env_set() {
let _lock = env_lock();
let _env = EnvGuard::set("KTSTR_CPU_CAP", "2");
let cap = CpuCap::resolve(None)
.expect("resolve must succeed")
.expect("env-set cap must yield Some");
assert_eq!(cap.effective_count(8).unwrap(), 2);
}
#[test]
fn cpu_cap_resolve_empty_env_is_absent() {
let _lock = env_lock();
let _env = EnvGuard::set("KTSTR_CPU_CAP", "");
assert!(CpuCap::resolve(None).unwrap().is_none());
}
#[test]
fn cpu_cap_resolve_non_numeric_env_errors() {
// Serialize env-var mutation across the test binary's threads.
let _lock = env_lock();
let _env = EnvGuard::set("KTSTR_CPU_CAP", "not-a-number");
// Garbage must surface as an error naming the offending variable.
let err = CpuCap::resolve(None).expect_err("non-numeric must error");
let msg = format!("{err:#}");
assert!(msg.contains("KTSTR_CPU_CAP"), "msg={msg}");
}
#[test]
fn cpu_cap_resolve_zero_env_rejected() {
// Serialize env-var mutation across the test binary's threads.
let _lock = env_lock();
let _env = EnvGuard::set("KTSTR_CPU_CAP", "0");
// Zero parses but is semantically invalid; the message must state the
// ≥ 1 requirement and echo the rejected value.
let err = CpuCap::resolve(None).expect_err("zero must error");
let msg = format!("{err:#}");
assert!(msg.contains("≥ 1"), "msg={msg}");
assert!(msg.contains("got 0"), "msg={msg}");
}
#[test]
fn cpu_cap_resolve_zero_cli_rejected_even_with_valid_env() {
// Serialize env-var mutation across the test binary's threads.
let _lock = env_lock();
// A valid env value must NOT rescue an invalid CLI value: CLI has
// precedence, so its zero is rejected outright.
let _env = EnvGuard::set("KTSTR_CPU_CAP", "2");
let err = CpuCap::resolve(Some(0)).expect_err("cli=0 must error");
let msg = format!("{err:#}");
assert!(msg.contains("≥ 1"), "msg={msg}");
}
#[test]
fn env_guard_set_and_drop_removes_variable() {
// Serialize env-var mutation across the test binary's threads.
let _lock = env_lock();
// Dedicated probe name so this test cannot collide with other
// KTSTR_CPU_CAP users even if the lock is misused elsewhere.
let probe = "KTSTR_CPU_CAP_ENV_GUARD_TEST";
{
let _env = EnvGuard::set(probe, "abc");
assert_eq!(
std::env::var(probe).ok().as_deref(),
Some("abc"),
"set must apply immediately",
);
}
// The guard dropped at the end of the inner scope: variable must be gone.
assert!(
std::env::var(probe).is_err(),
"EnvGuard::drop must remove the variable",
);
}
/// Build a `HostTopology` from `(cpus, numa_node)` pairs for planner tests.
fn synth_host_topo(groups: &[(Vec<usize>, usize)]) -> HostTopology {
    HostTopology::new_for_tests(groups)
}
#[test]
fn host_llcs_by_numa_node_single_node() {
    // Three LLCs, all on node 0: the map collapses to a single entry
    // listing every LLC index.
    let topo = synth_host_topo(&[(vec![0, 1], 0), (vec![2, 3], 0), (vec![4, 5], 0)]);
    let by_node = topo.host_llcs_by_numa_node();
    assert_eq!(by_node.len(), 1, "single-node host has one entry");
    assert_eq!(by_node.get(&0), Some(&vec![0, 1, 2]));
}
#[test]
fn host_llcs_by_numa_node_dual_node() {
    // LLCs interleaved across two nodes: each node's entry lists its own
    // LLC indices in ascending order.
    let groups = [
        (vec![0, 1], 0),
        (vec![2, 3], 1),
        (vec![4, 5], 0),
        (vec![6, 7], 1),
    ];
    let by_node = synth_host_topo(&groups).host_llcs_by_numa_node();
    assert_eq!(by_node.len(), 2);
    assert_eq!(by_node.get(&0), Some(&vec![0, 2]));
    assert_eq!(by_node.get(&1), Some(&vec![1, 3]));
}
#[test]
fn numa_nodes_with_capacity_asymmetric() {
    // Node 0 owns three single-CPU LLCs, node 1 owns one; the capacity
    // filter must drop node 1 once the requirement exceeds its LLC count.
    let topo = synth_host_topo(&[(vec![0], 0), (vec![1], 0), (vec![2], 0), (vec![3], 1)]);
    let nodes_with = |min_llcs| -> Vec<usize> {
        topo.numa_nodes_with_capacity(min_llcs)
            .into_iter()
            .map(|(node, _)| node)
            .collect()
    };
    assert_eq!(nodes_with(2), vec![0], "only node 0 has ≥ 2 LLCs");
    assert_eq!(nodes_with(1), vec![0, 1], "both nodes have ≥ 1 LLC");
}
#[test]
fn numa_nodes_with_capacity_over_max_returns_empty() {
    // No node can satisfy a requirement larger than any node's LLC count.
    let topo = synth_host_topo(&[(vec![0], 0), (vec![1], 1)]);
    assert!(topo.numa_nodes_with_capacity(99).is_empty());
}
#[test]
fn numa_nodes_sorted_by_distance_identity_closure() {
    // Self-distance 10, everything else 20: the anchor sorts first and
    // the equidistant remainder falls back to ascending node order.
    let topo = synth_host_topo(&[(vec![0], 0), (vec![1], 1), (vec![2], 2)]);
    let distance = |from, to| if from == to { 10 } else { 20 };
    let order = topo.numa_nodes_sorted_by_distance(1, distance);
    assert_eq!(order[0], 1, "anchor node first");
    assert_eq!(
        &order[1..],
        &[0, 2],
        "tied-distance nodes in ascending order"
    );
}
#[test]
fn numa_nodes_sorted_by_distance_unreachable_demoted() {
    // Distance 255 is the conventional "unreachable" sentinel in the
    // SLIT/ACPI encoding; node 2 must sort after every reachable node.
    let topo = synth_host_topo(&[(vec![0], 0), (vec![1], 1), (vec![2], 2)]);
    let order = topo.numa_nodes_sorted_by_distance(0, |from, to| {
        if (from, to) == (0, 2) {
            255
        } else if (from, to) == (0, 0) {
            10
        } else {
            20
        }
    });
    assert_eq!(order, vec![0, 1, 2]);
    assert_eq!(*order.last().unwrap(), 2, "unreachable node is last");
}
#[test]
fn numa_nodes_sorted_by_distance_skips_empty_nodes() {
    // An anchor that owns no LLCs contributes nothing; only populated
    // nodes appear in the ordering.
    let topo = synth_host_topo(&[(vec![0], 0)]);
    assert_eq!(
        topo.numa_nodes_sorted_by_distance(99, |_, _| 20),
        vec![0],
        "only node 0 is in host_node_llcs"
    );
}
#[test]
fn acquire_llc_plan_rejects_cap_over_allowed_cpus() {
// Restrict the allowed-CPU set to 2 CPUs for the duration of the test.
let _allowed = AllowedCpusGuard::new(vec![0, 1]);
let topo = synth_host_topo(&[(vec![0], 0), (vec![1], 0)]);
let test_topo = crate::topology::TestTopology::synthetic(4, 1);
// cap=3 exceeds the 2 allowed CPUs — acquisition must refuse up front.
let cap = CpuCap::new(3).unwrap();
let err = acquire_llc_plan(&topo, &test_topo, Some(cap))
.expect_err("cap > allowed_cpus must error");
// Typed error so callers can route this to the retry/backoff path.
assert!(
err.downcast_ref::<ResourceContention>().is_some(),
"must be ResourceContention: {err:#}"
);
}
#[test]
fn plan_from_snapshots_returns_ascending_indices() {
    // LLCs 2 and 3 carry peer holders (a score boost), yet the final
    // selection must still come out sorted by LLC index.
    let topo = synth_host_topo(&[(vec![0], 0), (vec![1], 0), (vec![2], 0), (vec![3], 0)]);
    let snapshots: Vec<LlcSnapshot> = (0..4)
        .map(|idx| {
            let holder_count = if idx >= 2 { 5 } else { 0 };
            LlcSnapshot {
                llc_idx: idx,
                lockfile_path: std::path::PathBuf::from(format!("/tmp/ktstr-llc-{idx}.lock")),
                holders: Vec::new(),
                holder_count,
            }
        })
        .collect();
    let allowed: std::collections::BTreeSet<usize> = (0..4).collect();
    let selected = plan_from_snapshots(&snapshots, 3, &topo, &allowed, |_, _| 10);
    assert_eq!(selected, vec![0, 2, 3], "step e sorts ascending");
}
#[test]
fn plan_from_snapshots_target_ge_all_selects_every_llc() {
    // When the target covers (or exceeds) the pool, every LLC is taken.
    let topo = synth_host_topo(&[(vec![0], 0), (vec![1], 1), (vec![2], 2)]);
    let snapshots: Vec<LlcSnapshot> = (0..3)
        .map(|idx| LlcSnapshot {
            llc_idx: idx,
            lockfile_path: std::path::PathBuf::from(format!("/tmp/ktstr-llc-{idx}.lock")),
            holders: Vec::new(),
            holder_count: 0,
        })
        .collect();
    let allowed: std::collections::BTreeSet<usize> = (0..3).collect();
    assert_eq!(
        plan_from_snapshots(&snapshots, 3, &topo, &allowed, |_, _| 10),
        vec![0, 1, 2]
    );
    assert_eq!(
        plan_from_snapshots(&snapshots, 999, &topo, &allowed, |_, _| 10),
        vec![0, 1, 2],
        "target > len clamps"
    );
}
#[test]
fn plan_from_snapshots_target_zero_returns_empty() {
    // target_cpus == 0 is a no-op plan: nothing selected.
    let topo = synth_host_topo(&[(vec![0], 0)]);
    let snapshots = vec![LlcSnapshot {
        llc_idx: 0,
        lockfile_path: std::path::PathBuf::from("/tmp/ktstr-llc-0.lock"),
        holders: Vec::new(),
        holder_count: 0,
    }];
    let allowed: std::collections::BTreeSet<usize> = [0].into_iter().collect();
    assert!(plan_from_snapshots(&snapshots, 0, &topo, &allowed, |_, _| 10).is_empty());
}
#[test]
fn plan_from_snapshots_prefers_higher_holder_count() {
    // LLC 1 already hosts peers; consolidation must beat the fresh LLC 0.
    let topo = synth_host_topo(&[(vec![0], 0), (vec![1], 0)]);
    let snapshots: Vec<LlcSnapshot> = (0..2)
        .map(|idx| {
            let holder_count = if idx == 1 { 5 } else { 0 };
            LlcSnapshot {
                llc_idx: idx,
                lockfile_path: std::path::PathBuf::from(format!("/tmp/ktstr-llc-{idx}.lock")),
                holders: Vec::new(),
                holder_count,
            }
        })
        .collect();
    let allowed: std::collections::BTreeSet<usize> = (0..2).collect();
    let selected = plan_from_snapshots(&snapshots, 1, &topo, &allowed, |_, _| 10);
    assert_eq!(
        selected,
        vec![1],
        "target=1 with holders [0,5] must pick LLC 1 \
(consolidation preference), not LLC 0 (fresh)"
    );
}
#[test]
fn plan_from_snapshots_always_ascending_across_target_range() {
    // Mixed nodes plus scrambled holder counts: whatever the target, the
    // planner must return exactly `target` LLCs, strictly ascending.
    let topo = synth_host_topo(&[(vec![0], 0), (vec![1], 1), (vec![2], 0), (vec![3], 1)]);
    let snapshots: Vec<LlcSnapshot> = [3, 0, 7, 1]
        .into_iter()
        .enumerate()
        .map(|(llc_idx, holder_count)| LlcSnapshot {
            llc_idx,
            lockfile_path: std::path::PathBuf::from(format!("/tmp/ktstr-llc-{llc_idx}.lock")),
            holders: Vec::new(),
            holder_count,
        })
        .collect();
    let allowed: std::collections::BTreeSet<usize> = (0..4).collect();
    for target_cpus in 1..=snapshots.len() {
        let selected = plan_from_snapshots(&snapshots, target_cpus, &topo, &allowed, |_, _| 10);
        assert_eq!(
            selected.len(),
            target_cpus,
            "target_cpus={target_cpus} must produce {target_cpus} selections, got {selected:?}"
        );
        assert!(
            selected.windows(2).all(|w| w[0] < w[1]),
            "target_cpus={target_cpus}: selection {selected:?} is not strictly ascending",
        );
    }
}
#[test]
fn make_jobs_for_plan_matches_cpu_count() {
    // With CPUs present, the job count mirrors the CPU count exactly.
    let plan = LlcPlan {
        cpus: vec![0, 1, 2, 3],
        locked_llcs: vec![0, 1],
        mems: std::collections::BTreeSet::new(),
        snapshot: Vec::new(),
        locks: Vec::new(),
    };
    assert_eq!(make_jobs_for_plan(&plan), 4);
}
#[test]
fn make_jobs_for_plan_empty_cpus_floors_to_one() {
    // make(1) treats -j0 as "unbounded", so an empty plan must still
    // yield a job count of 1, never 0.
    let plan = LlcPlan {
        cpus: Vec::new(),
        locked_llcs: Vec::new(),
        mems: std::collections::BTreeSet::new(),
        snapshot: Vec::new(),
        locks: Vec::new(),
    };
    assert_eq!(
        make_jobs_for_plan(&plan),
        1,
        "empty-cpus must floor to 1, not 0 — -j0 is unbounded",
    );
}
#[test]
fn format_llc_list_with_numa_info() {
    // With node data available, each LLC is annotated "(node N)" and the
    // whole list is bracketed and comma-separated.
    let host = synth_host_topo(&[(vec![0], 0), (vec![1], 0), (vec![2], 1), (vec![3], 1)]);
    let rendered = format_llc_list(&[0, 2], &host);
    assert!(
        rendered.contains("0 (node 0)"),
        "must annotate LLC 0 with its node: {rendered}",
    );
    assert!(
        rendered.contains("2 (node 1)"),
        "must annotate LLC 2 with its node: {rendered}",
    );
    assert_eq!(rendered, "[0 (node 0), 2 (node 1)]");
}
#[test]
fn format_llc_list_single_llc() {
    // One-element list still gets brackets and the node annotation.
    let host = synth_host_topo(&[(vec![0], 0)]);
    assert_eq!(format_llc_list(&[0], &host), "[0 (node 0)]");
}
#[test]
fn format_llc_list_without_numa_info() {
    // Clearing cpu_to_node simulates a host where no NUMA mapping was
    // read; the renderer must degrade to bare LLC indices.
    let mut host = synth_host_topo(&[(vec![0], 0), (vec![1], 0)]);
    host.cpu_to_node.clear();
    assert_eq!(
        format_llc_list(&[0, 1], &host),
        "[0, 1]",
        "degraded-host form drops node annotation"
    );
}
#[test]
fn should_warn_cross_node_polarity() {
    use std::collections::BTreeSet;
    // Polarity table: 0 nodes → silent (degenerate), 1 node → silent
    // (the desired outcome), ≥ 2 nodes → warn (cross-node spill).
    let none = BTreeSet::new();
    assert!(
        !should_warn_cross_node(&none),
        "empty mems must NOT warn (degenerate plan with no NUMA info)",
    );
    let one_node = BTreeSet::from([0]);
    assert!(
        !should_warn_cross_node(&one_node),
        "single-node plan must NOT warn — the whole point of the cap \
is to fit on one node when possible",
    );
    let two_nodes = BTreeSet::from([0, 1]);
    assert!(
        should_warn_cross_node(&two_nodes),
        "two-node plan MUST warn — operator picked a cap that \
couldn't fit on one node and deserves to hear about it",
    );
    let three_nodes = BTreeSet::from([0, 1, 2]);
    assert!(
        should_warn_cross_node(&three_nodes),
        "three-node plan MUST warn — same rationale as dual",
    );
}
#[test]
fn warn_if_cross_node_spill_predicate_gates_stderr() {
let topo = synth_host_topo(&[(vec![0], 0), (vec![1], 1)]);
// Multi-node plan: predicate says warn, and the emit path must run cleanly.
let multi_plan = LlcPlan {
locked_llcs: vec![0, 1],
cpus: vec![0, 1],
mems: [0usize, 1].into_iter().collect(),
snapshot: Vec::new(),
locks: Vec::new(),
};
assert!(should_warn_cross_node(&multi_plan.mems));
// Output goes to stderr; this test only exercises the gating, it does
// not capture or assert the warning text.
warn_if_cross_node_spill(&multi_plan, &topo);
let single_plan = LlcPlan {
locked_llcs: vec![0],
cpus: vec![0],
mems: [0usize].into_iter().collect(),
snapshot: Vec::new(),
locks: Vec::new(),
};
assert!(!should_warn_cross_node(&single_plan.mems));
// Single-node plan: predicate false, so this call must stay silent.
warn_if_cross_node_spill(&single_plan, &topo);
}
#[test]
fn cpu_cap_effective_count_on_zero_llc_host() {
    // Degenerate host with zero allowed CPUs: even the minimum cap must
    // fail, and fail with the retry-routable contention type.
    let err = CpuCap::new(1)
        .unwrap()
        .effective_count(0)
        .expect_err("1 > 0 must error");
    assert!(
        err.downcast_ref::<ResourceContention>().is_some(),
        "must be ResourceContention for retry routing",
    );
}
#[test]
fn acquire_llc_plan_consolidates_on_peer_held_llc() {
// Isolate this test's lockfile prefix so parallel tests don't collide.
let _prefix = LlcLockPrefixGuard::new();
let topo = HostTopology::new_for_tests(&[(vec![0], 0), (vec![1], 0)]);
let target_lock = llc_lock_path(1);
crate::flock::materialize(&target_lock).expect("materialize lockfile");
// Spawn flock(1) holding a shared (-s) lock on LLC 1's lockfile for ~2s,
// simulating a peer test run already resident on that LLC.
let child = std::process::Command::new("flock")
.args(["-s", "-n", &target_lock, "sleep", "2"])
.spawn();
let mut child = match child {
Ok(c) => c,
// Skip (not fail) on hosts/containers without the flock(1) binary.
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
eprintln!(
"acquire_llc_plan_consolidates_on_peer_held_llc: \
flock(1) not available, skipping ({e})"
);
return;
}
Err(e) => panic!("spawn flock(1): {e}"),
};
// Give the child time to actually take the lock before snapshotting.
// NOTE(review): fixed 50ms is best-effort — a slow host could still race;
// confirm this margin is adequate in CI.
std::thread::sleep(std::time::Duration::from_millis(50));
let test_topo = crate::topology::TestTopology::synthetic(2, 1);
let cap = CpuCap::new(1).expect("cap=1 valid");
let plan = acquire_llc_plan(&topo, &test_topo, Some(cap))
.expect("SH is reentrant — parent SH must coexist with child SH");
assert_eq!(
plan.locked_llcs,
vec![1],
"cap=1 with child holding SH on LLC 1 must pick LLC 1 \
(consolidation over fresh LLC 0); got {:?}",
plan.locked_llcs,
);
// Release our locks before reaping the child.
drop(plan);
let _ = child.wait();
}
#[test]
fn acquire_max_toctou_retries_pinned_at_one() {
    // Guards the constant against well-meaning increases: a bigger retry
    // budget amplifies livelock under contention.
    assert_eq!(
        ACQUIRE_MAX_TOCTOU_RETRIES, 1,
        "retry budget must be 1 — higher values amplify livelock",
    );
}
#[test]
fn acquire_llc_plan_retry_succeeds_on_attempt_one() {
// Implausibly high CPU numbers keep this test's allowed set disjoint
// from anything a real host (or a sibling test) would use.
let _allowed = AllowedCpusGuard::new(vec![93500, 93501]);
let topo = synth_host_topo(&[(vec![93500], 0), (vec![93501], 0)]);
// Start from a clean slate — stale lockfiles would skew the snapshot.
cleanup_lock("/tmp/ktstr-llc-0.lock");
cleanup_lock("/tmp/ktstr-llc-1.lock");
let test_topo = crate::topology::TestTopology::synthetic(2, 1);
let counter = std::cell::Cell::new(0u32);
// Injected acquire_fn: Ok(None) models a TOCTOU loss on the first
// attempt; the single permitted retry then succeeds.
let plan =
acquire_llc_plan_with_acquire_fn(&topo, &test_topo, None, |_selected, _snapshots| {
let n = counter.get();
counter.set(n + 1);
if n == 0 {
Ok(None)
} else {
Ok(Some(Vec::new()))
}
})
.expect("retry on attempt 1 must succeed");
assert_eq!(counter.get(), 2, "acquire_fn called exactly twice");
assert_eq!(plan.locked_llcs, vec![0]);
cleanup_lock("/tmp/ktstr-llc-0.lock");
cleanup_lock("/tmp/ktstr-llc-1.lock");
}
#[test]
fn acquire_llc_plan_retry_exhausted_bails_with_resource_contention() {
// Disjoint high CPU number isolates this test from real hosts/siblings.
let _allowed = AllowedCpusGuard::new(vec![93600]);
let topo = synth_host_topo(&[(vec![93600], 0)]);
cleanup_lock("/tmp/ktstr-llc-0.lock");
let test_topo = crate::topology::TestTopology::synthetic(1, 1);
let counter = std::cell::Cell::new(0u32);
// Injected acquire_fn loses the TOCTOU race every time (Ok(None)).
let err =
acquire_llc_plan_with_acquire_fn(&topo, &test_topo, None, |_selected, _snapshots| {
counter.set(counter.get() + 1);
Ok(None)
})
.expect_err("every attempt returns None — must bail after retries");
// Initial attempt + the fixed retry budget, then give up.
assert_eq!(
counter.get(),
ACQUIRE_MAX_TOCTOU_RETRIES + 1,
"acquire_fn called exactly ACQUIRE_MAX_TOCTOU_RETRIES + 1 times",
);
assert!(
err.downcast_ref::<ResourceContention>().is_some(),
"must downcast to ResourceContention for retry routing: {err:#}",
);
let msg = format!("{err:#}");
assert!(
msg.contains("TOCTOU retry"),
"message must name the retry outcome: {msg}",
);
cleanup_lock("/tmp/ktstr-llc-0.lock");
}
#[test]
fn plan_from_snapshots_consolidation_overrides_fresh_ordering() {
    // Only LLC 3 has peers; although fresh LLC 0 sorts first by index,
    // consolidation must win at target=1.
    let topo = synth_host_topo(&[(vec![0], 0), (vec![1], 0), (vec![2], 0), (vec![3], 0)]);
    let snapshots: Vec<LlcSnapshot> = (0..4)
        .map(|idx| {
            let holder_count = if idx == 3 { 5 } else { 0 };
            LlcSnapshot {
                llc_idx: idx,
                lockfile_path: std::path::PathBuf::from(format!("/tmp/ktstr-llc-{idx}.lock")),
                holders: Vec::new(),
                holder_count,
            }
        })
        .collect();
    let allowed: std::collections::BTreeSet<usize> = (0..4).collect();
    let selected = plan_from_snapshots(&snapshots, 1, &topo, &allowed, |_, _| 10);
    assert_eq!(
        selected,
        vec![3],
        "target=1 with peer-held LLC 3 must pick LLC 3, not the \
lowest-index fresh LLC 0 — consolidation overrides fresh",
    );
}
#[test]
fn plan_from_snapshots_single_node_fit_no_spill() {
    // Two LLCs per node and a target of 2: the seed node alone covers
    // the request, so nothing may spill to node 1.
    let topo = synth_host_topo(&[(vec![0], 0), (vec![1], 0), (vec![2], 1), (vec![3], 1)]);
    let snapshots: Vec<LlcSnapshot> = (0..4)
        .map(|idx| LlcSnapshot {
            llc_idx: idx,
            lockfile_path: std::path::PathBuf::from(format!("/tmp/ktstr-llc-{idx}.lock")),
            holders: Vec::new(),
            holder_count: 0,
        })
        .collect();
    let allowed: std::collections::BTreeSet<usize> = (0..4).collect();
    // Local accesses cost 10, remote 20 — a spill would be "cheap" but
    // still must not happen when the seed node fits the target.
    let distance = |from, to| if from == to { 10 } else { 20 };
    let selected = plan_from_snapshots(&snapshots, 2, &topo, &allowed, distance);
    assert_eq!(
        selected,
        vec![0, 1],
        "target=2 must stay on seed node 0 (LLCs 0,1); seed-node \
capacity (2) covers the request, no spill to node 1 allowed",
    );
}
#[test]
fn plan_from_snapshots_equal_scores_tiebreak_ascending() {
    // All four LLCs carry identical holder counts; the only remaining
    // ordering key is the LLC index, ascending.
    let topo = synth_host_topo(&[(vec![0], 0), (vec![1], 0), (vec![2], 0), (vec![3], 0)]);
    let snapshots: Vec<LlcSnapshot> = (0..4)
        .map(|idx| LlcSnapshot {
            llc_idx: idx,
            lockfile_path: std::path::PathBuf::from(format!("/tmp/ktstr-llc-{idx}.lock")),
            holders: Vec::new(),
            holder_count: 5,
        })
        .collect();
    let allowed: std::collections::BTreeSet<usize> = (0..4).collect();
    let selected = plan_from_snapshots(&snapshots, 2, &topo, &allowed, |_, _| 10);
    assert_eq!(
        selected,
        vec![0, 1],
        "equal consolidation scores must tiebreak on llc_idx ASC \
— selected={selected:?}",
    );
}
#[test]
fn default_cpu_budget_30_percent_rounded_up_min_one() {
    // Budget is 30% of the pool, rounded up, with a floor of 1.
    let cases = [
        (0, 1, "min-1 floor"),
        (1, 1, "ceil(0.3) = 1"),
        (3, 1, "ceil(0.9) = 1"),
        (4, 2, "ceil(1.2) = 2"),
        (10, 3, "ceil(3.0) = 3"),
        (100, 30, "exact 30%"),
    ];
    for (pool, want, why) in cases {
        assert_eq!(default_cpu_budget(pool), want, "{why}");
    }
}
#[test]
fn acquire_llc_plan_bails_when_no_llc_overlaps_allowed() {
// Isolated lockfile prefix plus an allowed set (CPUs 100,101) disjoint
// from the host topology's CPUs (0,1): zero overlap by construction.
let _prefix = LlcLockPrefixGuard::new();
let _allowed = AllowedCpusGuard::new(vec![100, 101]);
let topo = HostTopology::new_for_tests(&[(vec![0], 0), (vec![1], 0)]);
let test_topo = crate::topology::TestTopology::synthetic(4, 1);
let err = acquire_llc_plan(&topo, &test_topo, None)
.expect_err("no LLC overlap must bail, not silently run");
// The error message must name the condition so operators can act on it.
let msg = format!("{err:#}");
assert!(
msg.contains("no host LLC overlaps"),
"err must name the no-overlap condition: {msg}"
);
}
#[test]
fn plan_from_snapshots_filters_llcs_outside_allowed_set() {
    // LLC 1 (cpus 2,3) and LLC 3 (cpus 6,7) have zero overlap with the
    // allowed set and must be invisible to the planner.
    let groups = [
        (vec![0, 1], 0),
        (vec![2, 3], 0),
        (vec![4, 5], 0),
        (vec![6, 7], 0),
    ];
    let topo = synth_host_topo(&groups);
    let snapshots: Vec<LlcSnapshot> = (0..4)
        .map(|idx| LlcSnapshot {
            llc_idx: idx,
            lockfile_path: std::path::PathBuf::from(format!("/tmp/ktstr-llc-{idx}.lock")),
            holders: Vec::new(),
            holder_count: 0,
        })
        .collect();
    let allowed: std::collections::BTreeSet<usize> = [0, 1, 4, 5].into_iter().collect();
    let selected = plan_from_snapshots(&snapshots, 3, &topo, &allowed, |_, _| 10);
    assert_eq!(
        selected,
        vec![0, 2],
        "planner must skip LLCs 1 and 3 (no allowed-CPU overlap) \
and pick LLCs 0 and 2 whose CPUs are fully in allowed; \
got {selected:?}"
    );
}
#[test]
fn acquire_llc_plan_partial_take_last_llc_matches_exact_budget() {
// Isolated lockfile prefix; allow all eight CPUs of the synthetic host.
let _prefix = LlcLockPrefixGuard::new();
let _allowed = AllowedCpusGuard::new(vec![0, 1, 2, 3, 4, 5, 6, 7]);
// Two 4-CPU LLCs; a 5-CPU budget needs all of LLC 0 plus one CPU of LLC 1.
let topo = HostTopology::new_for_tests(&[(vec![0, 1, 2, 3], 0), (vec![4, 5, 6, 7], 0)]);
let test_topo = crate::topology::TestTopology::synthetic(4, 1);
let cap = CpuCap::new(5).expect("cap=5 valid");
let plan = acquire_llc_plan(&topo, &test_topo, Some(cap))
.expect("clean pool must allow SH on both LLCs");
// Both LLCs get locked even though only part of LLC 1 is consumed…
assert_eq!(
plan.locked_llcs,
vec![0, 1],
"budget of 5 CPUs crosses LLC boundary — both must be flocked"
);
// …but the CPU list is trimmed to exactly the requested budget.
assert_eq!(
plan.cpus.len(),
5,
"plan.cpus is EXACTLY the budget, not rounded up: {:?}",
plan.cpus,
);
assert_eq!(plan.cpus, vec![0, 1, 2, 3, 4]);
}
#[test]
fn plan_from_snapshots_partial_llc_overlap_counted_correctly() {
    // Each LLC contributes only its allowed CPUs (one each), so a 2-CPU
    // target needs both LLCs even though either alone has 2 CPUs total.
    let topo = synth_host_topo(&[(vec![0, 1], 0), (vec![2, 3], 0)]);
    let snapshots: Vec<LlcSnapshot> = (0..2)
        .map(|idx| LlcSnapshot {
            llc_idx: idx,
            lockfile_path: std::path::PathBuf::from(format!("/tmp/ktstr-llc-{idx}.lock")),
            holders: Vec::new(),
            holder_count: 0,
        })
        .collect();
    let allowed: std::collections::BTreeSet<usize> = [0, 2].into_iter().collect();
    let selected = plan_from_snapshots(&snapshots, 2, &topo, &allowed, |_, _| 10);
    assert_eq!(
        selected,
        vec![0, 1],
        "target_cpus=2 with 1 allowed CPU per LLC must pick \
BOTH LLCs — each contributes 1, total 2 meets budget"
    );
}
#[test]
fn acquire_llc_plan_cross_node_spill_mems_union() {
// Isolated lockfile prefix; all four single-CPU LLC CPUs allowed.
let _prefix = LlcLockPrefixGuard::new();
let _allowed = AllowedCpusGuard::new(vec![0, 1, 2, 3]);
// Two LLCs per node: a 3-LLC request cannot fit on one node.
let topo =
HostTopology::new_for_tests(&[(vec![0], 0), (vec![1], 0), (vec![2], 1), (vec![3], 1)]);
let test_topo = crate::topology::TestTopology::synthetic(4, 2);
let cap = CpuCap::new(3).expect("cap=3 valid");
let plan = acquire_llc_plan(&topo, &test_topo, Some(cap))
.expect("clean pool must allow 3-CPU acquisition");
assert_eq!(
plan.locked_llcs.len(),
3,
"cap=3 CPUs with 1-CPU LLCs must reserve exactly 3 LLCs, got {:?}",
plan.locked_llcs,
);
// mems is the union of the NUMA nodes backing the locked LLCs; a
// cross-node spill must surface both nodes, not just the seed.
assert_eq!(
plan.mems.len(),
2,
"3 LLCs split across 2 nodes → mems must span BOTH nodes; \
got {:?} (locked_llcs={:?})",
plan.mems,
plan.locked_llcs,
);
assert!(
plan.mems.contains(&0) && plan.mems.contains(&1),
"mems must contain BOTH node 0 and node 1 after cross-node \
spill; got {:?}",
plan.mems,
);
}
}