use crate::compat::ROOT_PREFIX;
use crate::cpumask::read_cpulist;
use crate::misc::find_best_split_size;
use crate::misc::read_file_byte;
use crate::misc::read_file_usize_vec;
use crate::misc::read_from_file;
use crate::Cpumask;
use anyhow::bail;
use anyhow::Result;
use glob::glob;
use log::info;
use log::warn;
use sscanf::sscanf;
use std::cmp::min;
use std::collections::BTreeMap;
use std::io::Write;
use std::path::Path;
use std::sync::Arc;
#[cfg(feature = "gpu-topology")]
use crate::gpu::{create_gpus, Gpu, GpuIndex};
lazy_static::lazy_static! {
    /// Highest CPU id present under sysfs plus one (i.e. the size needed for
    /// per-CPU arrays indexed by CPU id). Panics at first use if no CPUs can
    /// be enumerated.
    pub static ref NR_CPU_IDS: usize = read_cpu_ids().unwrap().last().unwrap() + 1;
    /// Number of possible CPUs as reported by libbpf.
    pub static ref NR_CPUS_POSSIBLE: usize = libbpf_rs::num_possible_cpus().unwrap();
    /// Default minimum number of cores per virtual-LLC partition.
    pub static ref NR_PARTITION_MIN_CORES: usize = 2;
    /// Default maximum number of cores per virtual-LLC partition.
    pub static ref NR_PARTITION_MAX_CORES: usize = 8;
}
/// Performance class of a core on (possibly heterogeneous) systems.
#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum CoreType {
    /// High-performance core. `turbo` is true when the core's raw capacity
    /// equals the system maximum while the average is below it (see
    /// `create_insert_cpu`).
    Big { turbo: bool },
    /// Efficiency core: raw capacity below the system average on a
    /// big.LITTLE system.
    Little,
}
/// A single logical CPU (hardware thread) and its sysfs-derived properties.
#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Cpu {
    // Logical CPU id (index under /sys/devices/system/cpu/cpuN).
    pub id: usize,
    // From cpufreq/scaling_min_freq; 0 if unavailable.
    pub min_freq: usize,
    // From cpufreq/scaling_max_freq; 0 if unavailable.
    pub max_freq: usize,
    // From cpufreq/base_frequency, falling back to max_freq.
    pub base_freq: usize,
    // Capacity normalized so the largest CPU in the system is 1024.
    pub cpu_capacity: usize,
    // Number of hardware threads in this CPU's core; filled in by
    // Topology::instantiate.
    pub smt_level: usize,
    // From power/pm_qos_resume_latency_us; 0 if unavailable.
    pub pm_qos_resume_latency_us: usize,
    // From cpufreq/cpuinfo_transition_latency; 0 if unavailable.
    pub trans_lat_ns: usize,
    // Topology-level cache ids (usize::MAX when the level is absent).
    pub l2_id: usize,
    pub l3_id: usize,
    // This CPU's share of all cache levels, summed (see
    // get_per_cpu_cache_size).
    pub cache_size: usize,
    pub core_type: CoreType,
    // Ids of the enclosing topology objects.
    pub core_id: usize,
    pub llc_id: usize,
    pub node_id: usize,
    pub package_id: usize,
    // From topology/cluster_id; signed because the kernel may report -1.
    pub cluster_id: isize,
}
/// A physical core: one or more hardware threads sharing execution units.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Core {
    // Sequential id assigned during topology construction.
    pub id: usize,
    // The kernel's core_id from sysfs (unique only within a package).
    pub kernel_id: usize,
    pub cluster_id: isize,
    // Hardware threads belonging to this core, keyed by CPU id.
    pub cpus: BTreeMap<usize, Arc<Cpu>>,
    // Cpumask covering exactly the CPUs in `cpus`.
    pub span: Cpumask,
    pub core_type: CoreType,
    // Ids of the enclosing LLC and NUMA node.
    pub llc_id: usize,
    pub node_id: usize,
}
/// A last-level cache domain (or a virtual LLC partition thereof).
#[derive(Debug, Clone)]
pub struct Llc {
    // Sequential id assigned during topology construction (or the virtual
    // partition id when virtual LLCs are in use).
    pub id: usize,
    // The kernel cache id this (possibly virtual) LLC was derived from.
    pub kernel_id: usize,
    // Cores in this LLC, keyed by core id.
    pub cores: BTreeMap<usize, Arc<Core>>,
    // Cpumask covering all CPUs of all cores in this LLC.
    pub span: Cpumask,
    pub node_id: usize,
    // Flattened CPU map; filled in by Topology::instantiate.
    pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
}
/// A NUMA node (or the single synthetic node on non-NUMA systems).
#[derive(Debug, Clone)]
pub struct Node {
    pub id: usize,
    // Inter-node distance vector read from sysfs; empty for the synthetic
    // default node.
    pub distance: Vec<usize>,
    // LLCs in this node, keyed by LLC id.
    pub llcs: BTreeMap<usize, Arc<Llc>>,
    // Cpumask covering every CPU in this node.
    pub span: Cpumask,
    // Flattened core/CPU maps; filled in by Topology::instantiate.
    pub all_cores: BTreeMap<usize, Arc<Core>>,
    pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
    // GPUs attached to this node (feature-gated).
    #[cfg(feature = "gpu-topology")]
    pub gpus: BTreeMap<GpuIndex, Gpu>,
}
/// The full host topology: nodes > LLCs > cores > CPUs, plus flattened
/// lookup maps spanning the whole system.
#[derive(Debug)]
pub struct Topology {
    pub nodes: BTreeMap<usize, Node>,
    // Cpumask of all online CPUs.
    pub span: Cpumask,
    // Whether /sys/devices/system/cpu/smt/active reports SMT as enabled.
    pub smt_enabled: bool,
    // System-wide flattened maps, keyed by the respective ids.
    pub all_llcs: BTreeMap<usize, Arc<Llc>>,
    pub all_cores: BTreeMap<usize, Arc<Core>>,
    pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
}
impl Topology {
    /// Finalize a node tree into a `Topology`: populate the flattened
    /// `all_*` maps at every level, fill in each CPU's `smt_level`, and
    /// reject duplicate CPU or LLC ids.
    fn instantiate(span: Cpumask, mut nodes: BTreeMap<usize, Node>) -> Result<Self> {
        let mut topo_llcs = BTreeMap::new();
        let mut topo_cores = BTreeMap::new();
        let mut topo_cpus = BTreeMap::new();
        for (_node_id, node) in nodes.iter_mut() {
            let mut node_cores = BTreeMap::new();
            let mut node_cpus = BTreeMap::new();
            for (&llc_id, llc) in node.llcs.iter_mut() {
                // Arc::get_mut requires unique ownership; the tree was just
                // built, so no other references exist yet.
                let llc_mut = Arc::get_mut(llc).unwrap();
                let mut llc_cpus = BTreeMap::new();
                for (&core_id, core) in llc_mut.cores.iter_mut() {
                    let core_mut = Arc::get_mut(core).unwrap();
                    // SMT level == number of hardware threads in the core.
                    let smt_level = core_mut.cpus.len();
                    for (&cpu_id, cpu) in core_mut.cpus.iter_mut() {
                        let cpu_mut = Arc::get_mut(cpu).unwrap();
                        cpu_mut.smt_level = smt_level;
                        // NOTE: Option::or() evaluates its argument eagerly,
                        // so all three inserts always execute; the chain only
                        // reports whether any map already held this CPU id.
                        if topo_cpus
                            .insert(cpu_id, cpu.clone())
                            .or(node_cpus.insert(cpu_id, cpu.clone()))
                            .or(llc_cpus.insert(cpu_id, cpu.clone()))
                            .is_some()
                        {
                            bail!("Duplicate CPU ID {}", cpu_id);
                        }
                    }
                    // Cores are inserted the same way but duplicates are not
                    // treated as errors here.
                    topo_cores
                        .insert(core_id, core.clone())
                        .or(node_cores.insert(core_id, core.clone()));
                }
                llc_mut.all_cpus = llc_cpus;
                if topo_llcs.insert(llc_id, llc.clone()).is_some() {
                    bail!("Duplicate LLC ID {}", llc_id);
                }
            }
            node.all_cores = node_cores;
            node.all_cpus = node_cpus;
        }
        Ok(Topology {
            nodes,
            span,
            smt_enabled: is_smt_active().unwrap_or(false),
            all_llcs: topo_llcs,
            all_cores: topo_cores,
            all_cpus: topo_cpus,
        })
    }
    /// Build the host topology from sysfs with no virtual-LLC partitioning.
    pub fn new() -> Result<Topology> {
        Self::with_virt_llcs(None)
    }
    /// Build the host topology, optionally splitting each LLC into virtual
    /// LLCs of `(min, max)` cores each. Uses NUMA information when the
    /// sysfs node directory exists, otherwise a single synthetic node.
    pub fn with_virt_llcs(nr_cores_per_vllc: Option<(usize, usize)>) -> Result<Topology> {
        let span = cpus_online()?;
        let mut topo_ctx = TopoCtx::new();
        let path = format!("{}/sys/devices/system/node", *ROOT_PREFIX);
        let nodes = if Path::new(&path).exists() {
            create_numa_nodes(&span, &mut topo_ctx, nr_cores_per_vllc)?
        } else {
            create_default_node(&span, &mut topo_ctx, false, nr_cores_per_vllc)?
        };
        Self::instantiate(span, nodes)
    }
    /// Build a topology with a single node and a single LLC containing all
    /// CPUs (LLC structure flattened away).
    pub fn with_flattened_llc_node() -> Result<Topology> {
        let span = cpus_online()?;
        let mut topo_ctx = TopoCtx::new();
        let nodes = create_default_node(&span, &mut topo_ctx, true, None)?;
        Self::instantiate(span, nodes)
    }
    /// Build a topology according to validated CLI arguments.
    pub fn with_args(topology_args: &crate::cli::TopologyArgs) -> Result<Topology> {
        topology_args.validate()?;
        let nr_cores_per_vllc = topology_args.get_nr_cores_per_vllc();
        Self::with_virt_llcs(nr_cores_per_vllc)
    }
    /// Collect references to every GPU across all nodes, keyed by GPU index.
    #[cfg(feature = "gpu-topology")]
    pub fn gpus(&self) -> BTreeMap<GpuIndex, &Gpu> {
        let mut gpus = BTreeMap::new();
        for node in self.nodes.values() {
            for (idx, gpu) in &node.gpus {
                gpus.insert(*idx, gpu);
            }
        }
        gpus
    }
    /// True if any core is an efficiency (LITTLE) core.
    pub fn has_little_cores(&self) -> bool {
        self.all_cores
            .values()
            .any(|c| c.core_type == CoreType::Little)
    }
    /// Build a per-CPU sibling table: for each SMT pair, each CPU maps to
    /// its hyperthread sibling; unpaired CPUs map to -1. Only the first two
    /// threads of a core are paired (the loop breaks after the second).
    pub fn sibling_cpus(&self) -> Vec<i32> {
        let mut sibling_cpu = vec![-1i32; *NR_CPUS_POSSIBLE];
        for core in self.all_cores.values() {
            let mut first = -1i32;
            for &cpu in core.cpus.keys() {
                if first < 0 {
                    first = cpu as i32;
                } else {
                    sibling_cpu[first as usize] = cpu as i32;
                    sibling_cpu[cpu] = first;
                    break;
                }
            }
        }
        sibling_cpu
    }
    /// Count cores that have at least one CPU set in `cpumask`.
    pub fn cpumask_nr_cores(&self, cpumask: &Cpumask) -> usize {
        let mut count = 0;
        for core in self.all_cores.values() {
            if core.cpus.keys().any(|&cpu_id| cpumask.test_cpu(cpu_id)) {
                count += 1;
            }
        }
        count
    }
    /// Render `cpumask` as a per-node glyph grid, one glyph per core:
    /// '░' no CPU set, '█' all set, '▀'/'▄' first/second hyperthread set
    /// (for 2-thread cores), '▄' partially set otherwise. Cores are grouped
    /// in runs of ~8 separated by spaces, LLCs separated by '|', and lines
    /// are wrapped at `max_width` with continuation lines aligned under the
    /// first segment.
    pub fn format_cpumask_grid<W: Write>(
        &self,
        w: &mut W,
        cpumask: &Cpumask,
        indent: &str,
        max_width: usize,
    ) -> Result<()> {
        for node in self.nodes.values() {
            let mut llc_segments: Vec<(usize, String)> = Vec::new();
            for llc in node.llcs.values() {
                let mut seg = String::new();
                let nr_cores = llc.cores.len();
                // Split the LLC's cores into groups of at most 8, balancing
                // group sizes (the first `rem` groups get one extra core).
                let nr_groups = (nr_cores + 7) / 8;
                let base = nr_cores / nr_groups;
                let rem = nr_cores % nr_groups;
                let mut next_break = if rem > 0 { base + 1 } else { base };
                let mut group_idx = 0;
                for (i, core) in llc.cores.values().enumerate() {
                    if i > 0 && i == next_break {
                        seg.push(' ');
                        group_idx += 1;
                        next_break += if group_idx < rem { base + 1 } else { base };
                    }
                    let nr_cpus = core.cpus.len();
                    let cpu_ids: Vec<usize> = core.cpus.keys().copied().collect();
                    let nr_set: usize = cpu_ids.iter().filter(|&&c| cpumask.test_cpu(c)).count();
                    let ch = if nr_cpus == 1 {
                        if nr_set > 0 {
                            '█'
                        } else {
                            '░'
                        }
                    } else if nr_cpus == 2 {
                        // Half-block glyphs distinguish which hyperthread is
                        // set.
                        let first_set = cpumask.test_cpu(cpu_ids[0]);
                        let second_set = cpumask.test_cpu(cpu_ids[1]);
                        match (first_set, second_set) {
                            (false, false) => '░',
                            (true, false) => '▀',
                            (false, true) => '▄',
                            (true, true) => '█',
                        }
                    } else {
                        // >2-way SMT: only empty/full/partial are shown.
                        if nr_set == 0 {
                            '░'
                        } else if nr_set == nr_cpus {
                            '█'
                        } else {
                            '▄'
                        }
                    };
                    seg.push(ch);
                }
                llc_segments.push((llc.id, seg));
            }
            if llc_segments.is_empty() {
                continue;
            }
            // Prefix shows the node and the first LLC id; continuation lines
            // are padded to align with the first segment.
            let first_llc_id = llc_segments[0].0;
            let prefix = format!("{}N{} L{:02}: ", indent, node.id, first_llc_id);
            let prefix_width = prefix.chars().count();
            let cont_indent = format!(
                "{}{}",
                indent,
                " ".repeat(prefix_width - indent.chars().count())
            );
            let mut line = prefix.clone();
            let mut first_llc = true;
            for (_, seg) in &llc_segments {
                let seg_width = seg.chars().count();
                let separator = if first_llc { "" } else { "|" };
                let sep_width = separator.chars().count();
                let current_line_width = line.chars().count();
                // Wrap before exceeding max_width; the first segment on a
                // line never wraps.
                if !first_llc && current_line_width + sep_width + seg_width > max_width {
                    writeln!(w, "{}", line)?;
                    line = format!("{}{}", cont_indent, seg);
                } else {
                    line = format!("{}{}{}", line, separator, seg);
                }
                first_llc = false;
            }
            writeln!(w, "{}", line)?;
        }
        Ok(())
    }
    /// One-line summary: number of CPUs and cores in `cpumask` plus the
    /// configured [min, max] CPU bounds.
    pub fn format_cpumask_header(&self, cpumask: &Cpumask, min_cpus: u32, max_cpus: u32) -> String {
        let nr_cpus = cpumask.weight();
        let nr_cores = self.cpumask_nr_cores(cpumask);
        format!(
            "cpus={:3}({:3}c) [{:3},{:3}]",
            nr_cpus, nr_cores, min_cpus, max_cpus
        )
    }
}
/// Scratch state shared across topology construction: maps kernel-reported
/// ids (which are only unique within a node/package) to the sequential ids
/// used by this crate, plus cached cache-instance ids.
struct TopoCtx {
    // (node, package, kernel core id) -> sequential core id.
    node_core_kernel_ids: BTreeMap<(usize, usize, usize), usize>,
    // (node, package, kernel LLC id) -> sequential LLC id.
    node_llc_kernel_ids: BTreeMap<(usize, usize, usize), usize>,
    // shared_cpu_list contents -> cache id, per cache level.
    l2_ids: BTreeMap<String, usize>,
    l3_ids: BTreeMap<String, usize>,
}
impl TopoCtx {
    /// Create a context with all id mappings empty.
    fn new() -> TopoCtx {
        TopoCtx {
            node_core_kernel_ids: BTreeMap::new(),
            node_llc_kernel_ids: BTreeMap::new(),
            l2_ids: BTreeMap::new(),
            l3_ids: BTreeMap::new(),
        }
    }
}
/// Read the set of online CPUs from sysfs as a Cpumask.
fn cpus_online() -> Result<Cpumask> {
    let online_path = format!("{}/sys/devices/system/cpu/online", *ROOT_PREFIX);
    let cpulist = std::fs::read_to_string(&online_path)?;
    Cpumask::from_cpulist(&cpulist)
}
/// Resolve a stable id for the L2/L3 cache instance at `cache_level_path`.
///
/// Cache instances are keyed by the contents of their `shared_cpu_list`
/// file. The kernel-provided `id` file is preferred; when it is absent a
/// sequential id is synthesized. Returns `usize::MAX` for unsupported
/// levels or when `shared_cpu_list` cannot be read.
fn get_cache_id(topo_ctx: &mut TopoCtx, cache_level_path: &Path, cache_level: usize) -> usize {
    // Only L2 and L3 caches are tracked.
    let id_map = match cache_level {
        2 => &mut topo_ctx.l2_ids,
        3 => &mut topo_ctx.l3_ids,
        _ => return usize::MAX,
    };
    let key = match std::fs::read_to_string(cache_level_path.join("shared_cpu_list")) {
        Ok(contents) => contents,
        Err(_) => return usize::MAX,
    };
    // Cached from a previous CPU sharing the same cache instance?
    match id_map.get(&key) {
        Some(&cached) if cached != usize::MAX => return cached,
        _ => {}
    }
    // Prefer the kernel-reported cache id when available.
    let kernel_id = read_from_file(&cache_level_path.join("id")).unwrap_or(usize::MAX);
    if kernel_id != usize::MAX {
        id_map.insert(key, kernel_id);
        return kernel_id;
    }
    // Fall back to a synthesized sequential id.
    let synth_id = id_map.len();
    id_map.insert(key, synth_id);
    synth_id
}
/// Sum this CPU's share of every cache level under `cache_path`
/// (`index0`..`indexN`): each cache's size is divided evenly among the
/// CPUs listed in its `shared_cpu_list`.
///
/// Returns an error only when `shared_cpu_list` cannot be read or parsed;
/// a missing/unparsable `size` file falls back to 1 KiB.
fn get_per_cpu_cache_size(cache_path: &Path) -> Result<usize> {
    let path_str = cache_path.to_str().unwrap();
    let paths = glob(&(path_str.to_owned() + "/index[0-9]*"))?;
    let mut tot_size = 0;
    for index in paths.filter_map(Result::ok) {
        let size = read_file_byte(&index.join("size")).unwrap_or(1024_usize);
        let cpulist: String = read_from_file(&index.join("shared_cpu_list"))?;
        // Guard against an empty shared_cpu_list: dividing by zero here
        // would panic. Treat such a cache as private to this CPU.
        let num_cpus = read_cpulist(&cpulist)?.len().max(1);
        tot_size += size / num_cpus;
    }
    Ok(tot_size)
}
/// Read one CPU's sysfs attributes and insert it (creating the enclosing
/// LLC and Core entries on demand) into `node`. Offline CPUs are skipped.
///
/// Sequential LLC/core ids are assigned from the current sizes of the
/// `topo_ctx` id maps, so CPUs must be inserted in a deterministic order
/// for ids to be reproducible.
#[allow(clippy::too_many_arguments)]
fn create_insert_cpu(
    id: usize,
    node: &mut Node,
    online_mask: &Cpumask,
    topo_ctx: &mut TopoCtx,
    cs: &CapacitySource,
    flatten_llc: bool,
) -> Result<()> {
    // CPU is offline; skip silently.
    if !online_mask.test_cpu(id) {
        return Ok(());
    }
    let cpu_str = format!("{}/sys/devices/system/cpu/cpu{}", *ROOT_PREFIX, id);
    let cpu_path = Path::new(&cpu_str);
    // Kernel topology ids for this CPU.
    let top_path = cpu_path.join("topology");
    let core_kernel_id = read_from_file(&top_path.join("core_id"))?;
    let package_id = read_from_file(&top_path.join("physical_package_id"))?;
    let cluster_id = read_from_file(&top_path.join("cluster_id"))?;
    // The LLC id is the L3 cache id, falling back to L2 when there is no
    // L3; `flatten_llc` collapses everything into LLC 0.
    let cache_path = cpu_path.join("cache");
    let l2_id = get_cache_id(topo_ctx, &cache_path.join(format!("index{}", 2)), 2);
    let l3_id = get_cache_id(topo_ctx, &cache_path.join(format!("index{}", 3)), 3);
    let llc_kernel_id = if flatten_llc {
        0
    } else if l3_id == usize::MAX {
        l2_id
    } else {
        l3_id
    };
    let cache_size = get_per_cpu_cache_size(&cache_path).unwrap_or(0_usize);
    // Frequency attributes; all default to 0 (base_freq to max_freq) when
    // unavailable.
    let freq_path = cpu_path.join("cpufreq");
    let min_freq = read_from_file(&freq_path.join("scaling_min_freq")).unwrap_or(0_usize);
    let max_freq = read_from_file(&freq_path.join("scaling_max_freq")).unwrap_or(0_usize);
    let base_freq = read_from_file(&freq_path.join("base_frequency")).unwrap_or(max_freq);
    let trans_lat_ns =
        read_from_file(&freq_path.join("cpuinfo_transition_latency")).unwrap_or(0_usize);
    // Normalize raw capacity so the fastest CPU in the system is 1024.
    let cap_path = cpu_path.join(cs.suffix.clone());
    let rcap = read_from_file(&cap_path).unwrap_or(cs.max_rcap);
    let cpu_capacity = (rcap * 1024) / cs.max_rcap;
    let power_path = cpu_path.join("power");
    let pm_qos_resume_latency_us =
        read_from_file(&power_path.join("pm_qos_resume_latency_us")).unwrap_or(0_usize);
    // Map the kernel LLC id to a sequential LLC id; new entries get the
    // next id (current map size).
    let num_llcs = topo_ctx.node_llc_kernel_ids.len();
    let llc_id = topo_ctx
        .node_llc_kernel_ids
        .entry((node.id, package_id, llc_kernel_id))
        .or_insert(num_llcs);
    let llc = node.llcs.entry(*llc_id).or_insert(Arc::new(Llc {
        id: *llc_id,
        cores: BTreeMap::new(),
        span: Cpumask::new(),
        all_cpus: BTreeMap::new(),
        node_id: node.id,
        kernel_id: llc_kernel_id,
    }));
    let llc_mut = Arc::get_mut(llc).unwrap();
    // Classify the core: turbo-big when it has the max capacity on a system
    // where not all cores do; little when below average on big.LITTLE.
    let core_type = if cs.avg_rcap < cs.max_rcap && rcap == cs.max_rcap {
        CoreType::Big { turbo: true }
    } else if !cs.has_biglittle || rcap >= cs.avg_rcap {
        CoreType::Big { turbo: false }
    } else {
        CoreType::Little
    };
    // Map the kernel core id to a sequential core id, as above for LLCs.
    let num_cores = topo_ctx.node_core_kernel_ids.len();
    let core_id = topo_ctx
        .node_core_kernel_ids
        .entry((node.id, package_id, core_kernel_id))
        .or_insert(num_cores);
    let core = llc_mut.cores.entry(*core_id).or_insert(Arc::new(Core {
        id: *core_id,
        cpus: BTreeMap::new(),
        span: Cpumask::new(),
        core_type: core_type.clone(),
        llc_id: *llc_id,
        node_id: node.id,
        kernel_id: core_kernel_id,
        cluster_id,
    }));
    let core_mut = Arc::get_mut(core).unwrap();
    core_mut.cpus.insert(
        id,
        Arc::new(Cpu {
            id,
            min_freq,
            max_freq,
            base_freq,
            cpu_capacity,
            // Filled in later by Topology::instantiate.
            smt_level: 0,
            pm_qos_resume_latency_us,
            trans_lat_ns,
            l2_id,
            l3_id,
            cache_size,
            core_type: core_type.clone(),
            core_id: *core_id,
            llc_id: *llc_id,
            node_id: node.id,
            package_id,
            cluster_id,
        }),
    );
    if node.span.test_cpu(id) {
        bail!("Node {} already had CPU {}", node.id, id);
    }
    // Record the CPU in all enclosing spans.
    core_mut.span.set_cpu(id)?;
    llc_mut.span.set_cpu(id)?;
    node.span.set_cpu(id)?;
    Ok(())
}
/// Enumerate CPU ids from the /sys/devices/system/cpu/cpu<N> directories,
/// returned sorted in ascending order.
fn read_cpu_ids() -> Result<Vec<usize>> {
    let pattern = format!("{}/sys/devices/system/cpu/cpu[0-9]*", *ROOT_PREFIX);
    let mut cpu_ids = Vec::new();
    for cpu_path in glob(&pattern)?.filter_map(Result::ok) {
        let cpu_str = cpu_path.to_str().unwrap().trim();
        // With a test root prefix the path has an arbitrary leading
        // component that must be consumed before the cpu id.
        let parsed = if ROOT_PREFIX.is_empty() {
            sscanf!(cpu_str, "/sys/devices/system/cpu/cpu{usize}").ok()
        } else {
            sscanf!(cpu_str, "{str}/sys/devices/system/cpu/cpu{usize}")
                .ok()
                .map(|(_, id)| id)
        };
        match parsed {
            Some(id) => cpu_ids.push(id),
            None => bail!("Failed to parse cpu ID {}", cpu_str),
        }
    }
    cpu_ids.sort();
    Ok(cpu_ids)
}
/// Which sysfs attribute is used to derive per-CPU capacity, plus the
/// system-wide statistics needed to normalize and classify cores.
struct CapacitySource {
    // Path suffix under /sys/devices/system/cpu/cpuN (empty when no source
    // is available and defaults are used).
    suffix: String,
    // Average and maximum raw capacity across all CPUs.
    avg_rcap: usize,
    // Maximum raw capacity; capacities are normalized against this.
    max_rcap: usize,
    // True when max >= 1.3 * min, i.e. the system looks big.LITTLE.
    has_biglittle: bool,
}
/// Pick the sysfs attribute used as the CPU capacity source and compute
/// system-wide capacity statistics.
///
/// Sources are tried in order of preference. A source is selected
/// (breaking the outer loop) only when it is readable on cpu0 AND its
/// values differ between CPUs; if every candidate yields identical values
/// everywhere, the last readable candidate is kept. When nothing is
/// readable, defaults (rcap 1024 everywhere) are used with a warning.
fn get_capacity_source() -> Option<CapacitySource> {
    // Ordered most-preferred first; the last entry is the fallback.
    let sources = [
        "cpufreq/amd_pstate_prefcore_ranking",
        "cpufreq/amd_pstate_highest_perf",
        "acpi_cppc/highest_perf",
        "cpu_capacity",
        "cpufreq/cpuinfo_max_freq",
    ];
    let prefix = format!("{}/sys/devices/system/cpu/cpu0", *ROOT_PREFIX);
    let mut raw_capacity;
    let mut suffix = sources[sources.len() - 1];
    'outer: for src in sources {
        let path_str = [prefix.clone(), src.to_string()].join("/");
        let path = Path::new(&path_str);
        raw_capacity = read_from_file(&path).unwrap_or(0_usize);
        if raw_capacity > 0 {
            suffix = src;
            // Keep this source only if it actually differentiates CPUs;
            // otherwise fall through to the next candidate.
            let path = format!("{}/sys/devices/system/cpu/cpu[0-9]*", *ROOT_PREFIX);
            let cpu_paths = glob(&path).ok()?;
            for cpu_path in cpu_paths.filter_map(Result::ok) {
                let raw_capacity2 = read_from_file(&cpu_path.join(suffix)).unwrap_or(0_usize);
                if raw_capacity != raw_capacity2 {
                    break 'outer;
                }
            }
        }
    }
    // Compute min/max/average raw capacity over all CPUs with the chosen
    // source.
    let mut max_rcap = 0;
    let mut min_rcap = usize::MAX;
    let mut avg_rcap = 0;
    let mut nr_cpus = 0;
    let mut has_biglittle = false;
    let path = format!("{}/sys/devices/system/cpu/cpu[0-9]*", *ROOT_PREFIX);
    let cpu_paths = glob(&path).ok()?;
    for cpu_path in cpu_paths.filter_map(Result::ok) {
        let rcap = read_from_file(&cpu_path.join(suffix)).unwrap_or(0_usize);
        if max_rcap < rcap {
            max_rcap = rcap;
        }
        if min_rcap > rcap {
            min_rcap = rcap;
        }
        avg_rcap += rcap;
        nr_cpus += 1;
    }
    if nr_cpus == 0 || max_rcap == 0 {
        // No usable data: pretend all CPUs have the default capacity.
        suffix = "";
        avg_rcap = 1024;
        max_rcap = 1024;
        warn!("CPU capacity information is not available under sysfs.");
    } else {
        avg_rcap /= nr_cpus;
        // Heuristic: a >=30% spread between slowest and fastest CPU
        // indicates a big.LITTLE system.
        has_biglittle = max_rcap as f32 >= (1.3 * min_rcap as f32);
    }
    Some(CapacitySource {
        suffix: suffix.to_string(),
        avg_rcap,
        max_rcap,
        has_biglittle,
    })
}
/// Report whether SMT is active per /sys/devices/system/cpu/smt/active;
/// None when the file is missing or unreadable.
fn is_smt_active() -> Option<bool> {
    let smt_path = format!("{}/sys/devices/system/cpu/smt/active", *ROOT_PREFIX);
    let active: u8 = read_from_file(Path::new(&smt_path)).ok()?;
    match active {
        1 => Some(true),
        _ => Some(false),
    }
}
/// Re-partition a node's cores into virtual LLCs of between `min_cores`
/// and `max_cores` cores each, replacing `node.llcs`.
///
/// Within each physical LLC, cores are bucketed by type (big vs little) so
/// a partition never mixes types, then each bucket is split into
/// equal-sized partitions chosen by `find_best_split_size`. Virtual LLC
/// ids start at `start_id`; the id after the last one assigned is returned
/// so callers can number partitions contiguously across nodes.
fn replace_with_virt_llcs(
    node: &mut Node,
    min_cores: usize,
    max_cores: usize,
    start_id: usize,
) -> Result<usize> {
    let mut next_id = start_id;
    let mut core_to_partition: BTreeMap<usize, usize> = BTreeMap::new();
    let mut partition_to_kernel_id: BTreeMap<usize, usize> = BTreeMap::new();
    let num_orig_llcs = node.llcs.len();
    // Pass 1: decide which partition every core lands in.
    for (_llc_id, llc) in node.llcs.iter() {
        // Bucket cores by type; key is "is little".
        let mut cores_by_type: BTreeMap<bool, Vec<usize>> = BTreeMap::new();
        for (core_id, core) in llc.cores.iter() {
            let core_type = core.core_type == CoreType::Little;
            cores_by_type
                .entry(core_type)
                .or_insert(Vec::new())
                .push(*core_id);
        }
        for (_core_type, core_ids) in cores_by_type.iter() {
            let num_cores_in_bucket = core_ids.len();
            let best_split = find_best_split_size(num_cores_in_bucket, min_cores, max_cores);
            let num_partitions = num_cores_in_bucket / best_split;
            for (bucket_idx, &core_id) in core_ids.iter().enumerate() {
                // Leftover cores (bucket not evenly divisible) go to the
                // last partition.
                let partition_idx = min(bucket_idx / best_split, num_partitions - 1);
                let current_partition_id = next_id + partition_idx;
                core_to_partition.insert(core_id, current_partition_id);
                // Each virtual LLC inherits its source LLC's kernel id.
                partition_to_kernel_id.insert(current_partition_id, llc.kernel_id);
            }
            next_id += num_partitions;
        }
    }
    // Pass 2: create the empty virtual LLC shells.
    let mut virt_llcs: BTreeMap<usize, Arc<Llc>> = BTreeMap::new();
    for vllc_id in start_id..next_id {
        let kernel_id = partition_to_kernel_id.get(&vllc_id).copied().unwrap();
        virt_llcs.insert(
            vllc_id,
            Arc::new(Llc {
                id: vllc_id,
                kernel_id,
                cores: BTreeMap::new(),
                span: Cpumask::new(),
                node_id: node.id,
                all_cpus: BTreeMap::new(),
            }),
        );
    }
    // Pass 3: deep-copy each core (and its CPUs) into its target virtual
    // LLC, rewriting the llc_id fields and spans along the way.
    for (_llc_id, llc) in node.llcs.iter_mut() {
        for (core_id, core) in llc.cores.iter() {
            if let Some(&target_partition_id) = core_to_partition.get(core_id) {
                if let Some(target_llc) = virt_llcs.get_mut(&target_partition_id) {
                    let target_llc_mut = Arc::get_mut(target_llc).unwrap();
                    let mut new_core = (**core).clone();
                    new_core.llc_id = target_partition_id;
                    let mut updated_cpus = BTreeMap::new();
                    for (cpu_id, cpu) in new_core.cpus.iter() {
                        let mut new_cpu = (**cpu).clone();
                        new_cpu.llc_id = target_partition_id;
                        target_llc_mut.span.set_cpu(*cpu_id)?;
                        updated_cpus.insert(*cpu_id, Arc::new(new_cpu));
                    }
                    new_core.cpus = updated_cpus;
                    target_llc_mut.cores.insert(*core_id, Arc::new(new_core));
                }
            }
        }
    }
    node.llcs = virt_llcs;
    // Log the outcome; the last partition may be larger when the split was
    // uneven.
    let num_virt_llcs = next_id - start_id;
    let vllc_sizes: Vec<usize> = node.llcs.values().map(|llc| llc.cores.len()).collect();
    if vllc_sizes.is_empty() {
        return Ok(next_id);
    }
    let common_size = vllc_sizes[0];
    let last_size = *vllc_sizes.last().unwrap();
    if common_size == last_size {
        info!(
            "Node {}: split {} LLC(s) into {} virtual LLCs with {} cores each",
            node.id, num_orig_llcs, num_virt_llcs, common_size
        );
    } else {
        info!(
            "Node {}: split {} LLC(s) into {} virtual LLCs with {} cores each (last with {})",
            node.id, num_orig_llcs, num_virt_llcs, common_size, last_size
        );
    }
    Ok(next_id)
}
/// Build a single synthetic node (id 0) covering every CPU; used when the
/// host exposes no NUMA information under sysfs.
///
/// `flatten_llc` collapses all LLCs into one; `nr_cores_per_vllc`
/// optionally re-partitions cores into virtual LLCs of (min, max) cores.
fn create_default_node(
    online_mask: &Cpumask,
    topo_ctx: &mut TopoCtx,
    flatten_llc: bool,
    nr_cores_per_vllc: Option<(usize, usize)>,
) -> Result<BTreeMap<usize, Node>> {
    let mut nodes = BTreeMap::<usize, Node>::new();
    let mut node = Node {
        id: 0,
        // No NUMA info, so no distance vector.
        distance: vec![],
        llcs: BTreeMap::new(),
        span: Cpumask::new(),
        #[cfg(feature = "gpu-topology")]
        gpus: BTreeMap::new(),
        all_cores: BTreeMap::new(),
        all_cpus: BTreeMap::new(),
    };
    // All GPUs are attributed to the single node.
    #[cfg(feature = "gpu-topology")]
    {
        let system_gpus = create_gpus();
        if let Some(gpus) = system_gpus.get(&0) {
            for gpu in gpus {
                node.gpus.insert(gpu.index, gpu.clone());
            }
        }
    }
    let path = format!("{}/sys/devices/system/cpu", *ROOT_PREFIX);
    if !Path::new(&path).exists() {
        bail!("/sys/devices/system/cpu sysfs node not found");
    }
    // Propagate a missing capacity source as an error instead of panicking
    // (this function already returns Result).
    let cs = match get_capacity_source() {
        Some(cs) => cs,
        None => bail!("Failed to determine CPU capacity source"),
    };
    let cpu_ids = read_cpu_ids()?;
    for cpu_id in cpu_ids.iter() {
        create_insert_cpu(*cpu_id, &mut node, online_mask, topo_ctx, &cs, flatten_llc)?;
    }
    if let Some((min_cores_val, max_cores_val)) = nr_cores_per_vllc {
        replace_with_virt_llcs(&mut node, min_cores_val, max_cores_val, 0)?;
    }
    nodes.insert(node.id, node);
    Ok(nodes)
}
/// Build one `Node` per NUMA node advertised under
/// /sys/devices/system/node, populating LLCs, cores and CPUs from sysfs.
///
/// `nr_cores_per_vllc` optionally re-partitions each node's cores into
/// virtual LLCs of (min, max) cores; virtual LLC ids are numbered
/// contiguously across nodes.
fn create_numa_nodes(
    online_mask: &Cpumask,
    topo_ctx: &mut TopoCtx,
    nr_cores_per_vllc: Option<(usize, usize)>,
) -> Result<BTreeMap<usize, Node>> {
    let mut nodes = BTreeMap::<usize, Node>::new();
    let mut next_virt_llc_id = 0;
    #[cfg(feature = "gpu-topology")]
    let system_gpus = create_gpus();
    // The capacity source is a system-wide property (probed via cpu0 and a
    // system-wide scan), so compute it once instead of once per node, and
    // propagate failure instead of panicking.
    let cs = match get_capacity_source() {
        Some(cs) => cs,
        None => bail!("Failed to determine CPU capacity source"),
    };
    let path = format!("{}/sys/devices/system/node/node*", *ROOT_PREFIX);
    let numa_paths = glob(&path)?;
    for numa_path in numa_paths.filter_map(Result::ok) {
        let numa_str = numa_path.to_str().unwrap().trim();
        // Parse the node id from the directory name, accounting for an
        // optional test-root prefix.
        let node_id = if ROOT_PREFIX.is_empty() {
            match sscanf!(numa_str, "/sys/devices/system/node/node{usize}") {
                Ok(val) => val,
                Err(_) => {
                    bail!("Failed to parse NUMA node ID {}", numa_str);
                }
            }
        } else {
            match sscanf!(numa_str, "{str}/sys/devices/system/node/node{usize}") {
                Ok((_, val)) => val,
                Err(_) => {
                    bail!("Failed to parse NUMA node ID {}", numa_str);
                }
            }
        };
        // Inter-node distance vector as reported by the kernel.
        let distance = read_file_usize_vec(
            Path::new(&format!(
                "{}/sys/devices/system/node/node{}/distance",
                *ROOT_PREFIX, node_id
            )),
            ' ',
        )?;
        let mut node = Node {
            id: node_id,
            distance,
            llcs: BTreeMap::new(),
            span: Cpumask::new(),
            all_cores: BTreeMap::new(),
            all_cpus: BTreeMap::new(),
            #[cfg(feature = "gpu-topology")]
            gpus: BTreeMap::new(),
        };
        #[cfg(feature = "gpu-topology")]
        {
            if let Some(gpus) = system_gpus.get(&node_id) {
                for gpu in gpus {
                    node.gpus.insert(gpu.index, gpu.clone());
                }
            }
        }
        // Collect this node's CPU ids from its cpuN symlinks.
        let cpu_pattern = numa_path.join("cpu[0-9]*");
        let cpu_paths = glob(cpu_pattern.to_string_lossy().as_ref())?;
        let mut cpu_ids = vec![];
        for cpu_path in cpu_paths.filter_map(Result::ok) {
            let cpu_str = cpu_path.to_str().unwrap().trim();
            let cpu_id = if ROOT_PREFIX.is_empty() {
                match sscanf!(cpu_str, "/sys/devices/system/node/node{usize}/cpu{usize}") {
                    Ok((_, val)) => val,
                    Err(_) => {
                        bail!("Failed to parse cpu ID {}", cpu_str);
                    }
                }
            } else {
                match sscanf!(
                    cpu_str,
                    "{str}/sys/devices/system/node/node{usize}/cpu{usize}"
                ) {
                    Ok((_, _, val)) => val,
                    Err(_) => {
                        bail!("Failed to parse cpu ID {}", cpu_str);
                    }
                }
            };
            cpu_ids.push(cpu_id);
        }
        // Insert CPUs in ascending id order so sequential core/LLC ids are
        // deterministic.
        cpu_ids.sort();
        for cpu_id in cpu_ids {
            create_insert_cpu(cpu_id, &mut node, online_mask, topo_ctx, &cs, false)?;
        }
        if let Some((min_cores_val, max_cores_val)) = nr_cores_per_vllc {
            next_virt_llc_id =
                replace_with_virt_llcs(&mut node, min_cores_val, max_cores_val, next_virt_llc_id)?;
        }
        nodes.insert(node.id, node);
    }
    Ok(nodes)
}
#[cfg(any(test, feature = "testutils"))]
pub mod testutils {
    use super::*;
    use crate::set_cpumask_test_width;

    /// Build a synthetic `Cpu` with neutral defaults (capacity 1024, big
    /// core, zeroed frequencies) for topology tests.
    pub fn test_cpu(id: usize, core_id: usize, llc_id: usize, node_id: usize) -> Cpu {
        Cpu {
            id,
            core_id,
            llc_id,
            node_id,
            min_freq: 0,
            max_freq: 0,
            base_freq: 0,
            cpu_capacity: 1024,
            smt_level: 0,
            pm_qos_resume_latency_us: 0,
            trans_lat_ns: 0,
            l2_id: 0,
            l3_id: llc_id,
            cache_size: 0,
            core_type: CoreType::Big { turbo: false },
            package_id: node_id,
            cluster_id: 0,
        }
    }

    /// Build a synthetic `Core` whose span covers exactly `cpus`.
    pub fn test_core(
        id: usize,
        cpus: BTreeMap<usize, Arc<Cpu>>,
        llc_id: usize,
        node_id: usize,
    ) -> Core {
        let mut span = Cpumask::new();
        for cpu_id in cpus.keys().copied() {
            span.set_cpu(cpu_id).unwrap();
        }
        Core {
            id,
            kernel_id: id,
            cluster_id: 0,
            cpus,
            span,
            core_type: CoreType::Big { turbo: false },
            llc_id,
            node_id,
        }
    }

    /// Build a synthetic `Llc` whose span covers all CPUs of `cores`.
    /// `all_cpus` is left empty; `Topology::instantiate` fills it in.
    pub fn test_llc(id: usize, cores: BTreeMap<usize, Arc<Core>>, node_id: usize) -> Llc {
        let mut span = Cpumask::new();
        for cpu_id in cores.values().flat_map(|core| core.cpus.keys().copied()) {
            span.set_cpu(cpu_id).unwrap();
        }
        Llc {
            id,
            kernel_id: id,
            cores,
            span,
            node_id,
            all_cpus: BTreeMap::new(),
        }
    }

    /// Build a synthetic `Node` with a uniform distance vector. The `all_*`
    /// maps are left empty; `Topology::instantiate` fills them in.
    pub fn test_node(id: usize, llcs: BTreeMap<usize, Arc<Llc>>, nr_nodes: usize) -> Node {
        let mut span = Cpumask::new();
        for cpu_id in llcs
            .values()
            .flat_map(|llc| llc.cores.values())
            .flat_map(|core| core.cpus.keys().copied())
        {
            span.set_cpu(cpu_id).unwrap();
        }
        Node {
            id,
            distance: vec![10; nr_nodes],
            llcs,
            span,
            all_cores: BTreeMap::new(),
            all_cpus: BTreeMap::new(),
            #[cfg(feature = "gpu-topology")]
            gpus: BTreeMap::new(),
        }
    }

    /// Build a fully-populated regular topology with the given dimensions;
    /// ids are assigned sequentially in construction order. Returns the
    /// topology and the total CPU count.
    pub fn make_test_topo(
        nr_nodes: usize,
        llcs_per_node: usize,
        cores_per_llc: usize,
        hts_per_core: usize,
    ) -> (Topology, usize) {
        let total_cpus = nr_nodes * llcs_per_node * cores_per_llc * hts_per_core;
        set_cpumask_test_width(total_cpus);
        let mut next_cpu = 0usize;
        let mut next_core = 0usize;
        let mut next_llc = 0usize;
        let mut nodes = BTreeMap::new();
        for node_idx in 0..nr_nodes {
            let mut llcs = BTreeMap::new();
            for _ in 0..llcs_per_node {
                let mut cores = BTreeMap::new();
                for _ in 0..cores_per_llc {
                    let mut cpus = BTreeMap::new();
                    for _ in 0..hts_per_core {
                        let cpu = test_cpu(next_cpu, next_core, next_llc, node_idx);
                        cpus.insert(next_cpu, Arc::new(cpu));
                        next_cpu += 1;
                    }
                    let core = test_core(next_core, cpus, next_llc, node_idx);
                    cores.insert(next_core, Arc::new(core));
                    next_core += 1;
                }
                let llc = test_llc(next_llc, cores, node_idx);
                llcs.insert(next_llc, Arc::new(llc));
                next_llc += 1;
            }
            nodes.insert(node_idx, test_node(node_idx, llcs, nr_nodes));
        }
        let mut span = Cpumask::new();
        for cpu_id in 0..total_cpus {
            span.set_cpu(cpu_id).unwrap();
        }
        (Topology::instantiate(span, nodes).unwrap(), total_cpus)
    }

    /// Build a Cpumask with exactly the given bits set (`_total` is unused;
    /// kept for call-site compatibility).
    pub fn mask_from_bits(_total: usize, bits: &[usize]) -> Cpumask {
        let mut mask = Cpumask::new();
        for &bit in bits {
            mask.set_cpu(bit).unwrap();
        }
        mask
    }
}
#[cfg(test)]
mod tests {
    use super::testutils::*;
    use super::*;
    // Render the cpumask grid with a 2-space indent and 80-column width.
    fn grid_output(topo: &Topology, cpumask: &Cpumask) -> String {
        let mut buf = Vec::new();
        topo.format_cpumask_grid(&mut buf, cpumask, " ", 80)
            .unwrap();
        String::from_utf8(buf).unwrap()
    }
    // 2 nodes x 2 LLCs x 3 cores x 2 HTs: verify labels, half-block glyphs
    // for SMT pairs, and the core count.
    #[test]
    fn test_grid_2node_2llc_3core_2ht() {
        let (topo, total) = make_test_topo(2, 2, 3, 2);
        assert_eq!(total, 24);
        let cpumask = mask_from_bits(total, &[1, 2, 3, 12]);
        let output = grid_output(&topo, &cpumask);
        assert!(output.contains("N0 L00:"));
        assert!(output.contains("N1 L02:"));
        assert!(output.contains("▄█░|░░░"));
        assert!(output.contains("▀░░|░░░"));
        assert_eq!(topo.cpumask_nr_cores(&cpumask), 3);
    }
    // An empty mask renders only empty glyphs.
    #[test]
    fn test_grid_empty_cpumask() {
        let (topo, total) = make_test_topo(1, 2, 3, 2);
        let cpumask = mask_from_bits(total, &[]);
        let output = grid_output(&topo, &cpumask);
        assert!(!output.contains('█'));
        assert!(!output.contains('▀'));
        assert!(!output.contains('▄'));
        assert!(output.contains('░'));
        assert_eq!(topo.cpumask_nr_cores(&cpumask), 0);
    }
    // A full mask renders only full glyphs.
    #[test]
    fn test_grid_full_cpumask() {
        let (topo, total) = make_test_topo(1, 2, 3, 2);
        let cpumask = mask_from_bits(total, &(0..total).collect::<Vec<_>>());
        let output = grid_output(&topo, &cpumask);
        assert!(!output.contains('░'));
        assert!(!output.contains('▀'));
        assert!(!output.contains('▄'));
        assert!(output.contains('█'));
        assert_eq!(topo.cpumask_nr_cores(&cpumask), 6);
    }
    // A mix of HT states produces all four glyph kinds.
    #[test]
    fn test_grid_mixed_ht() {
        let (topo, total) = make_test_topo(1, 1, 4, 2);
        let cpumask = mask_from_bits(total, &[0, 3, 4, 5]);
        let output = grid_output(&topo, &cpumask);
        assert!(output.contains('▀'));
        assert!(output.contains('▄'));
        assert!(output.contains('█'));
        assert!(output.contains('░'));
    }
    // A single node never emits an N1 label.
    #[test]
    fn test_grid_single_node() {
        let (topo, total) = make_test_topo(1, 1, 2, 2);
        let cpumask = mask_from_bits(total, &[0, 1]);
        let output = grid_output(&topo, &cpumask);
        assert!(output.contains("N0 L00:"));
        assert!(!output.contains("N1"));
    }
    // Many LLCs with a narrow max_width must wrap onto multiple lines.
    #[test]
    fn test_grid_overflow_wrap() {
        let (topo, total) = make_test_topo(1, 12, 4, 2);
        let cpumask = mask_from_bits(total, &[0]);
        let mut buf = Vec::new();
        topo.format_cpumask_grid(&mut buf, &cpumask, " ", 60)
            .unwrap();
        let output = String::from_utf8(buf).unwrap();
        let lines: Vec<&str> = output.lines().collect();
        assert!(
            lines.len() > 1,
            "Expected wrapping with narrow width, got {} lines",
            lines.len()
        );
    }
    // Single-thread cores only use full/empty glyphs, never half-blocks.
    #[test]
    fn test_grid_smt_off() {
        let (topo, total) = make_test_topo(1, 1, 4, 1);
        let cpumask = mask_from_bits(total, &[0, 2]);
        let output = grid_output(&topo, &cpumask);
        assert!(output.contains('█'));
        assert!(output.contains('░'));
        assert!(!output.contains('▀'));
        assert!(!output.contains('▄'));
    }
    // 4-way SMT cores use the partial glyph for partially-set cores.
    #[test]
    fn test_grid_4way_smt() {
        let (topo, total) = make_test_topo(1, 1, 2, 4);
        let cpumask = mask_from_bits(total, &[0, 1, 2, 3, 4, 5]);
        let output = grid_output(&topo, &cpumask);
        assert!(output.contains('█'));
        assert!(output.contains('▄'));
    }
    // Header formatting: CPU count, core count, and [min,max] bounds.
    #[test]
    fn test_cpumask_header() {
        let (topo, total) = make_test_topo(1, 1, 4, 2);
        let cpumask = mask_from_bits(total, &[0, 1, 2]);
        let header = topo.format_cpumask_header(&cpumask, 5, 10);
        assert!(header.contains("cpus= 3( 2c)"));
        assert!(header.contains("[ 5, 10]"));
    }
}