use crate::monitor::bpf_map::GuestMemMapAccessorOwned;
use crate::monitor::btf_offsets::{
    NR_VM_NUMA_EVENT_ITEMS, NUMA_FOREIGN, NUMA_HIT, NUMA_INTERLEAVE_HIT, NUMA_LOCAL, NUMA_MISS,
    NUMA_OTHER, NumaStatsOffsets,
};
use crate::monitor::dump::PerNodeNumaStats;
use crate::monitor::guest::GuestKernel;
use crate::monitor::idr::translate_any_kva;
use crate::monitor::symbols::KernelSymbols;
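
// Upper bound on the zones walked per node. Linux's MAX_NR_ZONES is
// config-dependent; 5 matches a common x86_64 layout (DMA, DMA32, Normal,
// Movable, Device). Adjust if the guest kernel is configured differently.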
const MAX_NR_ZONES: usize = 5;
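
/// Builds per-node NUMA event statistics from the guest's `node_data[]`
/// array. Returns `None` when the BTF offsets, kernel symbols, or the
/// `node_data` symbol are unavailable, when `nr_nodes` is zero, or when no
/// node yields readable stats.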
pub(crate) fn build(
owned_accessor: &GuestMemMapAccessorOwned,
offsets: Option<&NumaStatsOffsets>,
symbols: Option<&KernelSymbols>,
nr_nodes: u32,
) -> Option<Vec<PerNodeNumaStats>> {
let offsets = offsets?;
let symbols = symbols?;
let node_data_kva = symbols.node_data?;
if nr_nodes == 0 {
return None;
}
let kernel = owned_accessor.guest_kernel();
let stats = walk_node_data(kernel, node_data_kva, offsets, nr_nodes);
if stats.is_empty() { None } else { Some(stats) }
}
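
/// Walks `node_data[0..nr_nodes]`, skipping NULL slots (offline or
/// never-instantiated nodes), and collects stats for each populated node.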
fn walk_node_data(
kernel: &GuestKernel<'_>,
node_data_kva: u64,
offsets: &NumaStatsOffsets,
nr_nodes: u32,
) -> Vec<PerNodeNumaStats> {
let mut out = Vec::with_capacity(nr_nodes as usize);
let node_data_pa = kernel.text_kva_to_pa(node_data_kva);
let mem = kernel.mem();
for node in 0..nr_nodes {
let pgdat_kva = mem.read_u64(node_data_pa, (node as usize) * 8);
if pgdat_kva == 0 {
continue;
}
let Some(per_node) = read_per_node_stats(kernel, pgdat_kva, offsets, node) else {
continue;
};
out.push(per_node);
}
out
}
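
/// Reads one node's `pglist_data` and sums each `vm_numa_event` slot across
/// all of its zones, matching how per-node NUMA event totals are derived
/// from the per-zone counter arrays.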
fn read_per_node_stats(
kernel: &GuestKernel<'_>,
pgdat_kva: u64,
offsets: &NumaStatsOffsets,
node: u32,
) -> Option<PerNodeNumaStats> {
let mem = kernel.mem();
let walk = kernel.walk_context();
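    // pgdat structures live in the kernel's direct map rather than the text
    // mapping, so resolve the pointer with a full page-table walk.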
let pgdat_pa = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
pgdat_kva,
walk.l5,
walk.tcr_el1,
)?;
let mut sums = [0u64; NR_VM_NUMA_EVENT_ITEMS];
let zones_base = pgdat_pa.checked_add(offsets.pglist_data_node_zones as u64)?;
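    // Precompute the byte offset of each event slot within a zone; any
    // overflow in the offset arithmetic bails out with None.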
let mut slot_offs = [0usize; NR_VM_NUMA_EVENT_ITEMS];
for (slot, off) in slot_offs.iter_mut().enumerate() {
*off = offsets
.zone_vm_numa_event
.checked_add(slot.checked_mul(8)?)?;
}
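    // Accumulate each event counter across every zone of this node.
    // wrapping_add cannot panic, so a stale mapping that yields garbage
    // values degrades to a bogus sum instead of aborting the walk.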
for zone_idx in 0..MAX_NR_ZONES {
let zone_off = (zone_idx as u64).checked_mul(offsets.zone_size as u64)?;
let zone_pa = zones_base.checked_add(zone_off)?;
for (slot_off, sum) in slot_offs.iter().zip(sums.iter_mut()) {
let v = mem.read_u64(zone_pa, *slot_off);
*sum = sum.wrapping_add(v);
}
}
Some(PerNodeNumaStats {
node,
        numa_hit: sums[NUMA_HIT],
        numa_miss: sums[NUMA_MISS],
        numa_foreign: sums[NUMA_FOREIGN],
        numa_interleave_hit: sums[NUMA_INTERLEAVE_HIT],
        numa_local: sums[NUMA_LOCAL],
        numa_other: sums[NUMA_OTHER],
})
}
#[cfg(test)]
mod tests {
use super::*;
use crate::monitor::reader::GuestMem;
use crate::monitor::symbols::{
DEFAULT_PAGE_OFFSET, START_KERNEL_MAP, text_kva_to_pa_with_base,
};
use std::collections::HashMap;
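
    /// Synthetic guest-memory image: a fake `node_data[]` array at PA 0x1000
    /// whose entries point at fabricated `pglist_data` blocks, with every
    /// zone event slot seeded as `v = n*1000 + z*100 + k` so the expected
    /// per-node sums are easy to derive.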
struct NumaLayout {
buf: Vec<u8>,
offsets: NumaStatsOffsets,
node_data_kva: u64,
nr_nodes: u32,
}
impl NumaLayout {
fn build(nr_nodes: u32) -> Self {
let pglist_data_node_zones = 0x40usize;
let zone_size = 0x80usize;
let zone_vm_numa_event = 0x10usize;
let pgdat_size = pglist_data_node_zones + MAX_NR_ZONES * zone_size;
let node_data_pa: u64 = 0x1000;
let pgdat_base_pa: u64 = 0x10000;
let total = (pgdat_base_pa as usize) + (nr_nodes as usize) * pgdat_size + 0x100;
let mut buf = vec![0u8; total];
for n in 0..nr_nodes {
let pgdat_pa = pgdat_base_pa + (n as u64) * (pgdat_size as u64);
let pgdat_kva = DEFAULT_PAGE_OFFSET.wrapping_add(pgdat_pa);
let slot = (node_data_pa as usize) + (n as usize) * 8;
buf[slot..slot + 8].copy_from_slice(&pgdat_kva.to_le_bytes());
for z in 0..MAX_NR_ZONES {
let zone_pa = pgdat_pa
+ (pglist_data_node_zones as u64)
+ (z as u64) * (zone_size as u64);
for k in 0..NR_VM_NUMA_EVENT_ITEMS {
let slot_pa = zone_pa as usize + zone_vm_numa_event + k * 8;
let v: u64 = (n as u64) * 1000 + (z as u64) * 100 + (k as u64);
buf[slot_pa..slot_pa + 8].copy_from_slice(&v.to_le_bytes());
}
}
}
let node_data_kva = START_KERNEL_MAP.wrapping_add(node_data_pa);
NumaLayout {
buf,
offsets: NumaStatsOffsets {
pglist_data_node_zones,
zone_vm_numa_event,
zone_size,
},
node_data_kva,
nr_nodes,
}
}
}
fn make_kernel(buf: &mut [u8]) -> GuestMem {
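        // SAFETY: every caller keeps `buf` alive and unaliased for as long
        // as the returned GuestMem is used.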
unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) }
}
#[test]
fn two_nodes_summed_across_zones() {
let mut layout = NumaLayout::build(2);
let nr_nodes = layout.nr_nodes;
let node_data_kva = layout.node_data_kva;
let offsets = layout.offsets;
let mem = make_kernel(&mut layout.buf);
let kernel = GuestKernel::new_for_test(&mem, HashMap::new(), DEFAULT_PAGE_OFFSET, 0, false);
let stats = walk_node_data(&kernel, node_data_kva, &offsets, nr_nodes);
assert_eq!(stats.len(), 2);
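        // Summing v = n*1000 + z*100 + k over z = 0..5 gives
        // 5000n + 100*(0+1+2+3+4) + 5k = 5*n*1000 + 1000 + 5*k.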
let expected = |n: u64, k: u64| -> u64 { 5 * n * 1000 + 1000 + 5 * k };
for (idx, st) in stats.iter().enumerate() {
let n = idx as u64;
assert_eq!(st.node, idx as u32);
assert_eq!(st.numa_hit, expected(n, 0));
assert_eq!(st.numa_miss, expected(n, 1));
assert_eq!(st.numa_foreign, expected(n, 2));
assert_eq!(st.numa_interleave_hit, expected(n, 3));
assert_eq!(st.numa_local, expected(n, 4));
assert_eq!(st.numa_other, expected(n, 5));
}
}
#[test]
fn offline_node_skipped() {
let mut layout = NumaLayout::build(2);
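        // Zero node 1's node_data[] pointer to model an offline node.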
let node_data_pa =
text_kva_to_pa_with_base(layout.node_data_kva, START_KERNEL_MAP, 0) as usize;
for b in &mut layout.buf[node_data_pa + 8..node_data_pa + 16] {
*b = 0;
}
let nr_nodes = layout.nr_nodes;
let node_data_kva = layout.node_data_kva;
let offsets = layout.offsets;
let mem = make_kernel(&mut layout.buf);
let kernel = GuestKernel::new_for_test(&mem, HashMap::new(), DEFAULT_PAGE_OFFSET, 0, false);
let stats = walk_node_data(&kernel, node_data_kva, &offsets, nr_nodes);
assert_eq!(stats.len(), 1);
assert_eq!(stats[0].node, 0);
}
#[test]
fn all_slots_zero_yields_empty() {
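        // node_data[] is left entirely zeroed, so every slot is skipped.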
let mut buf = vec![0u8; 0x1_0000];
let mem = make_kernel(&mut buf);
let kernel = GuestKernel::new_for_test(&mem, HashMap::new(), DEFAULT_PAGE_OFFSET, 0, false);
let offsets = NumaStatsOffsets {
pglist_data_node_zones: 0x40,
zone_vm_numa_event: 0x10,
zone_size: 0x80,
};
let kva = START_KERNEL_MAP.wrapping_add(0x1000);
let stats = walk_node_data(&kernel, kva, &offsets, 4);
assert!(stats.is_empty());
}
#[test]
fn zero_nodes_yields_empty_walk() {
let mut buf = vec![0u8; 0x1_0000];
let mem = make_kernel(&mut buf);
let kernel = GuestKernel::new_for_test(&mem, HashMap::new(), DEFAULT_PAGE_OFFSET, 0, false);
let offsets = NumaStatsOffsets {
pglist_data_node_zones: 0x40,
zone_vm_numa_event: 0x10,
zone_size: 0x80,
};
let kva = START_KERNEL_MAP.wrapping_add(0x1000);
let stats = walk_node_data(&kernel, kva, &offsets, 0);
assert!(stats.is_empty());
}
#[test]
fn mixed_populated_and_zero_tail_yields_populated_only() {
let mut layout = NumaLayout::build(2);
let nr_nodes_to_walk: u32 = 4;
let node_data_kva = layout.node_data_kva;
let offsets = layout.offsets;
let mem = make_kernel(&mut layout.buf);
let kernel = GuestKernel::new_for_test(&mem, HashMap::new(), DEFAULT_PAGE_OFFSET, 0, false);
let stats = walk_node_data(&kernel, node_data_kva, &offsets, nr_nodes_to_walk);
assert_eq!(
stats.len(),
2,
"only the 2 populated slots produce stats; trailing zero slots are skipped"
);
assert_eq!(stats[0].node, 0);
assert_eq!(stats[1].node, 1);
}
}