use anyhow::Context;
use super::btf_offsets::BpfProgOffsets;
use super::idr::{translate_any_kva, xa_load};
use super::reader::{GuestMem, WalkContext};
use super::symbols::text_kva_to_pa_with_base;
/// `BPF_PROG_TYPE_STRUCT_OPS` from the kernel UAPI `enum bpf_prog_type`
/// (include/uapi/linux/bpf.h).
const BPF_PROG_TYPE_STRUCT_OPS: u32 = 27;
/// `BPF_OBJ_NAME_LEN` from the kernel UAPI: fixed, NUL-padded size of
/// `bpf_prog_aux.name`.
const BPF_OBJ_NAME_LEN: usize = 16;
/// Verifier-time statistics for one BPF program, as read out of guest memory.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ProgVerifierStats {
    // Program name from `bpf_prog_aux.name`, NUL-truncated and lossily decoded.
    pub name: String,
    // `bpf_prog_aux.verified_insns`: instruction count processed by the verifier.
    pub verified_insns: u32,
}
/// Scans the kernel's `prog_idr` xarray and collects verifier statistics for
/// every loaded `BPF_PROG_TYPE_STRUCT_OPS` program.
///
/// Entries that fail to load or translate are skipped, so the walk is
/// best-effort and never fails outright.
pub(crate) fn find_struct_ops_progs(
    mem: &GuestMem,
    walk: WalkContext,
    prog_idr_kva: u64,
    offsets: &BpfProgOffsets,
    start_kernel_map: u64,
    phys_base: u64,
) -> Vec<ProgVerifierStats> {
    // Resolve the idr's physical address via the static kernel text mapping.
    let idr_pa = text_kva_to_pa_with_base(prog_idr_kva, start_kernel_map, phys_base);
    let head = mem.read_u64(idr_pa, offsets.idr_xa_head);
    if head == 0 {
        return Vec::new();
    }
    let next_id = mem.read_u32(idr_pa, offsets.idr_next);
    (0..next_id)
        .filter_map(|id| {
            let entry = xa_load(
                mem,
                walk.page_offset,
                head,
                id as u64,
                offsets.xa_node_slots,
                offsets.xa_node_shift,
            )?;
            if entry == 0 {
                return None;
            }
            let prog_pa = translate_any_kva(
                mem,
                walk.cr3_pa,
                walk.page_offset,
                entry,
                walk.l5,
                walk.tcr_el1,
            )?;
            if mem.read_u32(prog_pa, offsets.prog_type) != BPF_PROG_TYPE_STRUCT_OPS {
                return None;
            }
            let aux_kva = mem.read_u64(prog_pa, offsets.prog_aux);
            if aux_kva == 0 {
                return None;
            }
            let aux_pa = translate_any_kva(
                mem,
                walk.cr3_pa,
                walk.page_offset,
                aux_kva,
                walk.l5,
                walk.tcr_el1,
            )?;
            let verified_insns = mem.read_u32(aux_pa, offsets.aux_verified_insns);
            // Fixed-width, NUL-padded name; decode lossily up to the first NUL.
            let mut raw_name = [0u8; BPF_OBJ_NAME_LEN];
            mem.read_bytes(aux_pa + offsets.aux_name as u64, &mut raw_name);
            let end = raw_name
                .iter()
                .position(|&b| b == 0)
                .unwrap_or(BPF_OBJ_NAME_LEN);
            Some(ProgVerifierStats {
                name: String::from_utf8_lossy(&raw_name[..end]).into_owned(),
                verified_insns,
            })
        })
        .collect()
}
/// Summed per-CPU runtime statistics for one BPF program.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct ProgRuntimeStats {
    // Program name from `bpf_prog_aux.name`, NUL-truncated and lossily decoded.
    pub name: String,
    // Total invocation count, summed across CPUs with saturating adds.
    pub cnt: u64,
    // Total nanoseconds spent in the program, summed across CPUs.
    pub nsecs: u64,
    // Total miss count, summed across CPUs.
    pub misses: u64,
}
impl ProgRuntimeStats {
    /// Mean nanoseconds per recorded call; 0.0 when the program never ran.
    pub fn ns_per_call(&self) -> f64 {
        match self.cnt {
            0 => 0.0,
            calls => self.nsecs as f64 / calls as f64,
        }
    }

    /// Fraction of attempts (calls + misses) that were misses; 0.0 when there
    /// was no activity. The sum saturates so `u64::MAX` inputs cannot wrap.
    pub fn miss_rate(&self) -> f64 {
        let attempts = self.cnt.saturating_add(self.misses);
        if attempts == 0 {
            return 0.0;
        }
        self.misses as f64 / attempts as f64
    }
}
impl std::fmt::Display for ProgRuntimeStats {
    /// Renders `name: cnt=.. nsecs=.. misses=..`, appending `ns/call` only when
    /// at least one call was recorded and `miss_rate` only when there was both
    /// activity and at least one miss.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            out,
            "{}: cnt={} nsecs={} misses={}",
            self.name, self.cnt, self.nsecs, self.misses
        )?;
        if self.cnt > 0 {
            write!(out, " ns/call={:.3}", self.ns_per_call())?;
        }
        let attempts = self.cnt.saturating_add(self.misses);
        if attempts > 0 && self.misses > 0 {
            write!(out, " miss_rate={:.4}", self.miss_rate())?;
        }
        Ok(())
    }
}
/// Walks the kernel's `prog_idr` xarray and sums per-CPU `bpf_prog_stats`
/// counters (cnt / nsecs / misses) for every loaded
/// `BPF_PROG_TYPE_STRUCT_OPS` program.
///
/// `per_cpu_offsets` holds the per-CPU base offsets added to the program's
/// percpu stats pointer; an offset of 0 is only honoured for index 0 (a zero
/// offset for any later CPU is skipped as "unknown").
///
/// Unreadable or untranslatable entries are skipped rather than failing the
/// whole walk, so the result is best-effort.
pub(crate) fn walk_struct_ops_runtime_stats(
    mem: &GuestMem,
    walk: WalkContext,
    prog_idr_kva: u64,
    offsets: &BpfProgOffsets,
    per_cpu_offsets: &[u64],
    start_kernel_map: u64,
    phys_base: u64,
) -> Vec<ProgRuntimeStats> {
    let idr_pa = text_kva_to_pa_with_base(prog_idr_kva, start_kernel_map, phys_base);
    let xa_head = mem.read_u64(idr_pa, offsets.idr_xa_head);
    if xa_head == 0 {
        // Empty xarray: no BPF programs to report.
        return Vec::new();
    }
    let idr_next = mem.read_u32(idr_pa, offsets.idr_next);
    let mut stats_out = Vec::new();
    for id in 0..idr_next {
        let Some(entry) = xa_load(
            mem,
            walk.page_offset,
            xa_head,
            id as u64,
            offsets.xa_node_slots,
            offsets.xa_node_shift,
        ) else {
            continue;
        };
        if entry == 0 {
            continue;
        }
        let Some(prog_pa) = translate_any_kva(
            mem,
            walk.cr3_pa,
            walk.page_offset,
            entry,
            walk.l5,
            walk.tcr_el1,
        ) else {
            continue;
        };
        if mem.read_u32(prog_pa, offsets.prog_type) != BPF_PROG_TYPE_STRUCT_OPS {
            continue;
        }
        let aux_kva = mem.read_u64(prog_pa, offsets.prog_aux);
        if aux_kva == 0 {
            continue;
        }
        let Some(aux_pa) = translate_any_kva(
            mem,
            walk.cr3_pa,
            walk.page_offset,
            aux_kva,
            walk.l5,
            walk.tcr_el1,
        ) else {
            continue;
        };
        // Fixed-width, NUL-padded name; decode lossily up to the first NUL.
        let mut name_buf = [0u8; BPF_OBJ_NAME_LEN];
        mem.read_bytes(aux_pa + offsets.aux_name as u64, &mut name_buf);
        let name_len = name_buf
            .iter()
            .position(|&b| b == 0)
            .unwrap_or(BPF_OBJ_NAME_LEN);
        let name = String::from_utf8_lossy(&name_buf[..name_len]).to_string();
        let stats_percpu_kva = mem.read_u64(prog_pa, offsets.prog_stats);
        if stats_percpu_kva == 0 {
            // No runtime stats pointer for this program; skip it entirely.
            continue;
        }
        let mut cnt: u64 = 0;
        let mut nsecs: u64 = 0;
        let mut misses: u64 = 0;
        for (cpu_index, &cpu_off) in per_cpu_offsets.iter().enumerate() {
            if cpu_off == 0 && cpu_index > 0 {
                continue;
            }
            let stats_kva = stats_percpu_kva.wrapping_add(cpu_off);
            // Only read when the KVA translates and the PA is inside the
            // captured memory image; otherwise this CPU contributes nothing.
            if let Some(stats_pa) = translate_any_kva(
                mem,
                walk.cr3_pa,
                walk.page_offset,
                stats_kva,
                walk.l5,
                walk.tcr_el1,
            ) && stats_pa < mem.size()
            {
                let (c, n, m) = read_stats_triple(mem, stats_pa, offsets);
                cnt = cnt.saturating_add(c);
                nsecs = nsecs.saturating_add(n);
                misses = misses.saturating_add(m);
            }
        }
        stats_out.push(ProgRuntimeStats {
            name,
            cnt,
            nsecs,
            misses,
        });
    }
    stats_out
}

/// Reads the (cnt, nsecs, misses) u64 triple of one per-CPU stats slot at
/// `stats_pa`.
///
/// Fast path: when all three fields fit inside a 64-byte window, fetch the
/// window with a single bulk read and decode each value from it. If the span
/// is too wide or the bulk read comes back short, fall back to three
/// individual u64 reads. (This fallback was previously duplicated verbatim in
/// two arms of the walk loop.)
fn read_stats_triple(mem: &GuestMem, stats_pa: u64, offsets: &BpfProgOffsets) -> (u64, u64, u64) {
    let lo = offsets
        .stats_cnt
        .min(offsets.stats_nsecs)
        .min(offsets.stats_misses);
    let hi = offsets
        .stats_cnt
        .max(offsets.stats_nsecs)
        .max(offsets.stats_misses)
        + 8;
    let span = hi - lo;
    if span <= 64 {
        let mut buf = [0u8; 64];
        if mem.read_bytes(stats_pa + lo as u64, &mut buf[..span]) == span {
            // Decode each counter at its offset relative to the window start.
            let parse = |off: usize| -> u64 {
                let i = off - lo;
                u64::from_ne_bytes(buf[i..i + 8].try_into().unwrap())
            };
            return (
                parse(offsets.stats_cnt),
                parse(offsets.stats_nsecs),
                parse(offsets.stats_misses),
            );
        }
    }
    (
        mem.read_u64(stats_pa, offsets.stats_cnt),
        mem.read_u64(stats_pa, offsets.stats_nsecs),
        mem.read_u64(stats_pa, offsets.stats_misses),
    )
}
/// Read-only interface for extracting STRUCT_OPS BPF program statistics.
pub trait BpfProgAccessor {
    /// Returns verifier-time stats for every loaded STRUCT_OPS program.
    fn struct_ops_progs(&self) -> Vec<ProgVerifierStats>;
    /// Returns summed per-CPU runtime stats for every loaded STRUCT_OPS
    /// program; `per_cpu_offsets` are the per-CPU percpu base offsets.
    fn struct_ops_runtime_stats(&self, per_cpu_offsets: &[u64]) -> Vec<ProgRuntimeStats>;
}
/// [`BpfProgAccessor`] backed by a borrowed guest kernel view, with the
/// `prog_idr` symbol address and struct offsets resolved up front.
pub struct GuestMemProgAccessor<'a> {
    kernel: &'a super::guest::GuestKernel<'a>,
    // KVA of the kernel's `prog_idr` symbol, resolved from vmlinux.
    prog_idr_kva: u64,
    offsets: &'a BpfProgOffsets,
}
impl<'a> GuestMemProgAccessor<'a> {
    /// Builds an accessor from an already-constructed guest kernel view,
    /// resolving the `prog_idr` symbol once so later walks are cheap.
    ///
    /// # Errors
    /// Fails when the vmlinux symbol table has no `prog_idr` entry.
    pub fn from_guest_kernel(
        kernel: &'a super::guest::GuestKernel<'a>,
        offsets: &'a BpfProgOffsets,
    ) -> anyhow::Result<Self> {
        match kernel.symbol_kva("prog_idr") {
            Some(prog_idr_kva) => Ok(Self {
                kernel,
                prog_idr_kva,
                offsets,
            }),
            None => Err(anyhow::anyhow!("prog_idr symbol not found in vmlinux")),
        }
    }
}
impl BpfProgAccessor for GuestMemProgAccessor<'_> {
    /// Walks `prog_idr` for verifier-time stats of STRUCT_OPS programs.
    fn struct_ops_progs(&self) -> Vec<ProgVerifierStats> {
        let kernel = self.kernel;
        find_struct_ops_progs(
            kernel.mem(),
            kernel.walk_context(),
            self.prog_idr_kva,
            self.offsets,
            kernel.start_kernel_map(),
            kernel.phys_base(),
        )
    }

    /// Walks `prog_idr` and sums per-CPU runtime stats of STRUCT_OPS programs.
    fn struct_ops_runtime_stats(&self, per_cpu_offsets: &[u64]) -> Vec<ProgRuntimeStats> {
        let kernel = self.kernel;
        walk_struct_ops_runtime_stats(
            kernel.mem(),
            kernel.walk_context(),
            self.prog_idr_kva,
            self.offsets,
            per_cpu_offsets,
            kernel.start_kernel_map(),
            kernel.phys_base(),
        )
    }
}
/// Owning variant of [`GuestMemProgAccessor`]: holds the guest kernel view and
/// offsets itself (only the memory image is borrowed).
pub struct GuestMemProgAccessorOwned<'a> {
    kernel: super::guest::GuestKernel<'a>,
    // KVA of the kernel's `prog_idr` symbol, resolved from vmlinux.
    prog_idr_kva: u64,
    offsets: BpfProgOffsets,
}
impl<'a> GuestMemProgAccessorOwned<'a> {
    /// Reads and parses `vmlinux`, builds the guest kernel view and the
    /// BTF-derived offsets, and resolves the `prog_idr` symbol.
    ///
    /// # Errors
    /// Fails when the vmlinux file cannot be read or parsed as ELF, when the
    /// guest kernel view or offsets cannot be derived, or when `prog_idr` is
    /// missing from the symbol table.
    pub fn new(
        mem: &'a super::reader::GuestMem,
        vmlinux: &std::path::Path,
        tcr_el1: u64,
        cr3_pa: u64,
    ) -> anyhow::Result<Self> {
        let image = std::fs::read(vmlinux)
            .with_context(|| format!("read vmlinux: {}", vmlinux.display()))?;
        let elf = goblin::elf::Elf::parse(&image).context("parse vmlinux ELF")?;
        let kernel = super::guest::GuestKernel::from_elf(mem, &elf, tcr_el1, cr3_pa)?;
        let offsets = BpfProgOffsets::from_elf(&elf, &image, vmlinux)?;
        match kernel.symbol_kva("prog_idr") {
            Some(prog_idr_kva) => Ok(Self {
                kernel,
                prog_idr_kva,
                offsets,
            }),
            None => Err(anyhow::anyhow!("prog_idr symbol not found in vmlinux")),
        }
    }

    /// Borrows this owned state as a lightweight [`GuestMemProgAccessor`].
    pub fn as_accessor(&self) -> GuestMemProgAccessor<'_> {
        GuestMemProgAccessor {
            kernel: &self.kernel,
            prog_idr_kva: self.prog_idr_kva,
            offsets: &self.offsets,
        }
    }

    /// Direct access to the underlying guest kernel view.
    #[allow(dead_code)]
    pub fn guest_kernel(&self) -> &super::guest::GuestKernel<'a> {
        &self.kernel
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::monitor::symbols::START_KERNEL_MAP;

    // --- serde wire-format tests -------------------------------------------

    #[test]
    fn prog_verifier_stats_serde_roundtrip() {
        let info = ProgVerifierStats {
            name: "dispatch".to_string(),
            verified_insns: 42000,
        };
        let json = serde_json::to_string(&info).unwrap();
        let loaded: ProgVerifierStats = serde_json::from_str(&json).unwrap();
        assert_eq!(loaded.name, "dispatch");
        assert_eq!(loaded.verified_insns, 42000);
    }

    #[test]
    fn prog_verifier_stats_vec_serde_roundtrip() {
        let stats = vec![
            ProgVerifierStats {
                name: "dispatch".to_string(),
                verified_insns: 100000,
            },
            ProgVerifierStats {
                name: "enqueue".to_string(),
                verified_insns: 50000,
            },
        ];
        let json = serde_json::to_vec(&stats).unwrap();
        let loaded: Vec<ProgVerifierStats> = serde_json::from_slice(&json).unwrap();
        assert_eq!(loaded.len(), 2);
        assert_eq!(loaded[0].name, "dispatch");
        assert_eq!(loaded[0].verified_insns, 100000);
        assert_eq!(loaded[1].name, "enqueue");
        assert_eq!(loaded[1].verified_insns, 50000);
    }

    #[test]
    fn prog_verifier_stats_empty_name() {
        let info = ProgVerifierStats {
            name: String::new(),
            verified_insns: 0,
        };
        let json = serde_json::to_string(&info).unwrap();
        let loaded: ProgVerifierStats = serde_json::from_str(&json).unwrap();
        assert_eq!(loaded.name, "");
        assert_eq!(loaded.verified_insns, 0);
    }

    #[test]
    fn prog_verifier_stats_max_values() {
        // 16 is BPF_OBJ_NAME_LEN: the longest name the walker can produce.
        let info = ProgVerifierStats {
            name: "x".repeat(16),
            verified_insns: u32::MAX,
        };
        let json = serde_json::to_string(&info).unwrap();
        let loaded: ProgVerifierStats = serde_json::from_str(&json).unwrap();
        assert_eq!(loaded.verified_insns, u32::MAX);
        assert_eq!(loaded.name.len(), 16);
    }

    #[test]
    fn prog_runtime_stats_serde_roundtrip() {
        let info = ProgRuntimeStats {
            name: "ktstr_dispatch".to_string(),
            cnt: 12345,
            nsecs: 9_876_543,
            misses: 7,
        };
        let json = serde_json::to_string(&info).unwrap();
        let loaded: ProgRuntimeStats = serde_json::from_str(&json).unwrap();
        assert_eq!(loaded.name, "ktstr_dispatch");
        assert_eq!(loaded.cnt, 12345);
        assert_eq!(loaded.nsecs, 9_876_543);
        assert_eq!(loaded.misses, 7);
    }

    #[test]
    fn prog_runtime_stats_max_u64_saturation_roundtrip() {
        // The walker saturates sums at u64::MAX; those values must round-trip.
        let info = ProgRuntimeStats {
            name: "saturated".to_string(),
            cnt: u64::MAX,
            nsecs: u64::MAX,
            misses: u64::MAX,
        };
        let json = serde_json::to_string(&info).unwrap();
        let loaded: ProgRuntimeStats = serde_json::from_str(&json).unwrap();
        assert_eq!(loaded.cnt, u64::MAX);
        assert_eq!(loaded.nsecs, u64::MAX);
        assert_eq!(loaded.misses, u64::MAX);
    }

    #[test]
    fn prog_runtime_stats_default_zero() {
        let info = ProgRuntimeStats::default();
        assert_eq!(info.name, "");
        assert_eq!(info.cnt, 0);
        assert_eq!(info.nsecs, 0);
        assert_eq!(info.misses, 0);
    }

    // --- Display formatting tests ------------------------------------------

    #[test]
    fn prog_runtime_stats_display_format() {
        let info = ProgRuntimeStats {
            name: "ktstr_enqueue".to_string(),
            cnt: 100,
            nsecs: 200,
            misses: 3,
        };
        // miss_rate = 3 / (100 + 3) ≈ 0.0291.
        assert_eq!(
            format!("{info}"),
            "ktstr_enqueue: cnt=100 nsecs=200 misses=3 ns/call=2.000 miss_rate=0.0291",
        );
    }

    #[test]
    fn prog_runtime_stats_display_zero_counters_elides_derived() {
        let info = ProgRuntimeStats {
            name: "never_ran".to_string(),
            cnt: 0,
            nsecs: 0,
            misses: 0,
        };
        let s = format!("{info}");
        assert_eq!(s, "never_ran: cnt=0 nsecs=0 misses=0");
        assert!(!s.contains("ns/call"), "ns/call must elide when cnt=0: {s}");
        assert!(
            !s.contains("miss_rate"),
            "miss_rate must elide when total=0: {s}"
        );
    }

    #[test]
    fn prog_runtime_stats_display_no_misses_elides_miss_rate() {
        let info = ProgRuntimeStats {
            name: "healthy".to_string(),
            cnt: 1000,
            nsecs: 50_000,
            misses: 0,
        };
        let s = format!("{info}");
        assert!(s.contains("ns/call=50.000"), "ns/call must render: {s}");
        assert!(
            !s.contains("miss_rate"),
            "miss_rate must elide when misses=0: {s}",
        );
    }

    // --- derived-metric tests ----------------------------------------------

    #[test]
    fn prog_runtime_stats_ns_per_call_derived() {
        let info = ProgRuntimeStats {
            name: "x".to_string(),
            cnt: 1000,
            nsecs: 50_000,
            misses: 0,
        };
        assert_eq!(info.ns_per_call(), 50.0);
        assert!(info.ns_per_call().is_finite());
        // cnt=0 must yield 0.0, not NaN/inf from division by zero.
        let info = ProgRuntimeStats {
            name: "x".to_string(),
            cnt: 0,
            nsecs: 999_999,
            misses: 0,
        };
        assert_eq!(info.ns_per_call(), 0.0);
        assert!(info.ns_per_call().is_finite());
    }

    #[test]
    fn prog_runtime_stats_miss_rate_derived() {
        let info = ProgRuntimeStats {
            name: "x".to_string(),
            cnt: 1,
            nsecs: 0,
            misses: 9,
        };
        assert!((info.miss_rate() - 0.9).abs() < 1e-12);
        assert!(info.miss_rate().is_finite());
        let info = ProgRuntimeStats::default();
        assert_eq!(info.miss_rate(), 0.0);
        assert!(info.miss_rate().is_finite());
        // cnt + misses saturates rather than wrapping, so the rate stays sane.
        let info = ProgRuntimeStats {
            name: "saturated".to_string(),
            cnt: u64::MAX,
            nsecs: 0,
            misses: 1000,
        };
        assert!(info.miss_rate().is_finite());
        assert!(info.miss_rate() >= 0.0);
    }

    #[test]
    fn prog_runtime_stats_wire_format_omits_derived_keys() {
        let info = ProgRuntimeStats {
            name: "x".to_string(),
            cnt: 100,
            nsecs: 200,
            misses: 3,
        };
        let json = serde_json::to_value(&info).unwrap();
        let map = match json {
            serde_json::Value::Object(m) => m,
            other => panic!("expected object, got {other:?}"),
        };
        assert!(
            !map.contains_key("ns_per_call"),
            "derived methods must NOT appear as wire fields: {map:#?}",
        );
        assert!(
            !map.contains_key("miss_rate"),
            "derived methods must NOT appear as wire fields: {map:#?}",
        );
        assert_eq!(info.ns_per_call(), 2.0);
        assert!((info.miss_rate() - 3.0_f64 / 103.0).abs() < 1e-12);
    }

    // --- synthetic guest-memory walk test ----------------------------------

    // Offsets chosen so the three stats fields span exactly 24 bytes, which
    // exercises the bulk-read decode path in walk_struct_ops_runtime_stats.
    fn synthetic_prog_offsets() -> BpfProgOffsets {
        BpfProgOffsets {
            prog_type: 0,
            prog_aux: 8,
            aux_verified_insns: 0,
            aux_name: 8,
            xa_node_slots: 16,
            xa_node_shift: 0,
            idr_xa_head: 0,
            idr_next: 8,
            prog_stats: 16,
            stats_cnt: 0,
            stats_nsecs: 8,
            stats_misses: 16,
        }
    }

    // Builds a 16 KiB fake guest image (idr -> prog -> aux/stats chain, one
    // page each) and runs the runtime-stats walk against it at the given
    // identity-style page_offset mapping.
    fn walk_struct_ops_runtime_stats_bulk_chain_at_page_offset(page_offset: u64) {
        use crate::monitor::reader::{GuestMem, WalkContext};
        let total: usize = 0x4000;
        let mut buf = vec![0u8; total];
        let pa_to_kva = |pa: u64| -> u64 { page_offset.wrapping_add(pa) };
        let idr_pa: u64 = 0x0000;
        let prog_pa: u64 = 0x1000;
        let aux_pa: u64 = 0x2000;
        let stats_pa: u64 = 0x3000;
        let prog_kva = pa_to_kva(prog_pa);
        assert_eq!(prog_kva & 2, 0, "prog_kva must be a leaf entry");
        let offsets = synthetic_prog_offsets();
        let lo = offsets
            .stats_cnt
            .min(offsets.stats_nsecs)
            .min(offsets.stats_misses);
        let hi = offsets
            .stats_cnt
            .max(offsets.stats_nsecs)
            .max(offsets.stats_misses)
            + 8;
        assert!(
            hi - lo <= 64,
            "test premise: stats span must be small enough for the bulk path"
        );
        let write_u64 = |buf: &mut Vec<u8>, pa: u64, val: u64| {
            let off = pa as usize;
            buf[off..off + 8].copy_from_slice(&val.to_ne_bytes());
        };
        let write_u32 = |buf: &mut Vec<u8>, pa: u64, val: u32| {
            let off = pa as usize;
            buf[off..off + 4].copy_from_slice(&val.to_ne_bytes());
        };
        // idr: xa_head points straight at the prog (single-entry xarray),
        // next-allocated id is 1 so the walk scans exactly id 0.
        write_u64(&mut buf, idr_pa + offsets.idr_xa_head as u64, prog_kva);
        write_u32(&mut buf, idr_pa + offsets.idr_next as u64, 1);
        write_u32(
            &mut buf,
            prog_pa + offsets.prog_type as u64,
            BPF_PROG_TYPE_STRUCT_OPS,
        );
        write_u64(
            &mut buf,
            prog_pa + offsets.prog_aux as u64,
            pa_to_kva(aux_pa),
        );
        write_u64(
            &mut buf,
            prog_pa + offsets.prog_stats as u64,
            pa_to_kva(stats_pa),
        );
        write_u32(&mut buf, aux_pa + offsets.aux_verified_insns as u64, 12_345);
        let name = b"bulk_test";
        let name_pa = (aux_pa + offsets.aux_name as u64) as usize;
        buf[name_pa..name_pa + name.len()].copy_from_slice(name);
        // Distinct sentinel values so a transposed decode would be caught.
        let known_cnt: u64 = 0x1111_1111_1111_1111;
        let known_nsecs: u64 = 0x2222_2222_2222_2222;
        let known_misses: u64 = 0x3333_3333_3333_3333;
        write_u64(&mut buf, stats_pa + offsets.stats_cnt as u64, known_cnt);
        write_u64(&mut buf, stats_pa + offsets.stats_nsecs as u64, known_nsecs);
        write_u64(
            &mut buf,
            stats_pa + offsets.stats_misses as u64,
            known_misses,
        );
        // SAFETY: `buf` stays alive (and is not reallocated) for the whole
        // lifetime of `mem`, and the pointer/length describe exactly that
        // allocation.
        let mem = unsafe { GuestMem::new(buf.as_ptr() as *mut u8, buf.len() as u64) };
        let walk = WalkContext {
            cr3_pa: 0,
            page_offset,
            l5: false,
            tcr_el1: 0,
        };
        let per_cpu_offsets = vec![0u64];
        let prog_idr_kva = idr_pa + START_KERNEL_MAP;
        let stats = walk_struct_ops_runtime_stats(
            &mem,
            walk,
            prog_idr_kva,
            &offsets,
            &per_cpu_offsets,
            START_KERNEL_MAP,
            0,
        );
        assert_eq!(stats.len(), 1, "single STRUCT_OPS prog must surface");
        assert_eq!(stats[0].name, "bulk_test");
        assert_eq!(
            stats[0].cnt, known_cnt,
            "bulk read must parse cnt at offsets.stats_cnt within the 24-byte window",
        );
        assert_eq!(
            stats[0].nsecs, known_nsecs,
            "bulk read must parse nsecs at offsets.stats_nsecs within the 24-byte window",
        );
        assert_eq!(
            stats[0].misses, known_misses,
            "bulk read must parse misses at offsets.stats_misses within the 24-byte window",
        );
    }

    // Per-arch wrappers: each uses that architecture's canonical direct-map base.
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn walk_struct_ops_runtime_stats_bulk_24byte_read_parses_three_offsets() {
        walk_struct_ops_runtime_stats_bulk_chain_at_page_offset(0xFFFF_8880_0000_0000);
    }

    #[test]
    #[cfg(target_arch = "aarch64")]
    fn walk_struct_ops_runtime_stats_bulk_24byte_read_parses_three_offsets() {
        walk_struct_ops_runtime_stats_bulk_chain_at_page_offset(0xFFFF_0000_0000_0000);
    }

    #[test]
    fn prog_runtime_stats_format_chain_inner_appears_in_outer() {
        use crate::monitor::dump::{FailureDumpReport, SCHEMA_SINGLE};
        let info = ProgRuntimeStats {
            name: "chain_test".to_string(),
            cnt: 7,
            nsecs: 42,
            misses: 1,
        };
        let inner = format!("{info}");
        assert_eq!(
            inner,
            "chain_test: cnt=7 nsecs=42 misses=1 ns/call=6.000 miss_rate=0.1250",
        );
        let report = FailureDumpReport {
            schema: SCHEMA_SINGLE.to_string(),
            prog_runtime_stats: vec![info],
            ..Default::default()
        };
        let outer = format!("{report}");
        assert!(
            outer.contains(&inner),
            "FailureDumpReport's Display chain must dispatch through \
             ProgRuntimeStats::fmt — inner {inner:?} must appear \
             verbatim inside outer:\n{outer}",
        );
        assert!(
            outer.contains("prog_runtime_stats:"),
            "outer Display must carry the prog_runtime_stats section \
             header; without it the chain test could pass even when the \
             inner string matched a different format arm:\n{outer}",
        );
    }
}