use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::path::Path;
use btf_rs::Btf;
use super::Kva;
use super::bpf_map::{BPF_MAP_TYPE_ARENA, BpfMapInfo};
use super::btf_offsets::{find_struct, load_btf_from_path, member_byte_offset};
use super::guest::GuestKernel;
/// Translation granule of the guest, in bytes.
///
/// On aarch64 guests this is decoded from the TG1 field of TCR_EL1
/// (bits 31:30); every other architecture is assumed to use 4 KiB pages
/// and the register value is ignored.
fn guest_page_size(tcr_el1: u64) -> u64 {
    if cfg!(target_arch = "aarch64") {
        // TG1 encoding: 0b10 = 4 KiB, 0b01 = 16 KiB, 0b11 = 64 KiB.
        // 0b00 is reserved; fall back to 4 KiB for it.
        match (tcr_el1 >> 30) & 0x3 {
            0b01 => 16384,
            0b10 => 4096,
            0b11 => 65536,
            _ => 4096,
        }
    } else {
        4096
    }
}
/// Half of the arena's guard region, in bytes.
///
/// Rounds 64 KiB up to a multiple of two pages, then halves the result:
/// 32 KiB for 4 KiB and 16 KiB granules, 64 KiB for 64 KiB granules.
fn guard_half(page_size: u64) -> u64 {
    let two_pages = page_size << 1;
    let full_guard = (1u64 << 16).next_multiple_of(two_pages);
    full_guard / 2
}
// Maximum number of pages captured sequentially from the start of an arena.
const MAX_ARENA_PAGES: u64 = 4096;
// Maximum number of strided probes into the tail past MAX_ARENA_PAGES.
const MAX_ARENA_STRIDE_PROBES: u64 = 256;
// Arenas span at most 4 GiB of virtual address space; larger (corrupt)
// declared spans are clamped to this before planning the walk.
const MAX_VM_RANGE_BYTES: u64 = 0x1_0000_0000;
/// Byte offsets (resolved from BTF) of the struct members this module
/// needs to walk a `struct bpf_arena` in guest memory.
#[derive(Debug, Clone)]
pub struct BpfArenaOffsets {
    /// Offset of `bpf_arena.kern_vm` (a `struct vm_struct *`).
    pub arena_kern_vm: usize,
    /// Offset of `bpf_arena.user_vm_start`.
    pub arena_user_vm_start: usize,
    /// Offset of `vm_struct.addr` (base of the backing vmalloc region).
    pub vm_struct_addr: usize,
}
impl BpfArenaOffsets {
    /// Resolve arena offsets from an on-disk vmlinux image.
    #[allow(dead_code)]
    pub fn from_vmlinux(path: &Path) -> Result<Self> {
        let parsed = load_btf_from_path(path).context("btf: open vmlinux")?;
        Self::from_btf(&parsed)
    }

    /// Resolve the member byte offsets this module needs from already
    /// loaded BTF. Fails if the kernel lacks arena support (no
    /// `struct bpf_arena` in BTF) or if an expected member is missing.
    pub fn from_btf(btf: &Btf) -> Result<Self> {
        let (arena_struct, _) = find_struct(btf, "bpf_arena")
            .context("btf: struct bpf_arena not found (arena unsupported on this kernel?)")?;
        let kern_vm_off = member_byte_offset(btf, &arena_struct, "kern_vm")?;
        let user_vm_off = member_byte_offset(btf, &arena_struct, "user_vm_start")?;
        let (vm_struct_ty, _) =
            find_struct(btf, "vm_struct").context("btf: struct vm_struct not found")?;
        let addr_off = member_byte_offset(btf, &vm_struct_ty, "addr")?;
        Ok(Self {
            arena_kern_vm: kern_vm_off,
            arena_user_vm_start: user_vm_off,
            vm_struct_addr: addr_off,
        })
    }
}
/// One captured page of arena memory.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[non_exhaustive]
pub struct ArenaPage {
    /// Guest user-space virtual address this page is mapped at.
    pub user_addr: u64,
    /// Raw page contents (at most one guest page; may be shorter if the
    /// read was truncated at the end of the memory image).
    pub bytes: Vec<u8>,
}
/// Best-effort dump of an arena map's mapped pages plus walk metadata.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[non_exhaustive]
pub struct ArenaSnapshot {
    /// Pages that were successfully translated and read.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub pages: Vec<ArenaPage>,
    /// True when more pages were declared than the sequential walk covered.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub truncated: bool,
    /// Page count implied by the map's declared span (after clamping).
    pub declared_pages: u64,
    /// True when the declared span exceeded the 4 GiB cap and was clamped.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub span_capped: bool,
    /// Kernel VA of the arena's usable range (vm_struct.addr + guard half).
    pub kern_vm_start: u64,
    /// Guest user VA where the arena is mapped.
    pub user_vm_start: u64,
}
/// Capture the readable contents of a BPF arena map from guest memory.
///
/// Returns an empty `ArenaSnapshot` when `info` is not an arena map, or
/// when the `bpf_arena` struct / its `kern_vm` vm_struct cannot be
/// translated. Page reads are best-effort: any page that fails to
/// translate or read is silently omitted from `pages`.
pub fn snapshot_arena(
    kernel: &GuestKernel,
    info: &BpfMapInfo,
    offsets: &BpfArenaOffsets,
) -> ArenaSnapshot {
    // Only arena maps carry the bpf_arena layout read below.
    if info.map_type != BPF_MAP_TYPE_ARENA {
        return ArenaSnapshot::default();
    }
    let mem = kernel.mem();
    let walk = kernel.walk_context();
    // Guest granule from TCR_EL1 (aarch64); 4 KiB on other architectures.
    let page_size = guest_page_size(walk.tcr_el1);
    let guard_half_bytes = guard_half(page_size);
    // Translate the bpf_arena struct's kernel VA to a physical address.
    let arena_kva = info.map_kva;
    let Some(arena_pa) = super::idr::translate_any_kva(
        mem,
        walk.cr3_pa,
        walk.page_offset,
        arena_kva,
        walk.l5,
        walk.tcr_el1,
    ) else {
        return ArenaSnapshot::default();
    };
    let user_vm_start = mem.read_u64(arena_pa, offsets.arena_user_vm_start);
    let kern_vm_kva = mem.read_u64(arena_pa, offsets.arena_kern_vm);
    // kern_vm is a `struct vm_struct *`; NULL means no kernel mapping yet.
    // Still report user_vm_start so the caller sees where it would map.
    if kern_vm_kva == 0 {
        return ArenaSnapshot {
            user_vm_start,
            ..ArenaSnapshot::default()
        };
    }
    let Some(vm_struct_pa) = super::idr::translate_any_kva(
        mem,
        walk.cr3_pa,
        walk.page_offset,
        kern_vm_kva,
        walk.l5,
        walk.tcr_el1,
    ) else {
        return ArenaSnapshot {
            user_vm_start,
            ..ArenaSnapshot::default()
        };
    };
    // vm_struct.addr is the base of the vmalloc region backing the arena.
    let vm_addr = mem.read_u64(vm_struct_pa, offsets.vm_struct_addr);
    if vm_addr == 0 {
        return ArenaSnapshot {
            user_vm_start,
            ..ArenaSnapshot::default()
        };
    }
    // NOTE(review): skipping half the guard region to reach the usable
    // range — presumably mirrors the kernel's arena guard layout (see
    // guard_half); confirm against arena_map_alloc.
    let kern_vm_start = vm_addr.wrapping_add(guard_half_bytes);
    let plan = ArenaWalkPlan::new((info.max_entries as u64) * page_size, page_size);
    let mut snapshot = ArenaSnapshot {
        pages: Vec::new(),
        truncated: plan.truncated,
        declared_pages: plan.declared_pages,
        span_capped: plan.span_capped,
        kern_vm_start,
        user_vm_start,
    };
    let mut scratch: Vec<u8> = Vec::with_capacity(page_size as usize);
    // Attempt to translate + read one page at page offset `pgoff`,
    // appending it to `pages` on success and doing nothing on failure.
    let mut try_capture_page = |pgoff: u64, pages: &mut Vec<ArenaPage>| {
        let Some(byte_off) = pgoff.checked_mul(page_size) else {
            return;
        };
        let user_addr = user_vm_start.wrapping_add(byte_off);
        // NOTE(review): kernel and user arena addresses appear to share
        // their low 32 bits (arenas span at most 4 GiB) — confirm this
        // aliasing against the kernel's arena address scheme.
        let kaddr = kern_vm_start.wrapping_add(user_addr & 0xFFFF_FFFF);
        let Some(pa) = mem.translate_kva(walk.cr3_pa, Kva(kaddr), walk.l5, walk.tcr_el1) else {
            return;
        };
        // Don't read past the end of the captured memory image.
        if pa + page_size > mem.size() {
            return;
        }
        scratch.clear();
        scratch.resize(page_size as usize, 0);
        let n = mem.read_bytes(pa, &mut scratch);
        scratch.truncate(n);
        if scratch.is_empty() {
            return;
        }
        // `take` hands ownership of the buffer to the snapshot; the next
        // iteration's `resize` re-allocates scratch.
        pages.push(ArenaPage {
            user_addr,
            bytes: std::mem::take(&mut scratch),
        });
    };
    // Dense walk over the head of the arena...
    for pgoff in 0..plan.sequential_to {
        try_capture_page(pgoff, &mut snapshot.pages);
    }
    // ...then sparse, evenly strided probes over the (possibly huge) tail.
    if let Some(stride) = plan.stride {
        let mut pgoff = plan.sequential_to;
        while pgoff < plan.declared_pages {
            try_capture_page(pgoff, &mut snapshot.pages);
            pgoff = pgoff.saturating_add(stride);
        }
    }
    snapshot
}
/// Precomputed plan for walking an arena's page range: a dense head
/// followed by an optional strided tail.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ArenaWalkPlan {
    /// Page count implied by the (possibly clamped) span.
    declared_pages: u64,
    /// Raw span exceeded MAX_VM_RANGE_BYTES and was clamped.
    span_capped: bool,
    /// Not every declared page is covered by the sequential walk.
    truncated: bool,
    /// Pages `[0, sequential_to)` are read one by one.
    sequential_to: u64,
    /// Step for probing the tail past `sequential_to`, when one exists.
    stride: Option<u64>,
}

impl ArenaWalkPlan {
    /// Build a plan for an arena declaring `raw_span` bytes of
    /// `page_size`-sized pages.
    fn new(raw_span: u64, page_size: u64) -> Self {
        // Clamp corrupt or oversized spans to the 4 GiB arena maximum.
        let span_capped = raw_span > MAX_VM_RANGE_BYTES;
        let declared_pages = raw_span.min(MAX_VM_RANGE_BYTES) / page_size;
        let sequential_to = declared_pages.min(MAX_ARENA_PAGES);
        let truncated = declared_pages > sequential_to;
        // Spread at most MAX_ARENA_STRIDE_PROBES probes across the tail;
        // `truncated` holds exactly when declared_pages > MAX_ARENA_PAGES.
        let stride = truncated.then(|| {
            let tail_pages = declared_pages - MAX_ARENA_PAGES;
            // div_ceil already yields >= 1 for a non-empty tail; max(1)
            // is kept as a defensive floor.
            tail_pages.div_ceil(MAX_ARENA_STRIDE_PROBES).max(1)
        });
        Self {
            declared_pages,
            span_capped,
            truncated,
            sequential_to,
            stride,
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse_arena_offsets_from_vmlinux() {
let path = match crate::monitor::find_test_vmlinux() {
Some(p) => p,
None => return,
};
if path.starts_with("/sys/") {
crate::report::test_skip("vmlinux is raw BTF (skipping ELF-only path)");
return;
}
let offsets = match BpfArenaOffsets::from_vmlinux(&path) {
Ok(o) => o,
Err(e) => {
crate::report::test_skip(format!("arena BTF missing: {e}"));
return;
}
};
assert!(
offsets.arena_user_vm_start > 0,
"user_vm_start follows embedded bpf_map"
);
assert_ne!(
offsets.arena_kern_vm, offsets.arena_user_vm_start,
"kern_vm distinct from user_vm_start"
);
assert!(
offsets.vm_struct_addr > 0,
"vm_struct.addr follows the next/llnode union"
);
}
const TEST_PAGE_SIZE: u64 = 4096;
#[test]
fn arena_walk_plan_constants_sane() {
assert_eq!(MAX_VM_RANGE_BYTES, 0x1_0000_0000);
assert_eq!(MAX_ARENA_PAGES, 4096);
assert_eq!(MAX_ARENA_STRIDE_PROBES, 256);
}
#[test]
fn arena_walk_plan_single_page() {
let plan = ArenaWalkPlan::new(TEST_PAGE_SIZE, TEST_PAGE_SIZE);
assert_eq!(plan.declared_pages, 1);
assert!(!plan.span_capped);
assert!(!plan.truncated);
assert_eq!(plan.sequential_to, 1);
assert_eq!(plan.stride, None);
}
#[test]
fn arena_walk_plan_exactly_max_arena_pages() {
let plan = ArenaWalkPlan::new(MAX_ARENA_PAGES * TEST_PAGE_SIZE, TEST_PAGE_SIZE);
assert_eq!(plan.declared_pages, MAX_ARENA_PAGES);
assert!(!plan.truncated);
assert_eq!(plan.sequential_to, MAX_ARENA_PAGES);
assert_eq!(plan.stride, None);
}
#[test]
fn arena_walk_plan_one_page_past_max() {
let plan = ArenaWalkPlan::new((MAX_ARENA_PAGES + 1) * TEST_PAGE_SIZE, TEST_PAGE_SIZE);
assert_eq!(plan.declared_pages, MAX_ARENA_PAGES + 1);
assert!(plan.truncated);
assert_eq!(plan.sequential_to, MAX_ARENA_PAGES);
assert_eq!(plan.stride, Some(1));
}
#[test]
fn arena_walk_plan_full_4gib() {
let raw = MAX_VM_RANGE_BYTES;
let plan = ArenaWalkPlan::new(raw, TEST_PAGE_SIZE);
assert_eq!(plan.declared_pages, raw / TEST_PAGE_SIZE);
assert!(!plan.span_capped, "exactly 4 GiB is at the cap, not above");
assert!(plan.truncated);
assert_eq!(plan.sequential_to, MAX_ARENA_PAGES);
let stride = plan.stride.expect("stride mode for >MAX_ARENA_PAGES");
let tail = plan.declared_pages - MAX_ARENA_PAGES;
assert!(stride * MAX_ARENA_STRIDE_PROBES >= tail);
assert!((stride - 1) * MAX_ARENA_STRIDE_PROBES < tail);
}
#[test]
fn arena_walk_plan_caps_at_4gib() {
let plan = ArenaWalkPlan::new(2 * MAX_VM_RANGE_BYTES, TEST_PAGE_SIZE);
assert!(plan.span_capped);
assert_eq!(plan.declared_pages, MAX_VM_RANGE_BYTES / TEST_PAGE_SIZE);
assert!(plan.truncated);
assert!(plan.stride.is_some());
}
#[test]
fn arena_walk_plan_caps_corrupt_u64_max_span() {
let plan = ArenaWalkPlan::new(u64::MAX, TEST_PAGE_SIZE);
assert!(plan.span_capped);
assert_eq!(plan.declared_pages, MAX_VM_RANGE_BYTES / TEST_PAGE_SIZE);
assert!(plan.truncated);
}
#[test]
fn arena_walk_plan_zero_span() {
let plan = ArenaWalkPlan::new(0, TEST_PAGE_SIZE);
assert_eq!(plan.declared_pages, 0);
assert!(!plan.span_capped);
assert!(!plan.truncated);
assert_eq!(plan.sequential_to, 0);
assert_eq!(plan.stride, None);
}
#[test]
fn arena_walk_plan_stride_visits_every_pgoff_when_short_tail() {
let plan = ArenaWalkPlan::new((MAX_ARENA_PAGES + 50) * TEST_PAGE_SIZE, TEST_PAGE_SIZE);
assert_eq!(plan.stride, Some(1));
let mut pgoff = plan.sequential_to;
let mut visited = 0u64;
while pgoff < plan.declared_pages {
visited += 1;
pgoff = pgoff.saturating_add(plan.stride.unwrap());
}
assert_eq!(visited, 50, "every tail page should be visited");
}
#[test]
fn arena_walk_plan_stride_distributes_probes_in_long_tail() {
let plan = ArenaWalkPlan::new(MAX_VM_RANGE_BYTES, TEST_PAGE_SIZE); let mut pgoff = plan.sequential_to;
let mut visited = 0u64;
while pgoff < plan.declared_pages {
visited += 1;
pgoff = pgoff.saturating_add(plan.stride.unwrap());
}
assert!(
visited <= MAX_ARENA_STRIDE_PROBES + 1,
"visited {visited}, expected ≤ {} probes",
MAX_ARENA_STRIDE_PROBES + 1
);
assert!(
visited >= MAX_ARENA_STRIDE_PROBES - 1,
"visited {visited}, expected ≥ {}-ish probes",
MAX_ARENA_STRIDE_PROBES - 1
);
}
#[test]
fn guard_half_matches_kernel_formula() {
assert_eq!(guard_half(4096), 32768);
assert_eq!(guard_half(16384), 32768);
assert_eq!(guard_half(65536), 65536);
}
#[test]
fn guest_page_size_decodes_tg1() {
#[cfg(target_arch = "x86_64")]
{
assert_eq!(guest_page_size(0), 4096);
assert_eq!(guest_page_size(0b01u64 << 30), 4096);
assert_eq!(guest_page_size(0b10u64 << 30), 4096);
assert_eq!(guest_page_size(0b11u64 << 30), 4096);
}
#[cfg(target_arch = "aarch64")]
{
assert_eq!(guest_page_size(0b10u64 << 30), 4096);
assert_eq!(guest_page_size(0b01u64 << 30), 16384);
assert_eq!(guest_page_size(0b11u64 << 30), 65536);
assert_eq!(guest_page_size(0), 4096);
}
}
#[test]
fn arena_walk_plan_16k_granule_single_page() {
let plan = ArenaWalkPlan::new(16384, 16384);
assert_eq!(plan.declared_pages, 1);
assert!(!plan.span_capped);
assert!(!plan.truncated);
assert_eq!(plan.sequential_to, 1);
assert_eq!(plan.stride, None);
}
#[test]
fn arena_walk_plan_16k_granule_full_cap() {
let plan = ArenaWalkPlan::new(MAX_VM_RANGE_BYTES, 16384);
assert_eq!(plan.declared_pages, MAX_VM_RANGE_BYTES / 16384);
assert!(!plan.span_capped);
assert!(plan.truncated);
assert_eq!(plan.sequential_to, MAX_ARENA_PAGES);
}
}