use serde::{Deserialize, Serialize};
use anyhow::{Context, Result};
use btf_rs::{Btf, Type};
use super::Kva;
use super::btf_offsets::{StructOrFwd, find_struct_or_fwd, member_byte_offset};
use super::btf_render::{MemReader, RenderedValue, render_value_with_mem};
use super::dump::hex_dump;
use super::guest::GuestKernel;
/// Depth of the sdt_alloc descriptor tree walked by `TreeWalker::descend`.
const SDT_TASK_LEVELS: usize = 3;

/// log2 of the number of entry slots held by one chunk.
const SDT_TASK_ENTS_PER_PAGE_SHIFT: u32 = 9;

/// Entry slots per chunk (1 << 9 = 512).
const SDT_TASK_ENTS_PER_CHUNK: usize = 1 << SDT_TASK_ENTS_PER_PAGE_SHIFT;

/// u64 words in the per-chunk allocation bitmap (512 bits / 64).
const SDT_TASK_CHUNK_BITMAP_U64S: usize = SDT_TASK_ENTS_PER_CHUNK / 64;

/// Hard cap on entries collected into one snapshot; the walker flags
/// `truncated` once this many entries have been emitted.
pub const MAX_SDT_ALLOC_ENTRIES: usize = 4096;

/// Fallback `sdt_data` header size used when the struct is only a
/// forward declaration in BTF (just the 8-byte id).
const SIZEOF_SDT_ID: usize = 8;

/// Plausibility bounds for the pool element size read from guest memory;
/// values outside this range abort the walk as garbage.
const MIN_ELEM_SIZE: u64 = 16;
const MAX_ELEM_SIZE: u64 = 4096;

/// Upper bound on BTF type ids probed during payload-type discovery.
const MAX_BTF_ID_PROBE: u32 = 100_000;
/// Byte offsets (and sizes) of the sdt_alloc data structures, resolved from
/// the scheduler's BTF so the walker can parse guest memory without
/// compiled-in layout assumptions.
#[derive(Debug, Clone)]
pub struct SdtAllocOffsets {
    /// Offset of `scx_allocator.pool` (embedded `sdt_pool`).
    pub allocator_pool: usize,
    /// Offset of `scx_allocator.root` (pointer to the root descriptor).
    pub allocator_root: usize,
    /// Total size of `struct scx_allocator` in bytes.
    pub allocator_size: usize,
    /// Offset of `sdt_pool.elem_size` within the pool struct.
    pub pool_elem_size: usize,
    /// Offset of `sdt_desc.allocated` (the allocation bitmap words).
    pub desc_allocated: usize,
    /// Offset of `sdt_desc.nr_free`.
    pub desc_nr_free: usize,
    /// Offset of `sdt_desc.chunk` (pointer to the backing chunk).
    pub desc_chunk: usize,
    /// Offset of the `descs`/`data` union inside `sdt_chunk`; 0 when the
    /// struct is only forward-declared in BTF.
    pub chunk_union: usize,
    /// Size of the `sdt_data` header preceding each payload; falls back to
    /// `SIZEOF_SDT_ID` when `sdt_data` is forward-declared.
    pub data_header_size: usize,
}
impl SdtAllocOffsets {
    /// Resolve every member offset this module needs from the scheduler's BTF.
    ///
    /// # Errors
    /// Fails when a required struct is absent or present only as a forward
    /// declaration. `sdt_chunk` and `sdt_data` may legitimately be
    /// forward-declared: the union offset then defaults to 0 and the data
    /// header size to `SIZEOF_SDT_ID`.
    pub fn from_btf(btf: &Btf) -> Result<Self> {
        // scx_allocator must be a full definition; everything hangs off it.
        let alloc = require_full_struct(btf, "scx_allocator").context(
            "btf: struct scx_allocator unavailable (scheduler doesn't link sdt_alloc, or BTF only carries a forward declaration)"
        )?;
        let allocator_pool = member_byte_offset(btf, &alloc, "pool")?;
        let allocator_root = member_byte_offset(btf, &alloc, "root")?;
        let allocator_size = alloc.size();

        let pool_struct = require_full_struct(btf, "sdt_pool")
            .context("btf: struct sdt_pool unavailable for member offsets")?;
        let pool_elem_size = member_byte_offset(btf, &pool_struct, "elem_size")?;

        let desc_struct = require_full_struct(btf, "sdt_desc")
            .context("btf: struct sdt_desc unavailable for member offsets")?;
        let desc_allocated = member_byte_offset(btf, &desc_struct, "allocated")?;
        let desc_nr_free = member_byte_offset(btf, &desc_struct, "nr_free")?;
        let desc_chunk = member_byte_offset(btf, &desc_struct, "chunk")?;

        // sdt_chunk may be forward-declared; its union then sits at offset 0.
        let chunk_union = match find_struct_or_fwd(btf, "sdt_chunk")
            .context("btf: struct sdt_chunk not found")?
        {
            StructOrFwd::Full(chunk) => chunk_union_offset(btf, &chunk)?,
            StructOrFwd::Fwd => 0,
        };

        // sdt_data forward-declared => assume the header is just the 8-byte id.
        let data_header_size = match find_struct_or_fwd(btf, "sdt_data")
            .context("btf: struct sdt_data not found")?
        {
            StructOrFwd::Full(data) => data.size(),
            StructOrFwd::Fwd => SIZEOF_SDT_ID,
        };

        Ok(Self {
            allocator_pool,
            allocator_root,
            allocator_size,
            pool_elem_size,
            desc_allocated,
            desc_nr_free,
            desc_chunk,
            chunk_union,
            data_header_size,
        })
    }
}
/// Look up `name` in BTF and insist on a full struct definition.
///
/// Forward declarations carry no member information, so they are reported
/// as an error instead of silently yielding bogus offsets.
fn require_full_struct(btf: &Btf, name: &str) -> Result<btf_rs::Struct> {
    let StructOrFwd::Full(full) = find_struct_or_fwd(btf, name)? else {
        anyhow::bail!(
            "btf: struct {name} present only as BTF_KIND_FWD forward declaration; member offsets unavailable"
        )
    };
    Ok(full)
}
/// Offset of the payload union inside `struct sdt_chunk`.
///
/// The union member may be named `descs` or `data`; accept either,
/// trying `descs` first.
fn chunk_union_offset(btf: &Btf, chunk: &btf_rs::Struct) -> Result<usize> {
    ["descs", "data"]
        .into_iter()
        .find_map(|member| member_byte_offset(btf, chunk, member).ok())
        .ok_or_else(|| {
            anyhow::anyhow!("btf: struct sdt_chunk has neither `descs` nor `data` member")
        })
}
/// One live allocation recovered from the sdt_alloc tree.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[non_exhaustive]
pub struct SdtAllocEntry {
    /// Index read from the first u32 of the entry's `sdt_data` header.
    pub idx: i32,
    /// Generation counter read from the second u32 of the header.
    pub genn: i32,
    /// Low 32 bits of the entry's arena pointer.
    pub user_addr: u64,
    /// Rendered payload: BTF-decoded when a type id was discovered,
    /// otherwise a raw hex dump (or an `Unsupported` marker on read failure).
    pub payload: RenderedValue,
}
impl std::fmt::Display for SdtAllocEntry {
    /// Render as `idx=<i> genn=<g> user_addr=<hex> payload=<rendered>`,
    /// forwarding the formatter so the payload's own Display flags apply.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { idx, genn, user_addr, payload } = self;
        write!(f, "idx={idx} genn={genn} user_addr={user_addr:#x} payload=")?;
        std::fmt::Display::fmt(payload, f)
    }
}
/// Result of walking one sdt_alloc allocator: the live entries plus
/// bookkeeping about anything that could not be read.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[non_exhaustive]
pub struct SdtAllocatorSnapshot {
    /// Human-readable name of the allocator that was walked.
    pub allocator_name: String,
    /// Live entries, capped at `MAX_SDT_ALLOC_ENTRIES`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub entries: Vec<SdtAllocEntry>,
    /// True when the entry cap was hit and the walk stopped early.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub truncated: bool,
    /// Subtrees dropped because a pointer failed translation or a sanity
    /// check; each bump means some entries may be missing.
    pub skipped_subtrees: u32,
    /// Element size read from the guest pool (0 when it could not be read).
    pub elem_size: u64,
    /// BTF type id used to render payloads; 0 means hex-dump fallback.
    pub payload_btf_type_id: u32,
    /// Why no payload type was chosen; empty when a type was found.
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub payload_type_reason: String,
}
impl std::fmt::Display for SdtAllocatorSnapshot {
    /// One header line (name, sizes, optional reason/truncation markers)
    /// followed by one indented line per live entry.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "sdt_alloc {name} (elem_size={size}, btf_type_id={tid}",
            name = self.allocator_name,
            size = self.elem_size,
            tid = self.payload_btf_type_id
        )?;
        match self.payload_type_reason.as_str() {
            "" => {}
            reason => write!(f, ", reason={reason}")?,
        }
        write!(f, "): {live} live", live = self.entries.len())?;
        if self.truncated {
            f.write_str(" (truncated)")?;
        }
        match self.skipped_subtrees {
            0 => {}
            skipped => write!(f, " ({skipped} subtrees skipped)")?,
        }
        self.entries.iter().try_for_each(|entry| {
            f.write_str("\n ")?;
            std::fmt::Display::fmt(entry, f)
        })
    }
}
/// Walk one sdt_alloc allocator and collect every live entry.
///
/// `allocator_bytes` is a copy of the guest's `struct scx_allocator`;
/// `offsets` says where its members live. Always returns a snapshot:
/// unreadable or implausible allocator state simply yields an empty entry
/// list, with `elem_size` recorded whenever it could at least be read.
#[allow(clippy::too_many_arguments)]
pub fn walk_sdt_allocator(
    kernel: &GuestKernel<'_>,
    kern_vm_start: u64,
    allocator_bytes: &[u8],
    offsets: &SdtAllocOffsets,
    btf: &Btf,
    payload_btf_type_id: u32,
    payload_type_reason: impl Into<String>,
    allocator_name: impl Into<String>,
    mem: &dyn MemReader,
) -> SdtAllocatorSnapshot {
    let mut snapshot = SdtAllocatorSnapshot {
        allocator_name: allocator_name.into(),
        entries: Vec::new(),
        truncated: false,
        skipped_subtrees: 0,
        elem_size: 0,
        payload_btf_type_id,
        payload_type_reason: payload_type_reason.into(),
    };

    // elem_size lives inside the embedded sdt_pool member.
    let elem_size_off = offsets.allocator_pool + offsets.pool_elem_size;
    let elem_size = match read_u64_at(allocator_bytes, elem_size_off) {
        Some(v) => v,
        None => return snapshot,
    };
    snapshot.elem_size = elem_size;

    // Reject implausible sizes — the allocator struct was probably garbage.
    if !(MIN_ELEM_SIZE..=MAX_ELEM_SIZE).contains(&elem_size) {
        return snapshot;
    }

    // Each element starts with an sdt_data header; the remainder is payload.
    let header = offsets.data_header_size as u64;
    if elem_size < header {
        return snapshot;
    }
    let payload_size = (elem_size - header) as usize;

    // A NULL or unreadable root means there is nothing to walk.
    let root_ptr = match read_u64_at(allocator_bytes, offsets.allocator_root) {
        Some(ptr) if ptr != 0 => ptr,
        _ => return snapshot,
    };

    let mut walker = TreeWalker {
        kernel,
        kern_vm_start,
        offsets,
        btf,
        payload_btf_type_id,
        payload_size,
        mem,
        out: &mut snapshot,
    };
    walker.descend(root_ptr, 0);
    snapshot
}
/// Outcome of payload-type discovery: a BTF struct id plus a human-readable
/// reason when no unique type could be selected.
#[derive(Debug, Clone)]
pub struct PayloadTypeChoice {
    /// Chosen BTF type id; 0 means "render the payload as raw hex".
    pub btf_type_id: u32,
    /// Empty on success; otherwise why no unique type was selected.
    pub reason: String,
}
/// Heuristically pick a BTF struct type for rendering allocator payloads.
///
/// Scans type ids `1..MAX_BTF_ID_PROBE` for named structs whose size equals
/// `payload_size`. A unique size match wins outright. With several matches,
/// name heuristics (`task_ctx`, `*_arena_ctx`, `*_task_ctx`, `*_ctx`) are
/// tried in order; a pattern is only accepted when it selects exactly one
/// candidate. Returns id 0 plus a reason string when nothing unique fits.
pub fn discover_payload_btf_id(btf: &Btf, payload_size: usize) -> PayloadTypeChoice {
    if payload_size == 0 {
        return PayloadTypeChoice {
            btf_type_id: 0,
            reason: "payload_size == 0".into(),
        };
    }

    // Stop probing after a long run of unresolvable ids — BTF ids are
    // allocated densely, so this means we ran off the end of the table.
    const CONSECUTIVE_FAIL_CAP: u32 = 64;
    let mut candidates: Vec<(u32, String)> = Vec::new();
    let mut misses: u32 = 0;
    for tid in 1..MAX_BTF_ID_PROBE {
        let Ok(ty) = btf.resolve_type_by_id(tid) else {
            misses += 1;
            if misses >= CONSECUTIVE_FAIL_CAP {
                break;
            }
            continue;
        };
        misses = 0;
        let Type::Struct(s) = ty else { continue };
        if s.size() != payload_size {
            continue;
        }
        // Only named structs are useful candidates.
        match btf.resolve_name(&s) {
            Ok(name) if !name.is_empty() => candidates.push((tid, name)),
            _ => {}
        }
    }

    match candidates.as_slice() {
        [] => PayloadTypeChoice {
            btf_type_id: 0,
            reason: format!("no candidate of size {payload_size}"),
        },
        [(only, _)] => PayloadTypeChoice {
            btf_type_id: *only,
            reason: String::new(),
        },
        many => {
            // Disambiguate by name, most specific pattern first.
            type Pat = fn(&str) -> bool;
            let patterns: &[Pat] = &[
                |n: &str| n == "task_ctx",
                |n: &str| n.ends_with("_arena_ctx"),
                |n: &str| n.ends_with("_task_ctx"),
                |n: &str| n.ends_with("_ctx"),
            ];
            for pat in patterns {
                let hits: Vec<u32> = many
                    .iter()
                    .filter(|(_, name)| pat(name))
                    .map(|(id, _)| *id)
                    .collect();
                // Accept a pattern only when it picks exactly one struct.
                if let [single] = hits.as_slice() {
                    return PayloadTypeChoice {
                        btf_type_id: *single,
                        reason: String::new(),
                    };
                }
            }
            PayloadTypeChoice {
                btf_type_id: 0,
                reason: format!("ambiguous: {} candidates", many.len()),
            }
        }
    }
}
/// Borrowed state threaded through the recursive sdt_alloc tree walk.
struct TreeWalker<'a> {
    /// Guest kernel handle used for memory reads and KVA translation.
    kernel: &'a GuestKernel<'a>,
    /// Base KVA of the arena; the low 32 bits of arena pointers are added
    /// to this base before translation (see `translate_arena_ptr`).
    kern_vm_start: u64,
    /// BTF-resolved struct layouts for the allocator data structures.
    offsets: &'a SdtAllocOffsets,
    /// BTF used to render payloads when a type id is known.
    btf: &'a Btf,
    /// BTF type id for payload rendering; 0 means hex-dump fallback.
    payload_btf_type_id: u32,
    /// Bytes of payload following each entry's data header.
    payload_size: usize,
    /// Memory reader handed to the value renderer (for pointer chasing).
    mem: &'a dyn MemReader,
    /// Snapshot being filled in during the walk.
    out: &'a mut SdtAllocatorSnapshot,
}
impl<'a> TreeWalker<'a> {
    /// Recursively visit the descriptor at `desc_ptr` (an arena pointer).
    ///
    /// `level` starts at 0 for the root. Interior levels hold pointers to
    /// child descriptors; the last level (`SDT_TASK_LEVELS - 1`) holds
    /// pointers to data entries. Subtrees that fail translation or look
    /// corrupt bump `skipped_subtrees` and are dropped, so one bad pointer
    /// cannot abort the whole walk.
    fn descend(&mut self, desc_ptr: u64, level: usize) {
        // Stop expanding once the snapshot cap is hit and mark truncation.
        if self.out.entries.len() >= MAX_SDT_ALLOC_ENTRIES {
            self.out.truncated = true;
            return;
        }
        // Defensive depth bound — a well-formed tree never reaches it.
        if level >= SDT_TASK_LEVELS {
            return;
        }
        let Some(desc_pa) = self.translate_arena_ptr(desc_ptr) else {
            self.out.skipped_subtrees = self.out.skipped_subtrees.saturating_add(1);
            return;
        };
        // Read the allocation bitmap: one bit per occupied slot.
        let mut allocated = [0u64; SDT_TASK_CHUNK_BITMAP_U64S];
        let mem = self.kernel.mem();
        for (i, slot) in allocated.iter_mut().enumerate() {
            *slot = mem.read_u64(desc_pa, self.offsets.desc_allocated + i * 8);
        }
        // Sanity check: nr_free above chunk capacity means we are reading
        // garbage — skip this subtree.
        let nr_free = mem.read_u64(desc_pa, self.offsets.desc_nr_free);
        if nr_free > SDT_TASK_ENTS_PER_CHUNK as u64 {
            self.out.skipped_subtrees = self.out.skipped_subtrees.saturating_add(1);
            return;
        }
        let chunk_ptr = mem.read_u64(desc_pa, self.offsets.desc_chunk);
        if chunk_ptr == 0 {
            self.out.skipped_subtrees = self.out.skipped_subtrees.saturating_add(1);
            return;
        }
        let Some(chunk_pa) = self.translate_arena_ptr(chunk_ptr) else {
            self.out.skipped_subtrees = self.out.skipped_subtrees.saturating_add(1);
            return;
        };
        // Visit each set bit; `word &= word - 1` clears the lowest set bit
        // per iteration so the loop runs once per occupied slot.
        for (word_idx, &word_value) in allocated.iter().enumerate() {
            let mut word = word_value;
            while word != 0 {
                if self.out.entries.len() >= MAX_SDT_ALLOC_ENTRIES {
                    self.out.truncated = true;
                    return;
                }
                let bit = word.trailing_zeros() as usize;
                word &= word - 1;
                let pos = word_idx * 64 + bit;
                // Defensive: bitmap words exactly cover the chunk, so this
                // should be unreachable.
                if pos >= SDT_TASK_ENTS_PER_CHUNK {
                    continue;
                }
                // Slots in the chunk union are 8-byte pointers.
                let entry_ptr_off = self.offsets.chunk_union + pos * 8;
                let entry_ptr = mem.read_u64(chunk_pa, entry_ptr_off);
                if entry_ptr == 0 {
                    continue;
                }
                if level == SDT_TASK_LEVELS - 1 {
                    self.emit_leaf(entry_ptr);
                } else {
                    self.descend(entry_ptr, level + 1);
                }
            }
        }
    }

    /// Read one allocated entry at `data_ptr` and append it to the snapshot.
    fn emit_leaf(&mut self, data_ptr: u64) {
        let Some(data_pa) = self.translate_arena_ptr(data_ptr) else {
            self.out.skipped_subtrees = self.out.skipped_subtrees.saturating_add(1);
            return;
        };
        let mem = self.kernel.mem();
        // idx/genn occupy the first 8 bytes of the sdt_data header.
        let idx = mem.read_u32(data_pa, 0) as i32;
        let genn = mem.read_u32(data_pa, 4) as i32;
        // Payload follows the header; a short read trims the buffer.
        let mut payload_bytes = vec![0u8; self.payload_size];
        let n = mem.read_bytes(
            data_pa + self.offsets.data_header_size as u64,
            &mut payload_bytes,
        );
        payload_bytes.truncate(n);
        if payload_bytes.is_empty() {
            // Keep the entry but mark the payload as unreadable.
            self.out.entries.push(SdtAllocEntry {
                idx,
                genn,
                // Low 32 bits are the arena offset, mirroring the mask in
                // translate_arena_ptr.
                user_addr: data_ptr & 0xFFFF_FFFF,
                payload: RenderedValue::Unsupported {
                    reason: "payload read failed: end-of-DRAM or unmapped page".into(),
                },
            });
            return;
        }
        // Render via BTF when a type id was discovered, raw hex otherwise.
        let payload = if self.payload_btf_type_id != 0 {
            render_value_with_mem(self.btf, self.payload_btf_type_id, &payload_bytes, self.mem)
        } else {
            RenderedValue::Bytes {
                hex: hex_dump(&payload_bytes),
            }
        };
        self.out.entries.push(SdtAllocEntry {
            idx,
            genn,
            user_addr: data_ptr & 0xFFFF_FFFF,
            payload,
        });
    }

    /// Translate an arena pointer into a guest-physical address.
    ///
    /// Only the low 32 bits of `ptr` are treated as an arena offset; they
    /// are rebased onto `kern_vm_start` and walked through the guest page
    /// tables. Returns `None` for NULL pointers, untranslatable KVAs, or
    /// physical addresses past the end of guest DRAM.
    fn translate_arena_ptr(&self, ptr: u64) -> Option<u64> {
        if ptr == 0 {
            return None;
        }
        let kva = self.kern_vm_start.wrapping_add(ptr & 0xFFFF_FFFF);
        let pa = self.kernel.mem().translate_kva(
            self.kernel.cr3_pa(),
            Kva(kva),
            self.kernel.l5(),
            self.kernel.tcr_el1(),
        )?;
        if pa >= self.kernel.mem().size() {
            return None;
        }
        Some(pa)
    }
}
/// Read a little-endian u64 from `bytes` starting at `offset`.
///
/// Returns `None` when the 8-byte window overflows `usize` or runs past
/// the end of the slice.
fn read_u64_at(bytes: &[u8], offset: usize) -> Option<u64> {
    let window = bytes.get(offset..offset.checked_add(8)?)?;
    let arr: [u8; 8] = window.try_into().ok()?;
    Some(u64::from_le_bytes(arr))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Little-endian decode, short-slice rejection, far-out-of-range offset.
    #[test]
    fn read_u64_at_basic() {
        let bytes = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xff];
        assert_eq!(read_u64_at(&bytes, 0), Some(0x0807060504030201));
        assert_eq!(read_u64_at(&bytes, 2), None);
        assert_eq!(read_u64_at(&bytes, 100), None);
    }

    /// `offset + 8` overflowing usize must yield None, not panic.
    #[test]
    fn read_u64_at_handles_offset_overflow() {
        let bytes = [0u8; 16];
        assert_eq!(read_u64_at(&bytes, usize::MAX), None);
    }

    /// Default snapshot serialises without the skip_serializing_if fields.
    #[test]
    fn empty_snapshot_serde() {
        let snap = SdtAllocatorSnapshot::default();
        let json = serde_json::to_string(&snap).unwrap();
        assert!(!json.contains("\"entries\""));
        assert!(!json.contains("\"truncated\""));
        assert!(!json.contains("\"payload_type_reason\""));
        assert!(json.contains("\"elem_size\":0"));
        assert!(json.contains("\"allocator_name\":\"\""));
        assert!(json.contains("\"skipped_subtrees\":0"));
    }

    /// Full serialize -> deserialize cycle preserves all fields.
    #[test]
    fn populated_snapshot_roundtrip() {
        let snap = SdtAllocatorSnapshot {
            allocator_name: "scx_task_allocator".into(),
            entries: vec![SdtAllocEntry {
                idx: 7,
                genn: 1,
                user_addr: 0x1000,
                payload: RenderedValue::Bytes {
                    hex: "de ad be ef".into(),
                },
            }],
            truncated: false,
            skipped_subtrees: 2,
            elem_size: 24,
            payload_btf_type_id: 42,
            payload_type_reason: String::new(),
        };
        let json = serde_json::to_string(&snap).expect("serialize");
        let parsed: SdtAllocatorSnapshot = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(parsed.entries.len(), 1);
        assert_eq!(parsed.entries[0].idx, 7);
        assert_eq!(parsed.entries[0].genn, 1);
        assert_eq!(parsed.elem_size, 24);
        assert_eq!(parsed.payload_btf_type_id, 42);
        assert_eq!(parsed.skipped_subtrees, 2);
        assert_eq!(parsed.allocator_name, "scx_task_allocator");
    }

    /// `truncated` is emitted when true (only false is skipped).
    #[test]
    fn truncated_flag_serialises() {
        let snap = SdtAllocatorSnapshot {
            allocator_name: "x".into(),
            entries: vec![],
            truncated: true,
            skipped_subtrees: 0,
            elem_size: 24,
            payload_btf_type_id: 0,
            payload_type_reason: String::new(),
        };
        let json = serde_json::to_string(&snap).unwrap();
        assert!(json.contains("\"truncated\":true"));
    }

    /// Non-empty reason strings must survive serialization verbatim.
    #[test]
    fn payload_type_reason_serialises_when_nonempty() {
        let snap = SdtAllocatorSnapshot {
            allocator_name: "x".into(),
            entries: vec![],
            truncated: false,
            skipped_subtrees: 0,
            elem_size: 24,
            payload_btf_type_id: 0,
            payload_type_reason: "no candidate of size 16".into(),
        };
        let json = serde_json::to_string(&snap).unwrap();
        assert!(json.contains("\"payload_type_reason\":\"no candidate of size 16\""));
    }

    /// Pin the tree-geometry constants so a drift from the expected layout
    /// fails loudly.
    #[test]
    fn constants_match_upstream_layout() {
        assert_eq!(SDT_TASK_LEVELS, 3);
        assert_eq!(SDT_TASK_ENTS_PER_PAGE_SHIFT, 9);
        assert_eq!(SDT_TASK_ENTS_PER_CHUNK, 512);
        assert_eq!(SDT_TASK_CHUNK_BITMAP_U64S, 8);
        assert_eq!(SIZEOF_SDT_ID, 8);
    }

    /// Compile-time checks on the elem_size sanity bounds.
    #[test]
    fn elem_size_bounds_match_kernel() {
        const {
            assert!(MIN_ELEM_SIZE >= 16);
        }
        const {
            assert!(MAX_ELEM_SIZE <= 4096);
        }
        const {
            assert!(MIN_ELEM_SIZE.is_multiple_of(8));
        }
    }

    /// Display for one entry includes all header fields plus the payload.
    #[test]
    fn entry_display_shows_idx_genn_user_addr() {
        let entry = SdtAllocEntry {
            idx: 7,
            genn: 1,
            user_addr: 0x1000,
            payload: RenderedValue::Uint {
                bits: 32,
                value: 42,
            },
        };
        let out = format!("{entry}");
        assert!(out.contains("idx=7"), "missing idx: {out}");
        assert!(out.contains("genn=1"), "missing genn: {out}");
        assert!(out.contains("user_addr=0x1000"), "missing user_addr: {out}");
        assert!(out.contains("payload=42"), "missing payload: {out}");
    }

    /// Display for a snapshot shows the header line and each entry line.
    #[test]
    fn snapshot_display_shows_header_and_entries() {
        let snap = SdtAllocatorSnapshot {
            allocator_name: "scx_task_allocator".into(),
            entries: vec![SdtAllocEntry {
                idx: 7,
                genn: 1,
                user_addr: 0x1000,
                payload: RenderedValue::Uint {
                    bits: 32,
                    value: 42,
                },
            }],
            truncated: false,
            skipped_subtrees: 0,
            elem_size: 24,
            payload_btf_type_id: 42,
            payload_type_reason: String::new(),
        };
        let out = format!("{snap}");
        assert!(
            out.contains("sdt_alloc scx_task_allocator"),
            "missing header: {out}"
        );
        assert!(out.contains("elem_size=24"), "missing elem_size: {out}");
        assert!(out.contains("btf_type_id=42"), "missing btf_type_id: {out}");
        assert!(out.contains("1 live"), "missing entry count: {out}");
        assert!(out.contains("idx=7"), "missing entry render: {out}");
    }

    /// Truncation, skip count, and reason markers all appear in Display.
    #[test]
    fn snapshot_display_marks_truncated_and_skipped() {
        let snap = SdtAllocatorSnapshot {
            allocator_name: "x".into(),
            entries: vec![],
            truncated: true,
            skipped_subtrees: 5,
            elem_size: 24,
            payload_btf_type_id: 0,
            payload_type_reason: "no candidate of size 16".into(),
        };
        let out = format!("{snap}");
        assert!(out.contains("(truncated)"), "missing truncated: {out}");
        assert!(
            out.contains("(5 subtrees skipped)"),
            "missing skipped: {out}"
        );
        assert!(
            out.contains("reason=no candidate of size 16"),
            "missing reason: {out}"
        );
    }

    /// payload_size == 0 short-circuits before any BTF probing.
    /// Skips when no test vmlinux image is available.
    #[test]
    fn discover_payload_btf_id_zero_size_short_circuits() {
        let path = match crate::monitor::find_test_vmlinux() {
            Some(p) => p,
            None => {
                crate::report::test_skip("no vmlinux for BTF load");
                return;
            }
        };
        let btf = match crate::monitor::btf_offsets::load_btf_from_path(&path) {
            Ok(b) => b,
            Err(_) => {
                crate::report::test_skip("BTF load failed");
                return;
            }
        };
        let choice = discover_payload_btf_id(&btf, 0);
        assert_eq!(choice.btf_type_id, 0, "zero-size must yield btf_type_id=0");
        assert_eq!(
            choice.reason, "payload_size == 0",
            "zero-size reason must be the early-return marker, got: {}",
            choice.reason
        );
    }

    /// An impossibly large payload size exercises the "no candidate" path
    /// and pins its exact reason string.
    #[test]
    fn discover_payload_btf_id_no_candidate_path() {
        let path = match crate::monitor::find_test_vmlinux() {
            Some(p) => p,
            None => {
                crate::report::test_skip("no vmlinux for BTF load");
                return;
            }
        };
        let btf = match crate::monitor::btf_offsets::load_btf_from_path(&path) {
            Ok(b) => b,
            Err(_) => {
                crate::report::test_skip("BTF load failed");
                return;
            }
        };
        let impossible_size = usize::MAX / 2;
        let choice = discover_payload_btf_id(&btf, impossible_size);
        assert_eq!(choice.btf_type_id, 0);
        let expected = format!("no candidate of size {impossible_size}");
        assert_eq!(
            choice.reason, expected,
            "reason must exactly match documented format: got '{}'",
            choice.reason
        );
    }

    /// Plain vmlinux BTF lacks the scheduler's scx_allocator struct, so
    /// from_btf must fail with an error naming the missing struct.
    #[test]
    fn sdt_alloc_offsets_from_vmlinux_btf_returns_err() {
        let path = match crate::monitor::find_test_vmlinux() {
            Some(p) => p,
            None => {
                crate::report::test_skip("no vmlinux for BTF load");
                return;
            }
        };
        let btf = match crate::monitor::btf_offsets::load_btf_from_path(&path) {
            Ok(b) => b,
            Err(_) => {
                crate::report::test_skip("BTF load failed");
                return;
            }
        };
        let err = SdtAllocOffsets::from_btf(&btf)
            .expect_err("vmlinux BTF must NOT contain scx_allocator — from_btf must Err");
        let msg = format!("{err:#}");
        assert!(
            msg.contains("scx_allocator"),
            "error must name the missing struct so the dump pipeline can log a useful diagnostic: '{msg}'"
        );
    }
}