use serde::{Deserialize, Serialize};
use super::btf_offsets::{RHT_PTR_LOCK_BIT, SCX_DSQ_LNODE_ITER_CURSOR, ScxWalkerOffsets};
use super::dump::TaskWalkerEntry;
use super::guest::GuestKernel;
use super::idr::translate_any_kva;
use super::reader::{GuestMem, WalkContext};
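// Traversal caps: a frozen guest can hold corrupt or cyclic pointers, so
// every list and hash walk below is bounded rather than trusted to
// terminate on its own.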
const MAX_NODES_PER_LIST: u32 = 4096;
const MAX_RHT_NODES: u32 = 8192;
const MAX_RHT_BUCKETS: u32 = 65_536;
const PER_BUCKET_CHAIN_CAP: u32 = 1024;
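/// Snapshot of one CPU's `rq.scx` state plus the tasks on its runnable list.
/// `Option` fields correspond to struct members whose BTF offsets may be
/// absent on a given kernel; they are skipped during serialization when
/// `None`.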
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[non_exhaustive]
pub struct RqScxState {
pub cpu: u32,
pub nr_running: u32,
pub flags: u32,
pub cpu_released: bool,
pub ops_qseq: u64,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub kick_sync: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub nr_immed: Option<u32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub rq_clock: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub curr_pid: Option<i32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub curr_comm: Option<String>,
pub runnable_task_kvas: Vec<u64>,
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub runnable_truncated: bool,
}
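/// Snapshot of one dispatch queue: identity, counters, the task KVAs found
/// on its list, and whether the list walk hit `MAX_NODES_PER_LIST`.
/// `origin` records which pass found it ("local cpu N", "bypass cpu N",
/// "global node N", or "user").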
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[non_exhaustive]
pub struct DsqState {
pub id: u64,
pub origin: String,
pub nr: u32,
pub seq: u32,
pub task_kvas: Vec<u64>,
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub truncated: bool,
}
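/// Health summary of the attached scheduler, read from `struct scx_sched`.
/// `source` distinguishes a live `*scx_root` dereference from a BSS
/// snapshot (see the `SCX_SCHED_STATE_SOURCE_*` constants below).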
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[non_exhaustive]
pub struct ScxSchedState {
pub aborting: bool,
pub bypass_depth: i32,
pub exit_kind: u32,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub watchdog_timeout: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub source: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub sched_kva: Option<u64>,
}
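/// Reads one CPU's `rq.scx` counters, the current task's pid/comm, and the
/// runnable list (when the sched_ext_entity offsets are known). Returns the
/// state plus one `TaskWalkerEntry` per runnable task, or `None` if the rq,
/// scx_rq, or task offset group is missing.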
#[allow(dead_code)]
pub fn walk_rq_scx(
kernel: &GuestKernel<'_>,
cpu: u32,
rq_kva: u64,
rq_pa: u64,
offsets: &ScxWalkerOffsets,
) -> Option<(RqScxState, Vec<TaskWalkerEntry>)> {
let rq_offs = offsets.rq.as_ref()?;
let scx_rq_offs = offsets.scx_rq.as_ref()?;
let task_offs = offsets.task.as_ref()?;
let mem = kernel.mem();
let walk = kernel.walk_context();
let scx_off = rq_offs.scx;
let nr_running = mem.read_u32(rq_pa, scx_off + scx_rq_offs.nr_running);
let flags = mem.read_u32(rq_pa, scx_off + scx_rq_offs.flags);
let cpu_released = mem.read_u8(rq_pa, scx_off + scx_rq_offs.cpu_released) != 0;
let ops_qseq = mem.read_u64(rq_pa, scx_off + scx_rq_offs.ops_qseq);
let kick_sync = scx_rq_offs
.kick_sync
.map(|off| mem.read_u64(rq_pa, scx_off + off));
let nr_immed = scx_rq_offs
.nr_immed
.map(|off| mem.read_u32(rq_pa, scx_off + off));
let rq_clock = scx_rq_offs
.clock
.map(|off| mem.read_u64(rq_pa, scx_off + off));
let curr_kva = mem.read_u64(rq_pa, rq_offs.curr);
let (curr_pid, curr_comm) =
read_task_pid_comm(mem, walk, curr_kva, task_offs.pid, task_offs.comm);
let (runnable_task_kvas, runnable_truncated) = if let Some(see_offs) = offsets.see.as_ref() {
let list_head_off = scx_off + scx_rq_offs.runnable_list;
let head_kva = rq_kva.wrapping_add(list_head_off as u64);
let head_pa = rq_pa.wrapping_add(list_head_off as u64);
let runnable_node_off_in_task = task_offs.scx + see_offs.runnable_node;
walk_list_head_for_task_kvas(mem, walk, head_kva, head_pa, runnable_node_off_in_task)
} else {
(Vec::new(), false)
};
let walker_entries: Vec<TaskWalkerEntry> = runnable_task_kvas
.iter()
.map(|&task_kva| TaskWalkerEntry {
task_kva,
is_runnable_in_scx: true,
running_pc: None,
})
.collect();
let state = RqScxState {
cpu,
nr_running,
flags,
cpu_released,
ops_qseq,
kick_sync,
nr_immed,
rq_clock,
curr_pid,
curr_comm,
runnable_task_kvas,
runnable_truncated,
};
Some((state, walker_entries))
}
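/// Dereferences `*scx_root` and reads aborting/bypass_depth/exit_kind from
/// the attached `struct scx_sched`. Every early-out (missing offsets,
/// missing symbol, no scheduler attached, failed translation) is logged at
/// debug level with its cause. Returns the sched KVA alongside the state.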
#[allow(dead_code)]
pub fn read_scx_sched_state(
kernel: &GuestKernel<'_>,
scx_root_kva: u64,
offsets: &ScxWalkerOffsets,
) -> Option<(u64, ScxSchedState)> {
let Some(sched_offs) = offsets.sched.as_ref() else {
tracing::debug!(
"read_scx_sched_state: ScxSchedOffsets BTF sub-group missing — \
vmlinux lacks `struct scx_sched` (kernel without sched_ext or stripped vmlinux)",
);
return None;
};
let mem = kernel.mem();
let walk = kernel.walk_context();
if scx_root_kva == 0 {
tracing::debug!(
"read_scx_sched_state: scx_root_kva is 0 — vmlinux had no \
`scx_root` symbol (pre-6.16 kernel or stripped vmlinux)",
);
return None;
}
let root_pa = kernel.text_kva_to_pa(scx_root_kva);
let sched_kva = mem.read_u64(root_pa, 0);
if sched_kva == 0 {
tracing::debug!(
scx_root_kva = format_args!("{:#x}", scx_root_kva),
root_pa = format_args!("{:#x}", root_pa),
"read_scx_sched_state: *scx_root == 0 — no scheduler attached at the freeze instant",
);
return None;
}
let Some(sched_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
sched_kva,
walk.l5,
walk.tcr_el1,
) else {
tracing::debug!(
sched_kva = format_args!("{:#x}", sched_kva),
"read_scx_sched_state: translate_any_kva failed for sched_kva — \
page-table walk yielded no PA (slab page race or torn read)",
);
return None;
};
let aborting = sched_offs
.aborting
.map(|off| mem.read_u8(sched_pa, off) != 0)
.unwrap_or(false);
let bypass_depth = sched_offs
.bypass_depth
.map(|off| mem.read_u32(sched_pa, off) as i32)
.unwrap_or(0);
let exit_kind = mem.read_u32(sched_pa, sched_offs.exit_kind);
Some((
sched_kva,
ScxSchedState {
aborting,
bypass_depth,
exit_kind,
watchdog_timeout: None,
source: Some(SCX_SCHED_STATE_SOURCE_LIVE.to_string()),
sched_kva: Some(sched_kva),
},
))
}
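// Provenance tags for `ScxSchedState::source`: state read live through
// `*scx_root` vs. recovered from a BSS snapshot.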
pub const SCX_SCHED_STATE_SOURCE_LIVE: &str = "live";
pub const SCX_SCHED_STATE_SOURCE_BSS: &str = "bss_snapshot";
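// Flag bit in `sched_ext_entity.flags` marking a list node as an iteration
// cursor rather than a real task; assumed to mirror the kernel's
// SCX_TASK_CURSOR bit (1 << 31).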
const SCX_TASK_CURSOR: u32 = 1 << 31;
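/// Walks the global `scx_tasks` list. Each node is first mapped back to its
/// `sched_ext_entity` to check the cursor flag, and non-cursor nodes are
/// then mapped back to their `task_struct` KVA (container_of-style
/// arithmetic, twice). Bounded by `MAX_NODES_PER_LIST`; unmapped or null
/// `next` pointers end the walk with whatever was collected.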
#[allow(dead_code)]
pub fn walk_scx_tasks_global(
kernel: &GuestKernel<'_>,
scx_tasks_kva: u64,
tasks_node_off_in_task: usize,
tasks_node_off_in_see: usize,
flags_off_in_see: usize,
) -> Vec<u64> {
if scx_tasks_kva == 0 {
tracing::debug!(
"walk_scx_tasks_global: scx_tasks_kva is 0 — vmlinux had no \
`scx_tasks` symbol (kernel without sched_ext or stripped vmlinux)",
);
return Vec::new();
}
let mem = kernel.mem();
let walk = kernel.walk_context();
let head_kva = scx_tasks_kva;
let head_pa = kernel.text_kva_to_pa(scx_tasks_kva);
let mut task_kvas: Vec<u64> = Vec::new();
let mut node_kva = mem.read_u64(head_pa, 0);
if node_kva == 0 {
tracing::debug!(
scx_tasks_kva = format_args!("{:#x}", scx_tasks_kva),
head_pa = format_args!("{:#x}", head_pa),
"walk_scx_tasks_global: head.next read as 0 — list-head bytes \
unmapped or torn read; no tasks harvested",
);
return task_kvas;
}
let mut visited: u32 = 0;
while node_kva != head_kva {
if visited >= MAX_NODES_PER_LIST {
return task_kvas;
}
visited += 1;
let see_kva = node_kva.wrapping_sub(tasks_node_off_in_see as u64);
let cursor = match translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
see_kva,
walk.l5,
walk.tcr_el1,
) {
Some(see_pa) => {
let flags = mem.read_u32(see_pa, flags_off_in_see);
flags & SCX_TASK_CURSOR != 0
}
None => false,
};
if !cursor {
let task_kva = node_kva.wrapping_sub(tasks_node_off_in_task as u64);
task_kvas.push(task_kva);
}
let Some(node_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
node_kva,
walk.l5,
walk.tcr_el1,
) else {
return task_kvas;
};
let next_kva = mem.read_u64(node_pa, 0);
if next_kva == 0 {
return task_kvas;
}
node_kva = next_kva;
}
task_kvas
}
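/// Walks each CPU's local DSQ embedded in its runqueue. CPUs other than
/// CPU 0 whose per-CPU offset is 0 are treated as BSS zero-tail aliases of
/// `runqueues` and skipped. Returns `None` if any offset sub-group is
/// missing, since the whole pass needs the full set.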
#[allow(dead_code)]
pub fn walk_local_dsqs(
kernel: &GuestKernel<'_>,
rq_kvas: &[u64],
rq_pas: &[u64],
per_cpu_offsets: &[u64],
offsets: &ScxWalkerOffsets,
) -> Option<(Vec<DsqState>, Vec<TaskWalkerEntry>)> {
let Some(rq_offs) = offsets.rq.as_ref() else {
tracing::debug!(
"walk_local_dsqs: ScxWalkerOffsets.rq sub-group missing — \
local DSQ pass blinded",
);
return None;
};
let Some(scx_rq_offs) = offsets.scx_rq.as_ref() else {
tracing::debug!(
"walk_local_dsqs: ScxWalkerOffsets.scx_rq sub-group missing — \
local DSQ pass blinded",
);
return None;
};
let Some(dsq_offs) = offsets.dsq.as_ref() else {
tracing::debug!(
"walk_local_dsqs: ScxWalkerOffsets.dsq sub-group missing — \
local DSQ pass blinded",
);
return None;
};
let Some(dsq_lnode_offs) = offsets.dsq_lnode.as_ref() else {
tracing::debug!(
"walk_local_dsqs: ScxWalkerOffsets.dsq_lnode sub-group missing — \
local DSQ pass blinded",
);
return None;
};
let Some(task_offs) = offsets.task.as_ref() else {
tracing::debug!(
"walk_local_dsqs: ScxWalkerOffsets.task sub-group missing — \
local DSQ pass blinded",
);
return None;
};
let Some(see_offs) = offsets.see.as_ref() else {
tracing::debug!(
"walk_local_dsqs: ScxWalkerOffsets.see sub-group missing — \
local DSQ pass blinded",
);
return None;
};
let mem = kernel.mem();
let walk = kernel.walk_context();
let mut states: Vec<DsqState> = Vec::new();
let mut entries: Vec<TaskWalkerEntry> = Vec::new();
for (cpu, (&rq_kva, &rq_pa)) in rq_kvas.iter().zip(rq_pas.iter()).enumerate() {
let cpu_off = per_cpu_offsets.get(cpu).copied();
match cpu_off {
Some(off) if off == 0 && cpu > 0 => continue,
None if cpu > 0 => continue,
_ => {}
}
let local_dsq_off = rq_offs.scx + scx_rq_offs.local_dsq;
let dsq_kva = rq_kva.wrapping_add(local_dsq_off as u64);
let dsq_pa = rq_pa.wrapping_add(local_dsq_off as u64);
if let Some((state, e)) = walk_one_dsq(
mem,
walk,
dsq_kva,
dsq_pa,
|| format!("local cpu {cpu}"),
dsq_offs,
dsq_lnode_offs,
task_offs,
see_offs,
) {
entries.extend(e);
states.push(state);
}
}
Some((states, entries))
}
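/// Harvests non-local DSQs in three passes: per-CPU bypass DSQs via
/// `scx_sched.pcpu`, per-node global DSQs via `scx_sched.pnode`, and
/// user-created DSQs via the `dsq_hash` rhashtable. Each pass is gated on
/// its own optional offsets, so partial BTF still yields partial results.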
#[allow(dead_code)]
pub fn walk_dsqs(
kernel: &GuestKernel<'_>,
sched_pa: u64,
per_cpu_offsets: &[u64],
nr_nodes: u32,
offsets: &ScxWalkerOffsets,
) -> (Vec<DsqState>, Vec<TaskWalkerEntry>) {
let mem = kernel.mem();
let walk = kernel.walk_context();
let mut dsq_states: Vec<DsqState> = Vec::new();
let mut all_entries: Vec<TaskWalkerEntry> = Vec::new();
let (Some(dsq_offs), Some(dsq_lnode_offs), Some(task_offs), Some(see_offs)) = (
offsets.dsq.as_ref(),
offsets.dsq_lnode.as_ref(),
offsets.task.as_ref(),
offsets.see.as_ref(),
) else {
return (dsq_states, all_entries);
};
if let (Some(sched_offs), Some(pcpu_offs)) =
(offsets.sched.as_ref(), offsets.sched_pcpu.as_ref())
&& let (Some(sched_pcpu_off), Some(bypass_dsq_off)) =
(sched_offs.pcpu, pcpu_offs.bypass_dsq)
{
let pcpu_kva = mem.read_u64(sched_pa, sched_pcpu_off);
if pcpu_kva != 0 {
for (cpu, &cpu_off) in per_cpu_offsets.iter().enumerate() {
if cpu_off == 0 && cpu > 0 {
continue;
}
let dsq_kva = pcpu_kva
.wrapping_add(cpu_off)
.wrapping_add(bypass_dsq_off as u64);
if let Some(dsq_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
dsq_kva,
walk.l5,
walk.tcr_el1,
) && let Some((state, entries)) = walk_one_dsq(
mem,
walk,
dsq_kva,
dsq_pa,
|| format!("bypass cpu {cpu}"),
dsq_offs,
dsq_lnode_offs,
task_offs,
see_offs,
) {
all_entries.extend(entries);
dsq_states.push(state);
}
}
}
}
if let (Some(sched_offs), Some(pnode_offs)) =
(offsets.sched.as_ref(), offsets.sched_pnode.as_ref())
&& let (Some(sched_pnode_off), Some(global_dsq_off)) =
(sched_offs.pnode, pnode_offs.global_dsq)
{
let pnode_kva = mem.read_u64(sched_pa, sched_pnode_off);
if pnode_kva != 0
&& let Some(pnode_arr_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
pnode_kva,
walk.l5,
walk.tcr_el1,
)
{
for node in 0..nr_nodes as u64 {
let pnode_ptr_kva = mem.read_u64(pnode_arr_pa, (node * 8) as usize);
if pnode_ptr_kva == 0 {
continue;
}
let Some(pnode_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
pnode_ptr_kva,
walk.l5,
walk.tcr_el1,
) else {
continue;
};
let dsq_kva = pnode_ptr_kva.wrapping_add(global_dsq_off as u64);
let dsq_pa = pnode_pa.wrapping_add(global_dsq_off as u64);
if let Some((state, entries)) = walk_one_dsq(
mem,
walk,
dsq_kva,
dsq_pa,
|| format!("global node {node}"),
dsq_offs,
dsq_lnode_offs,
task_offs,
see_offs,
) {
all_entries.extend(entries);
dsq_states.push(state);
}
}
}
}
if let (Some(sched_offs), Some(rht_offs)) = (offsets.sched.as_ref(), offsets.rht.as_ref()) {
let rht_pa = sched_pa.wrapping_add(sched_offs.dsq_hash as u64);
let (user_dsqs, user_dsqs_truncated) =
walk_user_dsq_hash(mem, walk, rht_pa, rht_offs, dsq_offs);
if user_dsqs_truncated {
tracing::warn!(
visited = user_dsqs.len(),
cap_buckets = MAX_RHT_BUCKETS,
cap_nodes = MAX_RHT_NODES,
"walk_user_dsq_hash: truncated — bucket-table or node cap fired; \
dsq_kvas list is incomplete",
);
}
for dsq_kva in user_dsqs {
let Some(dsq_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
dsq_kva,
walk.l5,
walk.tcr_el1,
) else {
continue;
};
if let Some((state, entries)) = walk_one_dsq(
mem,
walk,
dsq_kva,
dsq_pa,
|| "user".to_string(),
dsq_offs,
dsq_lnode_offs,
task_offs,
see_offs,
) {
all_entries.extend(entries);
dsq_states.push(state);
}
}
}
(dsq_states, all_entries)
}
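/// Reads one DSQ's id/nr/seq and walks its task list, filtering iterator
/// cursors by `SCX_DSQ_LNODE_ITER_CURSOR`. A `dsq_pa` of 0 is rejected up
/// front: it would alias the boot page and surface phantom all-zero DSQ
/// state.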
#[allow(clippy::too_many_arguments)]
fn walk_one_dsq(
mem: &GuestMem,
walk: WalkContext,
dsq_kva: u64,
dsq_pa: u64,
origin: impl FnOnce() -> String,
dsq_offs: &super::btf_offsets::ScxDispatchQOffsets,
dsq_lnode_offs: &super::btf_offsets::ScxDsqListNodeOffsets,
task_offs: &super::btf_offsets::TaskStructCoreOffsets,
see_offs: &super::btf_offsets::SchedExtEntityOffsets,
) -> Option<(DsqState, Vec<TaskWalkerEntry>)> {
if dsq_pa == 0 {
tracing::debug!(
dsq_kva = format_args!("{:#x}", dsq_kva),
"walk_one_dsq: dsq_pa == 0 — would alias the boot page; \
skipping to avoid surfacing phantom all-zero DSQ state",
);
return None;
}
let origin = origin();
let id = mem.read_u64(dsq_pa, dsq_offs.id);
let nr = mem.read_u32(dsq_pa, dsq_offs.nr);
let seq = mem.read_u32(dsq_pa, dsq_offs.seq);
let head_kva = dsq_kva.wrapping_add(dsq_offs.list as u64);
let head_pa = dsq_pa.wrapping_add(dsq_offs.list as u64);
let dsq_node_off_in_task = task_offs.scx + see_offs.dsq_list + dsq_lnode_offs.node;
let (task_kvas, truncated) = walk_list_head_for_dsq_task_kvas(
mem,
walk,
head_kva,
head_pa,
dsq_node_off_in_task,
dsq_lnode_offs,
);
let entries: Vec<TaskWalkerEntry> = task_kvas
.iter()
.map(|&task_kva| TaskWalkerEntry {
task_kva,
is_runnable_in_scx: false,
running_pc: None,
})
.collect();
Some((
DsqState {
id,
origin,
nr,
seq,
task_kvas,
truncated,
},
entries,
))
}
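/// Generic `list_head` walk: follows `next` pointers from `head_pa`,
/// recording `node - offset` (container_of) for each node until the walk
/// returns to `head_kva`. `truncated` is true only when the
/// `MAX_NODES_PER_LIST` cap fires; translation failures and zero pointers
/// end the walk quietly with whatever was collected.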
fn walk_list_head_for_task_kvas(
mem: &GuestMem,
walk: WalkContext,
head_kva: u64,
head_pa: u64,
runnable_node_off_in_task: usize,
) -> (Vec<u64>, bool) {
let mut task_kvas = Vec::new();
let mut node_kva = mem.read_u64(head_pa, 0);
if node_kva == 0 {
return (task_kvas, false);
}
let mut visited: u32 = 0;
while node_kva != head_kva {
if visited >= MAX_NODES_PER_LIST {
return (task_kvas, true);
}
visited += 1;
let task_kva = node_kva.wrapping_sub(runnable_node_off_in_task as u64);
task_kvas.push(task_kva);
let Some(node_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
node_kva,
walk.l5,
walk.tcr_el1,
) else {
return (task_kvas, false);
};
let next_kva = mem.read_u64(node_pa, 0);
if next_kva == 0 {
return (task_kvas, false);
}
node_kva = next_kva;
}
(task_kvas, false)
}
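/// Like `walk_list_head_for_task_kvas`, but each node is first mapped back
/// to its `scx_dsq_list_node` so iterator cursors can be filtered by flag.
/// Nodes whose flags cannot be read are skipped rather than surfaced as
/// phantom tasks.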
fn walk_list_head_for_dsq_task_kvas(
mem: &GuestMem,
walk: WalkContext,
head_kva: u64,
head_pa: u64,
dsq_node_off_in_task: usize,
dsq_lnode_offs: &super::btf_offsets::ScxDsqListNodeOffsets,
) -> (Vec<u64>, bool) {
let mut task_kvas = Vec::new();
let mut node_kva = mem.read_u64(head_pa, 0);
if node_kva == 0 {
return (task_kvas, false);
}
let mut visited: u32 = 0;
while node_kva != head_kva {
if visited >= MAX_NODES_PER_LIST {
return (task_kvas, true);
}
visited += 1;
let lnode_kva = node_kva.wrapping_sub(dsq_lnode_offs.node as u64);
let is_cursor = match translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
lnode_kva,
walk.l5,
walk.tcr_el1,
) {
Some(lnode_pa) => {
let lnode_flags = mem.read_u32(lnode_pa, dsq_lnode_offs.flags);
Some(lnode_flags & SCX_DSQ_LNODE_ITER_CURSOR != 0)
}
None => None,
};
let skip_entry = match is_cursor {
Some(true) => true,
Some(false) => false,
// Flags unreadable: skip conservatively instead of surfacing a
// possibly-bogus container_of result as a task.
None => true,
};
if !skip_entry {
let task_kva = node_kva.wrapping_sub(dsq_node_off_in_task as u64);
task_kvas.push(task_kva);
}
let Some(node_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
node_kva,
walk.l5,
walk.tcr_el1,
) else {
return (task_kvas, false);
};
let next_kva = mem.read_u64(node_pa, 0);
if next_kva == 0 {
return (task_kvas, false);
}
node_kva = next_kva;
}
(task_kvas, false)
}
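/// Walks the `dsq_hash` rhashtable's current bucket table, masking
/// `RHT_PTR_LOCK_BIT` off each bucket head. Three caps apply: bucket count
/// (`MAX_RHT_BUCKETS`), total nodes (`MAX_RHT_NODES`), and per-bucket chain
/// length (`PER_BUCKET_CHAIN_CAP`, which also breaks pointer cycles). Any
/// cap firing reports `truncated = true`; a `next` pointer that is zero or
/// carries the lock bit counts as natural chain termination.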
fn walk_user_dsq_hash(
mem: &GuestMem,
walk: WalkContext,
rht_pa: u64,
rht_offs: &super::btf_offsets::RhashtableOffsets,
dsq_offs: &super::btf_offsets::ScxDispatchQOffsets,
) -> (Vec<u64>, bool) {
let mut dsq_kvas = Vec::new();
let tbl_kva = mem.read_u64(rht_pa, rht_offs.tbl);
if tbl_kva == 0 {
return (dsq_kvas, false);
}
let Some(tbl_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
tbl_kva,
walk.l5,
walk.tcr_el1,
) else {
return (dsq_kvas, false);
};
let size = mem.read_u32(tbl_pa, rht_offs.bucket_table_size);
let bucket_count = size.min(MAX_RHT_BUCKETS) as u64;
let mut truncated = size as u64 > bucket_count;
let buckets_off = rht_offs.bucket_table_buckets;
let mut total_nodes: u32 = 0;
for i in 0..bucket_count {
if total_nodes >= MAX_RHT_NODES {
return (dsq_kvas, true);
}
let entry_off = buckets_off + (i as usize) * 8;
let raw_ptr = mem.read_u64(tbl_pa, entry_off);
let head_kva = raw_ptr & !RHT_PTR_LOCK_BIT;
if head_kva == 0 {
continue;
}
let mut node_kva = head_kva;
let mut chain_visited: u32 = 0;
let mut chain_terminated_naturally = false;
while node_kva != 0 && total_nodes < MAX_RHT_NODES && chain_visited < PER_BUCKET_CHAIN_CAP {
chain_visited += 1;
total_nodes += 1;
let dsq_kva = node_kva.wrapping_sub(dsq_offs.hash_node as u64);
dsq_kvas.push(dsq_kva);
let Some(node_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
node_kva,
walk.l5,
walk.tcr_el1,
) else {
chain_terminated_naturally = true;
break;
};
let next_raw = mem.read_u64(node_pa, rht_offs.rhash_head_next);
if next_raw & RHT_PTR_LOCK_BIT != 0 || next_raw == 0 {
chain_terminated_naturally = true;
break;
}
node_kva = next_raw;
}
if !chain_terminated_naturally {
truncated = true;
}
}
(dsq_kvas, truncated)
}
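/// Best-effort pid/comm read for a task KVA. Returns `(None, None)` when
/// the pointer is null or untranslatable; `comm` is cut at the first NUL
/// within its 16-byte buffer.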
fn read_task_pid_comm(
mem: &GuestMem,
walk: WalkContext,
task_kva: u64,
pid_off: usize,
comm_off: usize,
) -> (Option<i32>, Option<String>) {
if task_kva == 0 {
return (None, None);
}
let Some(task_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
task_kva,
walk.l5,
walk.tcr_el1,
) else {
return (None, None);
};
let pid = mem.read_u32(task_pa, pid_off) as i32;
let mut buf = [0u8; 16];
mem.read_bytes(task_pa + comm_off as u64, &mut buf);
let n = buf.iter().position(|&b| b == 0).unwrap_or(16);
let comm = String::from_utf8_lossy(&buf[..n]).to_string();
(Some(pid), Some(comm))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn rq_scx_state_serde_skip_none() {
let s = RqScxState {
cpu: 3,
nr_running: 4,
flags: 0x10,
cpu_released: false,
ops_qseq: 100,
kick_sync: Some(50),
nr_immed: None,
rq_clock: Some(1234567),
curr_pid: None,
curr_comm: None,
runnable_task_kvas: vec![],
runnable_truncated: false,
};
let json = serde_json::to_string(&s).unwrap();
assert!(!json.contains("curr_pid"));
assert!(!json.contains("curr_comm"));
assert!(!json.contains("runnable_truncated"));
assert!(!json.contains("nr_immed"));
assert!(json.contains("\"kick_sync\":50"));
assert!(json.contains("\"cpu\":3"));
assert!(json.contains("\"nr_running\":4"));
}
#[test]
fn rq_scx_state_serde_roundtrip_populated() {
use crate::assert::Verdict;
let s = RqScxState {
cpu: 1,
nr_running: 2,
flags: 0x1,
cpu_released: true,
ops_qseq: 42,
kick_sync: Some(17),
nr_immed: Some(1),
rq_clock: Some(999_999),
curr_pid: Some(1234),
curr_comm: Some("ktstr".into()),
runnable_task_kvas: vec![0xffff_ffff_8000_1000, 0xffff_ffff_8000_2000],
runnable_truncated: true,
};
let json = serde_json::to_string(&s).unwrap();
let parsed: RqScxState = serde_json::from_str(&json).unwrap();
let parsed_cpu = parsed.cpu;
let parsed_nr_running = parsed.nr_running;
let parsed_flags = parsed.flags;
let parsed_cpu_released = parsed.cpu_released;
let parsed_ops_qseq = parsed.ops_qseq;
let parsed_kick_sync = parsed.kick_sync;
let parsed_nr_immed = parsed.nr_immed;
let parsed_rq_clock = parsed.rq_clock;
let parsed_curr_pid = parsed.curr_pid;
let parsed_curr_comm = parsed.curr_comm.clone();
let parsed_runnable_kvas_len = parsed.runnable_task_kvas.len();
let parsed_runnable_truncated = parsed.runnable_truncated;
let mut v = Verdict::new();
crate::claim!(v, parsed_cpu).eq(1u32);
crate::claim!(v, parsed_nr_running).eq(2u32);
crate::claim!(v, parsed_flags).eq(0x1u32);
crate::claim!(v, parsed_cpu_released).eq(true);
crate::claim!(v, parsed_ops_qseq).eq(42u64);
let kick_sync_match = parsed_kick_sync == Some(17u64);
let nr_immed_match = parsed_nr_immed == Some(1u32);
let rq_clock_match = parsed_rq_clock == Some(999_999u64);
crate::claim!(v, kick_sync_match).eq(true);
crate::claim!(v, nr_immed_match).eq(true);
crate::claim!(v, rq_clock_match).eq(true);
let curr_pid_match = parsed_curr_pid == Some(1234);
let curr_comm_match = parsed_curr_comm.as_deref() == Some("ktstr");
crate::claim!(v, curr_pid_match).eq(true);
crate::claim!(v, curr_comm_match).eq(true);
crate::claim!(v, parsed_runnable_kvas_len).eq(2usize);
crate::claim!(v, parsed_runnable_truncated).eq(true);
let r = v.into_result();
assert!(
r.passed,
"rq_scx_state roundtrip claims must all pass: {:?}",
r.details,
);
}
#[test]
fn dsq_state_serde_skip_truncated_when_false() {
let d = DsqState {
id: 0xdead_beef,
origin: "user".into(),
nr: 5,
seq: 100,
task_kvas: vec![],
truncated: false,
};
let json = serde_json::to_string(&d).unwrap();
assert!(!json.contains("truncated"));
assert!(json.contains("\"id\":3735928559"));
assert!(json.contains("\"nr\":5"));
assert!(json.contains("\"seq\":100"));
}
#[test]
fn dsq_state_serde_emits_truncated_when_true() {
let d = DsqState {
id: 1,
origin: "global node 0".into(),
nr: 5000,
seq: 5001,
task_kvas: (0..MAX_NODES_PER_LIST as u64).collect(),
truncated: true,
};
let json = serde_json::to_string(&d).unwrap();
assert!(json.contains("\"truncated\":true"));
}
#[test]
fn scx_sched_state_default_empty() {
let s = ScxSchedState::default();
assert!(!s.aborting);
assert_eq!(s.bypass_depth, 0);
assert_eq!(s.exit_kind, 0);
}
#[test]
fn scx_sched_state_serde_roundtrip() {
use crate::assert::Verdict;
let s = ScxSchedState {
aborting: true,
bypass_depth: 2,
exit_kind: 1027,
..Default::default()
};
let json = serde_json::to_string(&s).unwrap();
let parsed: ScxSchedState = serde_json::from_str(&json).unwrap();
let parsed_aborting = parsed.aborting;
let parsed_bypass_depth = parsed.bypass_depth;
let parsed_exit_kind = parsed.exit_kind;
let mut v = Verdict::new();
crate::claim!(v, parsed_aborting).eq(true);
crate::claim!(v, parsed_bypass_depth).eq(2i32);
crate::claim!(v, parsed_exit_kind).eq(1027u32);
let r = v.into_result();
assert!(
r.passed,
"scx_sched_state roundtrip claims must all pass: {:?}",
r.details,
);
}
#[test]
fn walk_list_head_basic_two_tasks() {
let mut buf = vec![0u8; 0x1000];
let head = 0x100usize;
let n1 = 0x200usize;
let n2 = 0x300usize;
buf[head..head + 8].copy_from_slice(&(n1 as u64).to_le_bytes());
buf[n1..n1 + 8].copy_from_slice(&(n2 as u64).to_le_bytes());
buf[n2..n2 + 8].copy_from_slice(&(head as u64).to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let runnable_node_off = 0x10usize;
let (kvas, truncated) = walk_list_head_for_task_kvas(
&mem,
WalkContext::default(),
head as u64,
head as u64,
runnable_node_off,
);
assert!(!truncated);
assert_eq!(kvas.len(), 2);
assert_eq!(kvas[0], (n1 - runnable_node_off) as u64);
assert_eq!(kvas[1], (n2 - runnable_node_off) as u64);
}
#[test]
fn walk_list_head_empty() {
let mut buf = vec![0u8; 0x1000];
let head = 0x100usize;
buf[head..head + 8].copy_from_slice(&(head as u64).to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let (kvas, truncated) = walk_list_head_for_task_kvas(
&mem,
WalkContext::default(),
head as u64,
head as u64,
0x10,
);
assert!(!truncated);
assert!(kvas.is_empty());
}
#[test]
fn walk_list_head_zero_next_bails() {
let mut buf = vec![0u8; 0x1000];
let head = 0x100usize;
buf[head..head + 8].copy_from_slice(&0u64.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let (kvas, truncated) = walk_list_head_for_task_kvas(
&mem,
WalkContext::default(),
head as u64,
head as u64,
0x10,
);
assert!(!truncated);
assert!(kvas.is_empty());
}
#[test]
fn scx_walker_offsets_missing_groups_reports_all_when_empty() {
let offsets = ScxWalkerOffsets {
rq: None,
scx_rq: None,
task: None,
see: None,
dsq_lnode: None,
dsq: None,
sched: None,
sched_pnode: None,
sched_pcpu: None,
rht: None,
};
let missing = offsets.missing_groups();
assert_eq!(missing.len(), 10);
assert!(missing.contains(&"rq"));
assert!(missing.contains(&"scx_rq"));
assert!(missing.contains(&"task_struct"));
assert!(missing.contains(&"sched_ext_entity"));
assert!(missing.contains(&"scx_dsq_list_node"));
assert!(missing.contains(&"scx_dispatch_q"));
assert!(missing.contains(&"scx_sched"));
assert!(missing.contains(&"scx_sched_pnode"));
assert!(missing.contains(&"scx_sched_pcpu"));
assert!(missing.contains(&"rhashtable/bucket_table/rhash_head"));
}
#[test]
fn scx_walker_offsets_missing_groups_reports_none_when_full() {
use super::super::btf_offsets::{
RhashtableOffsets, RqStructOffsets, SchedExtEntityOffsets, ScxDispatchQOffsets,
ScxDsqListNodeOffsets, ScxRqOffsets, ScxSchedOffsets, ScxSchedPcpuOffsets,
ScxSchedPnodeOffsets, TaskStructCoreOffsets,
};
let offsets = ScxWalkerOffsets {
rq: Some(RqStructOffsets { scx: 0, curr: 8 }),
scx_rq: Some(ScxRqOffsets {
local_dsq: 0,
runnable_list: 64,
nr_running: 96,
flags: 100,
cpu_released: 104,
ops_qseq: 112,
kick_sync: Some(120),
nr_immed: Some(128),
clock: Some(136),
}),
task: Some(TaskStructCoreOffsets {
comm: 100,
pid: 200,
scx: 300,
}),
see: Some(SchedExtEntityOffsets {
runnable_node: 0,
runnable_at: 16,
weight: 24,
slice: 32,
dsq_vtime: 40,
dsq: 48,
dsq_list: 56,
flags: 72,
dsq_flags: 76,
sticky_cpu: 80,
holding_cpu: 84,
tasks_node: 88,
}),
dsq_lnode: Some(ScxDsqListNodeOffsets { node: 0, flags: 16 }),
dsq: Some(ScxDispatchQOffsets {
list: 0,
nr: 16,
seq: 20,
id: 24,
hash_node: 32,
}),
sched: Some(ScxSchedOffsets {
dsq_hash: 0,
pnode: Some(64),
pcpu: Some(72),
aborting: Some(80),
bypass_depth: Some(84),
exit_kind: 88,
}),
sched_pnode: Some(ScxSchedPnodeOffsets {
global_dsq: Some(0),
}),
sched_pcpu: Some(ScxSchedPcpuOffsets {
bypass_dsq: Some(0),
}),
rht: Some(RhashtableOffsets {
tbl: 0,
nelems: 8,
bucket_table_size: 0,
bucket_table_buckets: 16,
rhash_head_next: 0,
}),
};
assert!(offsets.missing_groups().is_empty());
}
#[test]
fn rq_scx_state_authorial_verdict_claims_compose() {
use crate::assert::Verdict;
let s = RqScxState {
cpu: 2,
nr_running: 3,
flags: 0x1,
cpu_released: false,
ops_qseq: 4242,
kick_sync: Some(100),
nr_immed: Some(0),
rq_clock: Some(999_999),
curr_pid: Some(1234),
curr_comm: Some("ktstr-w".into()),
runnable_task_kvas: vec![0xffff_ffff_8000_1000, 0xffff_ffff_8000_2000],
runnable_truncated: false,
};
let mut v = Verdict::new();
crate::claim!(v, s.nr_running).at_least(1);
crate::claim!(v, s.nr_running).at_most(64);
crate::claim!(v, s.runnable_truncated).eq(false);
v.claim_seq("runnable_task_kvas", &s.runnable_task_kvas)
.nonempty();
v.claim_seq("runnable_task_kvas", &s.runnable_task_kvas)
.len_at_most(64);
let r = v.into_result();
assert!(
r.passed,
"authorial claim sequence on populated RqScxState must pass: {:?}",
r.details,
);
}
#[test]
fn rq_scx_state_failing_at_most_records_labeled_detail() {
use crate::assert::Verdict;
let s = RqScxState {
cpu: 0,
nr_running: 100,
flags: 0,
cpu_released: false,
ops_qseq: 0,
kick_sync: None,
nr_immed: None,
rq_clock: None,
curr_pid: None,
curr_comm: None,
runnable_task_kvas: vec![],
runnable_truncated: false,
};
let mut v = Verdict::new();
crate::claim!(v, s.nr_running).at_most(10);
let r = v.into_result();
assert!(!r.passed, "at_most(10) on nr_running=100 must fail");
assert_eq!(
r.details.len(),
1,
"exactly one failing detail must record: {:?}",
r.details,
);
let msg = &r.details[0].message;
assert!(
msg.contains("s.nr_running"),
"detail must carry the macro-stringify label: {msg}",
);
assert!(
msg.contains("at most 10"),
"detail must name the at_most threshold: {msg}",
);
assert!(
msg.contains("100"),
"detail must include the observed value: {msg}",
);
}
#[test]
fn dsq_state_authorial_verdict_claims_compose() {
use crate::assert::Verdict;
let d = DsqState {
id: 0xdead_beef,
origin: "user".into(),
nr: 5,
seq: 100,
task_kvas: vec![0xffff_8000_8000_1000; 5],
truncated: false,
};
let mut v = Verdict::new();
crate::claim!(v, d.nr).at_most(MAX_NODES_PER_LIST);
crate::claim!(v, d.truncated).eq(false);
crate::claim!(v, d.seq).at_least(d.nr);
v.claim_seq("d.task_kvas", &d.task_kvas).len_eq(5);
let r = v.into_result();
assert!(
r.passed,
"authorial claim sequence on populated DsqState must pass: {:?}",
r.details,
);
}
#[test]
fn scx_sched_state_healthy_exit_kind_claim() {
use crate::assert::Verdict;
let healthy = ScxSchedState {
aborting: false,
bypass_depth: 0,
exit_kind: 0,
..Default::default()
};
let mut v = Verdict::new();
crate::claim!(v, healthy.aborting).eq(false);
crate::claim!(v, healthy.bypass_depth).eq(0);
crate::claim!(v, healthy.exit_kind).eq(0u32);
let r = v.into_result();
assert!(r.passed, "healthy-state claims must pass: {:?}", r.details);
let aborted = ScxSchedState {
aborting: true,
bypass_depth: 4,
exit_kind: 1027,
..Default::default()
};
let mut v = Verdict::new();
crate::claim!(v, aborted.exit_kind).eq(0u32);
let r = v.into_result();
assert!(!r.passed, "exit_kind=1027 must fail eq(0)");
}
#[test]
fn walk_scx_tasks_global_zero_kva_returns_empty() {
let mut buf = vec![0u8; 0x1000];
buf[0..8].copy_from_slice(&0xdead_beef_u64.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = crate::monitor::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let kvas = walk_scx_tasks_global(&kernel, 0, 0x10, 0x60, 0x44);
assert!(
kvas.is_empty(),
"scx_tasks_kva=0 must short-circuit before any read"
);
}
#[test]
fn walk_scx_tasks_global_empty_list_returns_empty() {
let head_kva = crate::monitor::symbols::START_KERNEL_MAP + 0x100;
let head_pa = head_kva.wrapping_sub(crate::monitor::symbols::START_KERNEL_MAP) as usize;
let mut buf = vec![0u8; 0x1000];
buf[head_pa..head_pa + 8].copy_from_slice(&head_kva.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = crate::monitor::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let kvas = walk_scx_tasks_global(&kernel, head_kva, 0x10, 0x60, 0x44);
assert!(kvas.is_empty(), "empty global list must yield no tasks");
}
#[test]
fn walk_scx_tasks_global_two_tasks_round_trip() {
let head_kva = crate::monitor::symbols::START_KERNEL_MAP + 0x100;
let head_pa = 0x100usize;
let t1_node_kva: u64 = 0x800;
let t2_node_kva: u64 = 0x900;
let tasks_node_off_in_task: usize = 0x40;
let tasks_node_off_in_see: usize = 0x60;
let flags_off_in_see: usize = 0x44;
let mut buf = vec![0u8; 0x1000];
buf[head_pa..head_pa + 8].copy_from_slice(&t1_node_kva.to_le_bytes());
let t1_pa = t1_node_kva as usize;
let t2_pa = t2_node_kva as usize;
buf[t1_pa..t1_pa + 8].copy_from_slice(&t2_node_kva.to_le_bytes());
buf[t2_pa..t2_pa + 8].copy_from_slice(&head_kva.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = crate::monitor::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let kvas = walk_scx_tasks_global(
&kernel,
head_kva,
tasks_node_off_in_task,
tasks_node_off_in_see,
flags_off_in_see,
);
assert_eq!(kvas.len(), 2, "two-task list must yield two task kvas");
assert_eq!(
kvas[0],
t1_node_kva.wrapping_sub(tasks_node_off_in_task as u64)
);
assert_eq!(
kvas[1],
t2_node_kva.wrapping_sub(tasks_node_off_in_task as u64)
);
}
#[test]
fn walk_local_dsqs_none_when_offsets_missing() {
let mut buf = vec![0u8; 0x1000];
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = crate::monitor::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let offsets = ScxWalkerOffsets {
rq: None,
scx_rq: None,
task: None,
see: None,
dsq_lnode: None,
dsq: None,
sched: None,
sched_pnode: None,
sched_pcpu: None,
rht: None,
};
let r = walk_local_dsqs(&kernel, &[], &[], &[], &offsets);
assert!(r.is_none(), "missing offsets must gate to None");
}
#[test]
fn walk_local_dsqs_runs_without_scheduler() {
let rq_kva: u64 = 0x100;
let rq_pa: u64 = 0x100;
let mut buf = vec![0u8; 0x1000];
buf[rq_pa as usize..rq_pa as usize + 8].copy_from_slice(&rq_kva.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = crate::monitor::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let offsets = ScxWalkerOffsets {
rq: Some(crate::monitor::btf_offsets::RqStructOffsets { scx: 0, curr: 8 }),
scx_rq: Some(crate::monitor::btf_offsets::ScxRqOffsets {
local_dsq: 0,
runnable_list: 0,
nr_running: 96,
flags: 100,
cpu_released: 104,
ops_qseq: 112,
kick_sync: None,
nr_immed: None,
clock: None,
}),
task: Some(crate::monitor::btf_offsets::TaskStructCoreOffsets {
comm: 100,
pid: 200,
scx: 0,
}),
see: Some(crate::monitor::btf_offsets::SchedExtEntityOffsets {
runnable_node: 0,
runnable_at: 16,
weight: 24,
slice: 32,
dsq_vtime: 40,
dsq: 48,
dsq_list: 56,
flags: 72,
dsq_flags: 76,
sticky_cpu: 80,
holding_cpu: 84,
tasks_node: 88,
}),
dsq_lnode: Some(crate::monitor::btf_offsets::ScxDsqListNodeOffsets {
node: 0,
flags: 16,
}),
dsq: Some(crate::monitor::btf_offsets::ScxDispatchQOffsets {
list: 0,
nr: 16,
seq: 20,
id: 24,
hash_node: 32,
}),
sched: None,
sched_pnode: None,
sched_pcpu: None,
rht: None,
};
let (states, entries) = walk_local_dsqs(&kernel, &[rq_kva], &[rq_pa], &[0], &offsets)
.expect("offsets present, should yield Some");
assert_eq!(states.len(), 1, "one CPU → one DSQ state");
assert_eq!(states[0].origin, "local cpu 0");
assert!(entries.is_empty());
}
#[test]
fn walk_scx_tasks_global_skips_cursor_entries() {
let head_kva = crate::monitor::symbols::START_KERNEL_MAP + 0x100;
let head_pa = 0x100usize;
let t1_node_kva: u64 = 0x800;
let cursor_node_kva: u64 = 0xa00;
let t2_node_kva: u64 = 0xc00;
let tasks_node_off_in_task: usize = 0x40;
let tasks_node_off_in_see: usize = 0x60;
let flags_off_in_see: usize = 0x44;
let mut buf = vec![0u8; 0x1000];
buf[head_pa..head_pa + 8].copy_from_slice(&t1_node_kva.to_le_bytes());
let t1_pa = t1_node_kva as usize;
let cursor_pa = cursor_node_kva as usize;
let t2_pa = t2_node_kva as usize;
buf[t1_pa..t1_pa + 8].copy_from_slice(&cursor_node_kva.to_le_bytes());
buf[cursor_pa..cursor_pa + 8].copy_from_slice(&t2_node_kva.to_le_bytes());
buf[t2_pa..t2_pa + 8].copy_from_slice(&head_kva.to_le_bytes());
let cursor_see_kva = cursor_node_kva.wrapping_sub(tasks_node_off_in_see as u64);
let cursor_flags_pa = (cursor_see_kva as usize).wrapping_add(flags_off_in_see);
let cursor_flags: u32 = 1 << 31;
buf[cursor_flags_pa..cursor_flags_pa + 4].copy_from_slice(&cursor_flags.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = crate::monitor::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let kvas = walk_scx_tasks_global(
&kernel,
head_kva,
tasks_node_off_in_task,
tasks_node_off_in_see,
flags_off_in_see,
);
assert_eq!(
kvas.len(),
2,
"cursor entry must be filtered; only 2 real tasks remain"
);
let cursor_task_kva = cursor_node_kva.wrapping_sub(tasks_node_off_in_task as u64);
assert!(
!kvas.contains(&cursor_task_kva),
"cursor's container_of result must NOT appear in the task list"
);
assert_eq!(
kvas[0],
t1_node_kva.wrapping_sub(tasks_node_off_in_task as u64)
);
assert_eq!(
kvas[1],
t2_node_kva.wrapping_sub(tasks_node_off_in_task as u64)
);
}
fn dsq_test_offsets() -> ScxWalkerOffsets {
use super::super::btf_offsets::{
RhashtableOffsets, RqStructOffsets, SchedExtEntityOffsets, ScxDispatchQOffsets,
ScxDsqListNodeOffsets, ScxRqOffsets, ScxSchedOffsets, ScxSchedPcpuOffsets,
ScxSchedPnodeOffsets, TaskStructCoreOffsets,
};
ScxWalkerOffsets {
rq: Some(RqStructOffsets { scx: 0, curr: 8 }),
scx_rq: Some(ScxRqOffsets {
local_dsq: 0,
runnable_list: 0,
nr_running: 96,
flags: 100,
cpu_released: 104,
ops_qseq: 112,
kick_sync: None,
nr_immed: None,
clock: None,
}),
task: Some(TaskStructCoreOffsets {
comm: 100,
pid: 200,
scx: 0,
}),
see: Some(SchedExtEntityOffsets {
runnable_node: 0,
runnable_at: 16,
weight: 24,
slice: 32,
dsq_vtime: 40,
dsq: 48,
dsq_list: 56,
flags: 72,
dsq_flags: 76,
sticky_cpu: 80,
holding_cpu: 84,
tasks_node: 88,
}),
dsq_lnode: Some(ScxDsqListNodeOffsets { node: 0, flags: 16 }),
dsq: Some(ScxDispatchQOffsets {
list: 0,
nr: 16,
seq: 20,
id: 24,
hash_node: 32,
}),
sched: Some(ScxSchedOffsets {
dsq_hash: 0x40,
pnode: Some(0x80),
pcpu: Some(0x88),
aborting: Some(0x90),
bypass_depth: Some(0x94),
exit_kind: 0x98,
}),
sched_pnode: Some(ScxSchedPnodeOffsets {
global_dsq: Some(0),
}),
sched_pcpu: Some(ScxSchedPcpuOffsets {
bypass_dsq: Some(0),
}),
rht: Some(RhashtableOffsets {
tbl: 0,
nelems: 8,
bucket_table_size: 0,
bucket_table_buckets: 16,
rhash_head_next: 0,
}),
}
}
#[test]
fn walk_dsqs_partial_passes_yield_partial_results() {
let mut buf = vec![0u8; 0x2000];
let sched_pa: u64 = 0x100;
let pcpu_kva: u64 = 0x300;
buf[(sched_pa + 0x88) as usize..(sched_pa + 0x88) as usize + 8]
.copy_from_slice(&pcpu_kva.to_le_bytes());
buf[pcpu_kva as usize..pcpu_kva as usize + 8].copy_from_slice(&pcpu_kva.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = super::super::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let mut offsets = dsq_test_offsets();
offsets.sched_pnode = None;
offsets.rht = None;
let (states, entries) = walk_dsqs(&kernel, sched_pa, &[0u64], 0, &offsets);
assert_eq!(states.len(), 1, "pass 1 produces one bypass DSQ entry");
assert_eq!(states[0].origin, "bypass cpu 0");
assert!(entries.is_empty(), "empty bypass DSQ → no task entries");
}
#[test]
fn walk_dsqs_all_advanced_offsets_none_yields_empty() {
let mut buf = vec![0u8; 0x1000];
let sched_pa: u64 = 0x100;
buf[sched_pa as usize..sched_pa as usize + 8]
.copy_from_slice(&0xdead_beef_dead_beef_u64.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = super::super::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let mut offsets = dsq_test_offsets();
offsets.sched_pcpu = None;
offsets.sched_pnode = None;
offsets.rht = None;
let (states, entries) = walk_dsqs(&kernel, sched_pa, &[0u64], 1, &offsets);
assert!(
states.is_empty(),
"all advanced offsets None → no DSQ states"
);
assert!(entries.is_empty());
}
#[test]
fn walk_dsqs_user_hash_with_real_page_offset() {
const PAGE_OFFSET: u64 = 0xffff_8880_0000_0000;
let sched_pa: u64 = 0x100;
let rht_pa: u64 = 0x140;
let tbl_pa: u64 = 0x300;
let dsq_pa: u64 = 0x500;
let tbl_kva = tbl_pa.wrapping_add(PAGE_OFFSET);
let dsq_kva_expected = dsq_pa.wrapping_add(PAGE_OFFSET);
let mut buf = vec![0u8; 0x1000];
buf[rht_pa as usize..rht_pa as usize + 8].copy_from_slice(&tbl_kva.to_le_bytes());
buf[tbl_pa as usize..tbl_pa as usize + 4].copy_from_slice(&1u32.to_le_bytes());
buf[(tbl_pa + 16) as usize..(tbl_pa + 16) as usize + 8]
.copy_from_slice(&dsq_kva_expected.to_le_bytes());
buf[dsq_pa as usize..dsq_pa as usize + 8].copy_from_slice(&0u64.to_le_bytes());
buf[(dsq_pa + 24) as usize..(dsq_pa + 24) as usize + 8]
.copy_from_slice(&0xc0ffee_u64.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = super::super::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
PAGE_OFFSET,
0,
false,
);
let mut offsets = dsq_test_offsets();
offsets.sched_pcpu = None;
offsets.sched_pnode = None;
if let Some(dsq_offs) = offsets.dsq.as_mut() {
dsq_offs.hash_node = 0;
}
let (states, _entries) = walk_dsqs(&kernel, sched_pa, &[], 0, &offsets);
assert_eq!(
states.len(),
1,
"Pass 3 must surface the user DSQ when page_offset is non-zero — \
pre-fix the PA-as-KVA bug silently returned 0 user DSQs",
);
assert_eq!(states[0].origin, "user");
assert_eq!(states[0].id, 0xc0ffee);
}
#[test]
fn walk_local_dsqs_one_cpu_empty_one_populated() {
let mut buf = vec![0u8; 0x2000];
let cpu0_rq: u64 = 0x100;
let cpu1_rq: u64 = 0x300;
let task1: u64 = 0x800;
buf[cpu0_rq as usize..cpu0_rq as usize + 8].copy_from_slice(&task1.to_le_bytes());
buf[task1 as usize..task1 as usize + 8].copy_from_slice(&cpu0_rq.to_le_bytes());
// dsq.nr = 1
buf[(cpu0_rq + 16) as usize..(cpu0_rq + 16) as usize + 4]
.copy_from_slice(&1u32.to_le_bytes());
// dsq.seq = 10
buf[(cpu0_rq + 20) as usize..(cpu0_rq + 20) as usize + 4]
.copy_from_slice(&10u32.to_le_bytes());
// dsq.id = 0xa
buf[(cpu0_rq + 24) as usize..(cpu0_rq + 24) as usize + 8]
.copy_from_slice(&0xau64.to_le_bytes());
buf[cpu1_rq as usize..cpu1_rq as usize + 8].copy_from_slice(&cpu1_rq.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = super::super::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let offsets = dsq_test_offsets();
let (states, entries) = walk_local_dsqs(
&kernel,
&[cpu0_rq, cpu1_rq],
&[cpu0_rq, cpu1_rq],
&[0, 0x1000],
&offsets,
)
.expect("offsets present, should yield Some");
assert_eq!(
states.len(),
2,
"two CPUs → two DSQ rows, regardless of queue depth"
);
let cpu0 = states.iter().find(|s| s.origin == "local cpu 0").unwrap();
let cpu1 = states.iter().find(|s| s.origin == "local cpu 1").unwrap();
assert_eq!(cpu0.task_kvas.len(), 1, "CPU 0 has one queued task");
assert!(cpu1.task_kvas.is_empty(), "CPU 1 is empty");
assert_eq!(cpu0.id, 0xa);
assert_eq!(cpu0.nr, 1);
assert_eq!(cpu0.seq, 10);
assert_eq!(entries.len(), 1);
}
#[test]
fn walk_local_dsqs_skips_bss_zero_tail_aliases() {
let mut buf = vec![0u8; 0x1000];
let cpu0_rq: u64 = 0x100;
buf[cpu0_rq as usize..cpu0_rq as usize + 8].copy_from_slice(&cpu0_rq.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = super::super::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let offsets = dsq_test_offsets();
let (states, entries) = walk_local_dsqs(
&kernel,
&[cpu0_rq, cpu0_rq, cpu0_rq, cpu0_rq],
&[cpu0_rq, cpu0_rq, cpu0_rq, cpu0_rq],
&[0x100, 0, 0, 0],
&offsets,
)
.expect("offsets present, should yield Some");
assert_eq!(
states.len(),
1,
"BSS-zero-tail aliases must be skipped; only CPU 0 surfaces"
);
assert_eq!(states[0].origin, "local cpu 0");
assert!(entries.is_empty());
}
#[test]
fn walk_local_dsqs_skips_bss_zero_tail_with_nonzero_cpu0_offset() {
let runqueues_pa: u64 = 0x300;
// CPU 0 sits at a non-zero per-cpu offset; the alias entry sits at the
// `runqueues` base itself, mimicking a BSS zero tail.
let cpu0_rq: u64 = runqueues_pa + 0x100;
let bss_rq: u64 = runqueues_pa;
let mut buf = vec![0u8; 0x1000];
buf[cpu0_rq as usize..cpu0_rq as usize + 8].copy_from_slice(&cpu0_rq.to_le_bytes());
buf[bss_rq as usize..bss_rq as usize + 8].copy_from_slice(&bss_rq.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = super::super::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let offsets = dsq_test_offsets();
let (states, _entries) = walk_local_dsqs(
&kernel,
&[cpu0_rq, bss_rq],
&[cpu0_rq, bss_rq],
&[0x100, 0],
&offsets,
)
.expect("offsets present, should yield Some");
assert_eq!(
states.len(),
1,
"BSS-zero entry must be skipped via cpu_off==0 guard \
even when its rq_pa differs from rq_pas[0]"
);
assert_eq!(states[0].origin, "local cpu 0");
}
#[test]
fn read_scx_sched_state_offsets_sched_none_returns_none() {
let mut buf = vec![0u8; 0x1000];
buf[0..8].copy_from_slice(&0xdead_beef_u64.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = super::super::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let mut offsets = dsq_test_offsets();
offsets.sched = None;
let scx_root_kva = super::super::symbols::START_KERNEL_MAP + 0x10;
let r = read_scx_sched_state(&kernel, scx_root_kva, &offsets);
assert!(r.is_none(), "sched=None must short-circuit before read");
}
#[test]
fn read_scx_sched_state_scx_root_pointer_zero_returns_none() {
let scx_root_kva = super::super::symbols::START_KERNEL_MAP + 0x100;
let scx_root_pa = 0x100usize;
let mut buf = vec![0u8; 0x1000];
buf[scx_root_pa..scx_root_pa + 8].copy_from_slice(&0u64.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = super::super::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let offsets = dsq_test_offsets();
let r = read_scx_sched_state(&kernel, scx_root_kva, &offsets);
assert!(
r.is_none(),
"*scx_root == 0 (no scheduler) → None, no state surfaced"
);
}
#[test]
fn read_scx_sched_state_aborting_offset_none_defaults_false() {
let scx_root_kva = super::super::symbols::START_KERNEL_MAP + 0x100;
let scx_root_pa: usize = 0x100;
let sched_pa: u64 = 0x800;
let mut buf = vec![0u8; 0x1000];
buf[scx_root_pa..scx_root_pa + 8].copy_from_slice(&sched_pa.to_le_bytes());
buf[sched_pa as usize] = 0xff;
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = super::super::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let mut offsets = dsq_test_offsets();
if let Some(s) = offsets.sched.as_mut() {
s.aborting = None;
}
let (sched_kva_out, state) = read_scx_sched_state(&kernel, scx_root_kva, &offsets)
.expect("should yield Some when sched offsets present");
assert_eq!(sched_kva_out, sched_pa);
assert!(
!state.aborting,
"aborting=None must default to false, NOT read sched_pa+0"
);
}
#[test]
fn read_scx_sched_state_bypass_depth_offset_none_defaults_zero() {
let scx_root_kva = super::super::symbols::START_KERNEL_MAP + 0x100;
let scx_root_pa: usize = 0x100;
let sched_pa: u64 = 0x800;
let mut buf = vec![0u8; 0x1000];
buf[scx_root_pa..scx_root_pa + 8].copy_from_slice(&sched_pa.to_le_bytes());
buf[sched_pa as usize..sched_pa as usize + 4]
.copy_from_slice(&0xdead_beef_u32.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let kernel = super::super::guest::GuestKernel::new_for_test(
&mem,
std::collections::HashMap::new(),
0,
0,
false,
);
let mut offsets = dsq_test_offsets();
if let Some(s) = offsets.sched.as_mut() {
s.bypass_depth = None;
}
let (_, state) = read_scx_sched_state(&kernel, scx_root_kva, &offsets)
.expect("should yield Some when sched offsets present");
assert_eq!(
state.bypass_depth, 0,
"bypass_depth=None must default to 0, NOT read sched_pa+0"
);
}
fn rht_test_offsets() -> super::super::btf_offsets::RhashtableOffsets {
super::super::btf_offsets::RhashtableOffsets {
tbl: 0,
nelems: 8,
bucket_table_size: 0,
bucket_table_buckets: 16,
rhash_head_next: 0,
}
}
fn dsq_test_offsets_for_hash() -> super::super::btf_offsets::ScxDispatchQOffsets {
super::super::btf_offsets::ScxDispatchQOffsets {
list: 0,
nr: 16,
seq: 20,
id: 24,
hash_node: 0,
}
}
#[test]
fn walk_user_dsq_hash_per_bucket_chain_cap_truncates() {
let mut buf = vec![0u8; 0x1000];
let rht_pa: u64 = 0x100;
let tbl_kva: u64 = 0x200;
let tbl_pa: u64 = 0x200;
let node_a: u64 = 0x300;
let node_b: u64 = 0x308;
buf[rht_pa as usize..rht_pa as usize + 8].copy_from_slice(&tbl_kva.to_le_bytes());
buf[tbl_pa as usize..tbl_pa as usize + 4].copy_from_slice(&1u32.to_le_bytes());
buf[(tbl_pa + 16) as usize..(tbl_pa + 16) as usize + 8]
.copy_from_slice(&node_a.to_le_bytes());
buf[node_a as usize..node_a as usize + 8].copy_from_slice(&node_b.to_le_bytes());
buf[node_b as usize..node_b as usize + 8].copy_from_slice(&node_a.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let rht_offs = rht_test_offsets();
let dsq_offs = dsq_test_offsets_for_hash();
let (dsq_kvas, truncated) =
walk_user_dsq_hash(&mem, WalkContext::default(), rht_pa, &rht_offs, &dsq_offs);
assert!(
truncated,
"per-bucket chain cap must set truncated=true on a non-terminating chain",
);
assert_eq!(
dsq_kvas.len(),
PER_BUCKET_CHAIN_CAP as usize,
"PER_BUCKET_CHAIN_CAP must admit exactly {} chain visits",
PER_BUCKET_CHAIN_CAP,
);
}
#[test]
fn walk_user_dsq_hash_global_node_cap_truncates() {
let bucket_count: u32 = MAX_RHT_NODES + 1;
let rht_pa: u64 = 0x100;
let tbl_kva: u64 = 0x1000;
let tbl_pa: u64 = 0x1000;
let buckets_off: u64 = 16;
let shared_node: u64 = 0x40000;
let buf_size = (shared_node + 16) as usize;
let mut buf = vec![0u8; buf_size];
buf[rht_pa as usize..rht_pa as usize + 8].copy_from_slice(&tbl_kva.to_le_bytes());
buf[tbl_pa as usize..tbl_pa as usize + 4].copy_from_slice(&bucket_count.to_le_bytes());
for i in 0..bucket_count as u64 {
let off = (tbl_pa + buckets_off + i * 8) as usize;
buf[off..off + 8].copy_from_slice(&shared_node.to_le_bytes());
}
buf[shared_node as usize..shared_node as usize + 8].copy_from_slice(&0u64.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let rht_offs = rht_test_offsets();
let dsq_offs = dsq_test_offsets_for_hash();
let (dsq_kvas, truncated) =
walk_user_dsq_hash(&mem, WalkContext::default(), rht_pa, &rht_offs, &dsq_offs);
assert!(
truncated,
"global node cap (MAX_RHT_NODES) must set truncated=true",
);
assert_eq!(
dsq_kvas.len(),
MAX_RHT_NODES as usize,
"global cap halts the walk at exactly {} nodes",
MAX_RHT_NODES,
);
}
#[test]
fn walk_user_dsq_hash_bucket_table_cap_truncates() {
let mut buf = vec![0u8; 0x300];
let rht_pa: u64 = 0x100;
let tbl_kva: u64 = 0x200;
let tbl_pa: u64 = 0x200;
buf[rht_pa as usize..rht_pa as usize + 8].copy_from_slice(&tbl_kva.to_le_bytes());
let oversize: u32 = MAX_RHT_BUCKETS + 1;
buf[tbl_pa as usize..tbl_pa as usize + 4].copy_from_slice(&oversize.to_le_bytes());
let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
let rht_offs = rht_test_offsets();
let dsq_offs = dsq_test_offsets_for_hash();
let (dsq_kvas, truncated) =
walk_user_dsq_hash(&mem, WalkContext::default(), rht_pa, &rht_offs, &dsq_offs);
assert!(
truncated,
"bucket-table cap (size > MAX_RHT_BUCKETS) must set truncated=true upfront",
);
assert!(
dsq_kvas.is_empty(),
"all buckets read as 0 (out-of-buffer) → no DSQ KVAs collected",
);
}
}