use crate::monitor::btf_offsets::{find_struct, nested_member_byte_offset};
use crate::monitor::guest::GuestKernel;
use crate::monitor::idr::translate_any_kva;
use crate::vmm::wire::{
KERNEL_OP_REASON_MAX, KernelOpDirection, KernelOpEntry, KernelOpReplyPayload,
KernelOpRequestPayload, KernelOpTarget, KernelOpValue,
};
use btf_rs::Btf;
/// Upper bound on the number of list entries `find_task_by_pid` visits
/// (thread-group leaders and their threads share one counter) before it
/// gives up with an error instead of looping forever.
const MAX_TASK_WALKER_NODES: u32 = 65536;
/// Bit tested against the task state word in `validate_task_for_field_op`;
/// a task with this bit set is rejected as a TaskField target.
/// NOTE(review): presumably mirrors the guest kernel's TASK_DEAD value — confirm.
const TASK_DEAD: u32 = 0x80;
/// Lowest address `validate_kva_target` accepts for a Kva target; anything
/// below it is rejected with `user_half_kva_rejection_reason`.
const KERNEL_HALF_CONSERVATIVE_5LEVEL: u64 = 0xFF00_0000_0000_0000;
/// Checks that the byte range `[kva, kva + len)` lies inside the window
/// `[page_offset, page_offset + dram_size]`.
///
/// Checks run in this order, and the first failure is returned:
/// 1. `kva` is not below `page_offset`;
/// 2. `page_offset + dram_size` does not overflow `u64`;
/// 3. `kva + len` does not overflow `u64`;
/// 4. `kva + len` does not exceed `page_offset + dram_size`.
///
/// # Errors
/// Returns a human-readable reason string naming the offending values.
fn validate_direct_target(
    kva: u64,
    len: u64,
    page_offset: u64,
    dram_size: u64,
) -> Result<(), String> {
    if kva < page_offset {
        return Err(format!(
            "Direct kva={kva:#x} below page_offset={page_offset:#x} \
             (kva_to_pa would wrap; use Kva target for vmalloc/vmemmap)"
        ));
    }

    let Some(direct_map_end) = page_offset.checked_add(dram_size) else {
        return Err(format!(
            "internal: page_offset+dram_size overflow ({page_offset:#x} + {dram_size:#x})"
        ));
    };

    match kva.checked_add(len) {
        None => Err(format!("Direct kva+len overflow ({kva:#x} + {len:#x})")),
        // An end exactly equal to the window end is still in range.
        Some(range_end) if range_end > direct_map_end => Err(format!(
            "Direct kva={kva:#x} len={len} overruns direct-map end {direct_map_end:#x}"
        )),
        Some(_) => Ok(()),
    }
}
/// Checks that a page-table-translated target starts at or above
/// `KERNEL_HALF_CONSERVATIVE_5LEVEL` and that `kva + len` fits in a `u64`.
///
/// # Errors
/// Returns the shared user-half rejection text for a too-low `kva`, or an
/// overflow message when `kva + len` wraps.
fn validate_kva_target(kva: u64, len: u64) -> Result<(), String> {
    if kva < KERNEL_HALF_CONSERVATIVE_5LEVEL {
        return Err(user_half_kva_rejection_reason(kva));
    }
    // Only the overflow check matters here; the end address itself is unused.
    match kva.checked_add(len) {
        Some(_) => Ok(()),
        None => Err(format!("Kva kva+len overflow ({kva:#x} + {len:#x})")),
    }
}
/// Builds the rejection text used when a Kva target lies below
/// `KERNEL_HALF_CONSERVATIVE_5LEVEL`. The message echoes both the rejected
/// address and the threshold in hexadecimal.
pub(super) fn user_half_kva_rejection_reason(kva: u64) -> String {
    let threshold = KERNEL_HALF_CONSERVATIVE_5LEVEL;
    let mut reason =
        format!("Kva={kva:#x} below kernel-half 5-level conservative threshold ");
    reason.push_str(&format!(
        "{threshold:#x}; use Symbol target or a KVA in the "
    ));
    reason.push_str("kernel address space");
    reason
}
pub(super) fn dispatch_kernel_op_batch(
kernel: &GuestKernel,
btf: Option<&Btf>,
kaslr_offset: u64,
req: &KernelOpRequestPayload,
) -> KernelOpReplyPayload {
let request_id = req.request_id;
match req.direction {
KernelOpDirection::Write => {
dispatch_write_batch(kernel, btf, kaslr_offset, request_id, &req.entries)
}
KernelOpDirection::Read => {
dispatch_read_batch(kernel, btf, kaslr_offset, request_id, &req.entries)
}
}
}
/// Applies every entry of a write batch in order.
///
/// Stops at the first entry that fails and returns an error reply whose
/// reason is prefixed with `entry[<index>]: `. Entries before the failing one
/// have already been applied; nothing is rolled back. On full success the
/// reply has an empty reason and no read values.
fn dispatch_write_batch(
    kernel: &GuestKernel,
    btf: Option<&Btf>,
    kaslr_offset: u64,
    request_id: u32,
    entries: &[KernelOpEntry],
) -> KernelOpReplyPayload {
    // `find_map` is lazy: entries after the first failure are never dispatched.
    let first_failure = entries.iter().enumerate().find_map(|(idx, entry)| {
        dispatch_one_write(kernel, btf, kaslr_offset, &entry.target, &entry.value)
            .err()
            .map(|reason| format!("entry[{idx}]: {reason}"))
    });

    match first_failure {
        Some(message) => error_reply(request_id, message),
        None => KernelOpReplyPayload {
            request_id,
            success: true,
            reason: String::new(),
            read_values: Vec::new(),
        },
    }
}
/// Performs every entry of a read batch in order and gathers the values.
///
/// Each entry's `value` acts as a width hint for the read. The first failing
/// entry aborts the batch with an error reply whose reason is prefixed with
/// `entry[<index>]: `; values read before the failure are discarded.
fn dispatch_read_batch(
    kernel: &GuestKernel,
    btf: Option<&Btf>,
    kaslr_offset: u64,
    request_id: u32,
    entries: &[KernelOpEntry],
) -> KernelOpReplyPayload {
    // Collecting into `Result` short-circuits on the first `Err`.
    let outcome: Result<Vec<KernelOpValue>, String> = entries
        .iter()
        .enumerate()
        .map(|(idx, entry)| {
            dispatch_one_read(kernel, btf, kaslr_offset, &entry.target, &entry.value)
                .map_err(|reason| format!("entry[{idx}]: {reason}"))
        })
        .collect();

    match outcome {
        Err(message) => error_reply(request_id, message),
        Ok(read_values) => KernelOpReplyPayload {
            request_id,
            success: true,
            reason: String::new(),
            read_values,
        },
    }
}
/// Applies a single write entry to guest memory.
///
/// The `(target, value)` pair selects the access path:
/// * `Symbol` targets use the `GuestKernel` symbol accessors; their errors
///   are wrapped with the accessor name and the symbol name.
/// * `Direct` targets are range-checked by `validate_direct_target` against
///   `kernel.page_offset()` and `kernel.mem().size()` before the
///   `write_direct_*` call.
/// * `Kva` targets are checked by `validate_kva_target`; the `*_kva_*`
///   accessors return `None` on failure, which is reported as "page unmapped".
/// * `PerCpuField` and `TaskField` targets are forwarded, with any value
///   kind, to their dedicated helpers.
///
/// `OrU32` is a read-modify-write: the current `u32` is read, OR-ed with the
/// mask and written back as two separate guest accesses.
///
/// NOTE(review): a unit test in this file scans the source text of the
/// `Direct` and `Kva` arms and expects the validate call shortly after each
/// arm pattern — keep that layout when editing.
///
/// # Errors
/// Returns a reason string describing the first failing step.
fn dispatch_one_write(
kernel: &GuestKernel,
btf: Option<&Btf>,
kaslr_offset: u64,
target: &KernelOpTarget,
value: &KernelOpValue,
) -> Result<(), String> {
let page_offset = kernel.page_offset();
let dram_size = kernel.mem().size();
match (target, value) {
(KernelOpTarget::Symbol(name), KernelOpValue::U32(v)) => kernel
.write_symbol_u32(name, *v)
.map_err(|e| format!("write_symbol_u32('{name}'): {e:#}")),
(KernelOpTarget::Symbol(name), KernelOpValue::U64(v)) => kernel
.write_symbol_u64(name, *v)
.map_err(|e| format!("write_symbol_u64('{name}'): {e:#}")),
(KernelOpTarget::Symbol(name), KernelOpValue::Bytes(b)) => kernel
.write_symbol_bytes(name, b)
.map_err(|e| format!("write_symbol_bytes('{name}'): {e:#}")),
(KernelOpTarget::Symbol(name), KernelOpValue::OrU32(mask)) => {
let cur = kernel
.read_symbol_u32(name)
.map_err(|e| format!("read_symbol_u32('{name}') for OrU32: {e:#}"))?;
kernel
.write_symbol_u32(name, cur | mask)
.map_err(|e| format!("write_symbol_u32('{name}') for OrU32: {e:#}"))
}
(KernelOpTarget::Direct(kva), KernelOpValue::U32(v)) => {
validate_direct_target(*kva, 4, page_offset, dram_size)?;
kernel.write_direct_u32(*kva, *v);
Ok(())
}
(KernelOpTarget::Direct(kva), KernelOpValue::U64(v)) => {
validate_direct_target(*kva, 8, page_offset, dram_size)?;
kernel.write_direct_u64(*kva, *v);
Ok(())
}
(KernelOpTarget::Direct(kva), KernelOpValue::Bytes(b)) => {
validate_direct_target(*kva, b.len() as u64, page_offset, dram_size)?;
kernel.write_direct_bytes(*kva, b);
Ok(())
}
(KernelOpTarget::Direct(kva), KernelOpValue::OrU32(mask)) => {
validate_direct_target(*kva, 4, page_offset, dram_size)?;
let cur = kernel.read_direct_u32(*kva);
kernel.write_direct_u32(*kva, cur | mask);
Ok(())
}
(KernelOpTarget::Kva(kva), KernelOpValue::U32(v)) => {
validate_kva_target(*kva, 4)?;
kernel
.write_kva_u32(*kva, *v)
.ok_or_else(|| format!("write_kva_u32({kva:#x}): page unmapped"))
}
(KernelOpTarget::Kva(kva), KernelOpValue::U64(v)) => {
validate_kva_target(*kva, 8)?;
kernel
.write_kva_u64(*kva, *v)
.ok_or_else(|| format!("write_kva_u64({kva:#x}): page unmapped"))
}
(KernelOpTarget::Kva(kva), KernelOpValue::Bytes(b)) => {
validate_kva_target(*kva, b.len() as u64)?;
kernel
.write_kva_bytes_chunked(*kva, b)
.ok_or_else(|| format!("write_kva_bytes_chunked({kva:#x}): page unmapped or short"))
}
(KernelOpTarget::Kva(kva), KernelOpValue::OrU32(mask)) => {
validate_kva_target(*kva, 4)?;
let cur = kernel
.read_kva_u32(*kva)
.ok_or_else(|| format!("read_kva_u32({kva:#x}) for OrU32: page unmapped"))?;
kernel
.write_kva_u32(*kva, cur | mask)
.ok_or_else(|| format!("write_kva_u32({kva:#x}) for OrU32: page unmapped"))
}
(KernelOpTarget::PerCpuField { symbol, field, cpu }, value) => {
dispatch_per_cpu_field_write(kernel, btf, kaslr_offset, symbol, field, *cpu, value)
}
(
KernelOpTarget::TaskField {
pid,
expected_start_time_ns,
field,
},
value,
) => dispatch_task_field_write(
kernel,
btf,
kaslr_offset,
*pid,
*expected_start_time_ns,
field,
value,
),
}
}
/// Performs a single read entry against guest memory.
///
/// `width_hint` selects the access width by its variant only: the payload of
/// `U32`/`U64` is ignored, and for `Bytes` only the placeholder's length is
/// used as the number of bytes to read.
///
/// `Symbol`, `Direct` and `Kva` targets use the same accessor families and
/// the same `validate_direct_target` / `validate_kva_target` checks as the
/// write path. `PerCpuField` and `TaskField` targets are forwarded, with any
/// hint, to their dedicated helpers.
///
/// The final arm rejects `OrU32` as a read width. Because the `PerCpuField`
/// and `TaskField` arms above it accept every hint, that arm is only reached
/// for `Symbol`, `Direct` and `Kva` targets.
///
/// NOTE(review): a unit test in this file scans the source text of the
/// `Direct` and `Kva` arms and expects the validate call shortly after each
/// arm pattern — keep that layout when editing.
///
/// # Errors
/// Returns a reason string describing the first failing step.
fn dispatch_one_read(
kernel: &GuestKernel,
btf: Option<&Btf>,
kaslr_offset: u64,
target: &KernelOpTarget,
width_hint: &KernelOpValue,
) -> Result<KernelOpValue, String> {
let page_offset = kernel.page_offset();
let dram_size = kernel.mem().size();
match (target, width_hint) {
(KernelOpTarget::Symbol(name), KernelOpValue::U32(_)) => kernel
.read_symbol_u32(name)
.map(KernelOpValue::U32)
.map_err(|e| format!("read_symbol_u32('{name}'): {e:#}")),
(KernelOpTarget::Symbol(name), KernelOpValue::U64(_)) => kernel
.read_symbol_u64(name)
.map(KernelOpValue::U64)
.map_err(|e| format!("read_symbol_u64('{name}'): {e:#}")),
(KernelOpTarget::Symbol(name), KernelOpValue::Bytes(placeholder)) => kernel
.read_symbol_bytes(name, placeholder.len())
.map(KernelOpValue::Bytes)
.map_err(|e| format!("read_symbol_bytes('{name}', {}): {e:#}", placeholder.len())),
(KernelOpTarget::Direct(kva), KernelOpValue::U32(_)) => {
validate_direct_target(*kva, 4, page_offset, dram_size)?;
Ok(KernelOpValue::U32(kernel.read_direct_u32(*kva)))
}
(KernelOpTarget::Direct(kva), KernelOpValue::U64(_)) => {
validate_direct_target(*kva, 8, page_offset, dram_size)?;
Ok(KernelOpValue::U64(kernel.read_direct_u64(*kva)))
}
(KernelOpTarget::Direct(kva), KernelOpValue::Bytes(placeholder)) => {
validate_direct_target(*kva, placeholder.len() as u64, page_offset, dram_size)?;
Ok(KernelOpValue::Bytes(
kernel.read_direct_bytes(*kva, placeholder.len()),
))
}
(KernelOpTarget::Kva(kva), KernelOpValue::U32(_)) => {
validate_kva_target(*kva, 4)?;
kernel
.read_kva_u32(*kva)
.map(KernelOpValue::U32)
.ok_or_else(|| format!("read_kva_u32({kva:#x}): page unmapped"))
}
(KernelOpTarget::Kva(kva), KernelOpValue::U64(_)) => {
validate_kva_target(*kva, 8)?;
kernel
.read_kva_u64(*kva)
.map(KernelOpValue::U64)
.ok_or_else(|| format!("read_kva_u64({kva:#x}): page unmapped"))
}
(KernelOpTarget::Kva(kva), KernelOpValue::Bytes(placeholder)) => {
validate_kva_target(*kva, placeholder.len() as u64)?;
kernel
.read_kva_bytes_chunked(*kva, placeholder.len())
.map(KernelOpValue::Bytes)
.ok_or_else(|| {
format!(
"read_kva_bytes_chunked({kva:#x}, {}): page unmapped or short",
placeholder.len()
)
})
}
(KernelOpTarget::PerCpuField { symbol, field, cpu }, width_hint) => {
dispatch_per_cpu_field_read(kernel, btf, kaslr_offset, symbol, field, *cpu, width_hint)
}
(
KernelOpTarget::TaskField {
pid,
expected_start_time_ns,
field,
},
width_hint,
) => dispatch_task_field_read(
kernel,
btf,
kaslr_offset,
*pid,
*expected_start_time_ns,
field,
width_hint,
),
(_, KernelOpValue::OrU32(mask)) => Err(oru32_read_rejection_reason(*mask)),
}
}
/// Maps a supported per-CPU symbol name to the name of the struct used to
/// resolve its field offsets.
///
/// Matching is exact and case-sensitive.
///
/// # Errors
/// Returns a message listing the supported symbols when `symbol` is not one
/// of them.
fn struct_name_for_per_cpu_symbol(symbol: &str) -> Result<&'static str, String> {
    // (per-CPU symbol, struct name) pairs supported in v1.
    const KNOWN_SYMBOLS: [(&str, &str); 4] = [
        ("runqueues", "rq"),
        ("kernel_cpustat", "kernel_cpustat"),
        ("kstat", "kernel_stat"),
        ("tick_cpu_sched", "tick_sched"),
    ];

    KNOWN_SYMBOLS
        .iter()
        .find(|(known, _)| *known == symbol)
        .map(|&(_, struct_name)| struct_name)
        .ok_or_else(|| {
            format!(
                "PerCpuField: unknown per-CPU symbol '{symbol}' (v1 supports: \
                 runqueues, kernel_cpustat, kstat, tick_cpu_sched); extend \
                 struct_name_for_per_cpu_symbol + KernelSymbols::from_elf to add"
            )
        })
}
/// Resolves the guest-physical address of `field` inside the per-CPU
/// instance of `symbol` that belongs to `cpu`.
///
/// Steps, in order (the first failure is returned):
/// 1. require BTF and map `symbol` to its struct name;
/// 2. look up `symbol` and `__per_cpu_offset` in the symbol table;
/// 3. read `__per_cpu_offset[cpu]` (8-byte slots) and reject a zero slot for
///    any `cpu > 0`;
/// 4. combine the template address, `kaslr_offset` and the per-CPU offset
///    into the instance address, and reject it when it falls below the walk
///    context's `page_offset`;
/// 5. resolve the byte offset of `field` (a possibly nested member path)
///    within the struct via BTF;
/// 6. translate the address of the *field itself* through the page tables.
///
/// Step 6 translates `per_cpu_kva + field_off` rather than translating the
/// struct base and adding the offset in physical space. The two are only
/// equivalent when the struct is physically contiguous; when the per-CPU area
/// is page-mapped and the field lies on a later page than the struct base,
/// base-PA + offset points at unrelated memory.
///
/// # Errors
/// Returns a reason string prefixed with
/// `PerCpuField <symbol>.<field>[cpu=<cpu>]` for every failure other than an
/// unknown symbol, whose message is produced by
/// `struct_name_for_per_cpu_symbol`.
fn resolve_per_cpu_field_pa(
    kernel: &GuestKernel,
    btf: Option<&Btf>,
    kaslr_offset: u64,
    symbol: &str,
    field: &str,
    cpu: u32,
) -> Result<usize, String> {
    let btf = btf.ok_or_else(|| {
        format!(
            "PerCpuField {symbol}.{field}[cpu={cpu}]: BTF not loaded in this \
             coordinator — cannot resolve struct layout (vmlinux must carry \
             CONFIG_DEBUG_INFO_BTF=y output)"
        )
    })?;
    let struct_name = struct_name_for_per_cpu_symbol(symbol)?;
    let template_kva = kernel.symbol_kva(symbol).ok_or_else(|| {
        format!(
            "PerCpuField {symbol}.{field}[cpu={cpu}]: '{symbol}' symbol absent \
             from vmlinux symtab"
        )
    })?;
    let per_cpu_offset_array_kva = kernel.symbol_kva("__per_cpu_offset").ok_or_else(|| {
        format!(
            "PerCpuField {symbol}.{field}[cpu={cpu}]: '__per_cpu_offset' symbol \
             absent — kernel built without SMP"
        )
    })?;
    let per_cpu_offset_array_pa = kernel.text_kva_to_pa(per_cpu_offset_array_kva);
    // One u64 slot per CPU.
    let per_cpu_offset = kernel
        .mem()
        .read_u64(per_cpu_offset_array_pa, (cpu as usize) * 8);
    if per_cpu_offset == 0 && cpu > 0 {
        return Err(format!(
            "PerCpuField {symbol}.{field}[cpu={cpu}]: __per_cpu_offset[{cpu}]=0 \
             (cpu beyond nr_cpu_ids; kernel zero-init slot)"
        ));
    }
    let per_cpu_kva =
        crate::monitor::symbols::per_cpu_kva(template_kva, kaslr_offset, per_cpu_offset);
    let kernel_half_floor = kernel.walk_context().page_offset;
    if per_cpu_kva < kernel_half_floor {
        return Err(format!(
            "PerCpuField {symbol}.{field}[cpu={cpu}]: per_cpu_kva={per_cpu_kva:#x} \
             below kernel page_offset ({kernel_half_floor:#x}) — arithmetic wrap \
             or broken template KVA \
             (template={template_kva:#x} + kaslr={kaslr_offset:#x} + \
             per_cpu_off={per_cpu_offset:#x})"
        ));
    }
    let (struct_t, _) = find_struct(btf, struct_name).map_err(|e| {
        format!(
            "PerCpuField {symbol}.{field}[cpu={cpu}]: 'struct {struct_name}' BTF \
             lookup: {e:#}"
        )
    })?;
    let field_off = nested_member_byte_offset(btf, &struct_t, field).map_err(|e| {
        format!(
            "PerCpuField {symbol}.{field}[cpu={cpu}]: BTF nested-offset for \
             '{field}' within '{struct_name}': {e:#}"
        )
    })?;
    // Translate the field's own virtual address so a field on a different
    // page than the struct base still resolves to the right physical page.
    let field_kva = per_cpu_kva.checked_add(field_off as u64).ok_or_else(|| {
        format!(
            "PerCpuField {symbol}.{field}[cpu={cpu}]: per_cpu_kva={per_cpu_kva:#x} + \
             field_off={field_off:#x} overflows u64"
        )
    })?;
    let walk = kernel.walk_context();
    let field_pa = translate_any_kva(
        kernel.mem(),
        walk.cr3_pa,
        walk.page_offset,
        field_kva,
        walk.l5,
        walk.tcr_el1,
    )
    .ok_or_else(|| {
        format!(
            "PerCpuField {symbol}.{field}[cpu={cpu}]: per_cpu_kva={per_cpu_kva:#x} \
             unmapped (translate_any_kva returned None) at field_kva={field_kva:#x}"
        )
    })?;
    // Same u64 -> usize narrowing as before; callers immediately widen back.
    Ok(field_pa as usize)
}
/// Writes `value` into one field of a per-CPU struct instance.
///
/// The field's physical address is resolved first, so resolution errors take
/// precedence over the value-kind check. `U32` and `U64` are plain stores,
/// `OrU32` reads the current `u32`, ORs in the mask and stores the result,
/// and `Bytes` is rejected.
///
/// # Errors
/// Propagates resolution failures and rejects `Bytes` values.
fn dispatch_per_cpu_field_write(
    kernel: &GuestKernel,
    btf: Option<&Btf>,
    kaslr_offset: u64,
    symbol: &str,
    field: &str,
    cpu: u32,
    value: &KernelOpValue,
) -> Result<(), String> {
    let field_pa =
        resolve_per_cpu_field_pa(kernel, btf, kaslr_offset, symbol, field, cpu)? as u64;

    match value {
        KernelOpValue::Bytes(_) => {
            return Err(format!(
                "PerCpuField {symbol}.{field}[cpu={cpu}]: Bytes write not supported \
                 (per-CPU scheduler fields are scalars)"
            ));
        }
        KernelOpValue::U64(wide) => {
            kernel.mem().write_u64(field_pa, 0, *wide);
        }
        KernelOpValue::U32(word) => {
            kernel.mem().write_u32(field_pa, 0, *word);
        }
        KernelOpValue::OrU32(mask) => {
            let merged = kernel.mem().read_u32(field_pa, 0) | *mask;
            kernel.mem().write_u32(field_pa, 0, merged);
        }
    }
    Ok(())
}
/// Reads one field of a per-CPU struct instance at the width named by
/// `width_hint`.
///
/// The field's physical address is resolved first, so resolution errors take
/// precedence over the hint check. Only `U32` and `U64` hints are served;
/// `Bytes` and `OrU32` are rejected.
///
/// # Errors
/// Propagates resolution failures and rejects unsupported hints.
fn dispatch_per_cpu_field_read(
    kernel: &GuestKernel,
    btf: Option<&Btf>,
    kaslr_offset: u64,
    symbol: &str,
    field: &str,
    cpu: u32,
    width_hint: &KernelOpValue,
) -> Result<KernelOpValue, String> {
    let field_pa =
        resolve_per_cpu_field_pa(kernel, btf, kaslr_offset, symbol, field, cpu)? as u64;

    let fetched = match width_hint {
        KernelOpValue::OrU32(_) => {
            return Err(format!(
                "PerCpuField {symbol}.{field}[cpu={cpu}]: OrU32 has no read semantic"
            ));
        }
        KernelOpValue::Bytes(_) => {
            return Err(format!(
                "PerCpuField {symbol}.{field}[cpu={cpu}]: Bytes read not supported"
            ));
        }
        KernelOpValue::U64(_) => KernelOpValue::U64(kernel.mem().read_u64(field_pa, 0)),
        KernelOpValue::U32(_) => KernelOpValue::U32(kernel.mem().read_u32(field_pa, 0)),
    };
    Ok(fetched)
}
/// Width, in nanoseconds (10 ms), of the window `validate_task_for_field_op`
/// accepts for a task's observed `start_time`: the observed value must lie in
/// `[expected, expected + START_TIME_PROC_TICK_NS)`.
/// NOTE(review): the name suggests this matches the granularity at which the
/// expected value was sampled — confirm against the requester.
const START_TIME_PROC_TICK_NS: u64 = 10_000_000;
/// Byte offsets of the struct members the task walker and the task validator
/// read. All values are produced by `resolve_from_btf`; every field except
/// `signal_thread_head` is an offset within `task_struct`.
struct TaskValidationOffsets {
// `task_struct.pid`
pid: usize,
// `task_struct.start_time`
start_time: usize,
// `task_struct.__state`
state: usize,
// `task_struct.on_rq`
on_rq: usize,
// `task_struct.scx.dsq`
scx_dsq: usize,
// `task_struct.scx.runnable_node`
scx_runnable_node: usize,
// `task_struct.sched_class`
sched_class: usize,
// `task_struct.start_boottime`
start_boottime: usize,
// `task_struct.tasks` — list node the walker follows between entries
tasks: usize,
// `task_struct.signal` — pointer read to reach the thread list head
signal: usize,
// `signal_struct.thread_head` — offset within `signal_struct`
signal_thread_head: usize,
// `task_struct.thread_node` — list node linking threads of one group
thread_node: usize,
}
impl TaskValidationOffsets {
    /// Resolves every offset the walker and validator need from `btf`.
    ///
    /// Lookup order: `struct task_struct`, `struct signal_struct`,
    /// `signal_struct.thread_head`, then the `task_struct` members in
    /// declaration order of this struct.
    ///
    /// # Errors
    /// Returns a `BTF: ...` message for the first struct or member that
    /// cannot be resolved.
    fn resolve_from_btf(btf: &Btf) -> Result<Self, String> {
        let task_struct_t = match find_struct(btf, "task_struct") {
            Ok((found, _)) => found,
            Err(e) => return Err(format!("BTF: 'struct task_struct' lookup: {e:#}")),
        };
        let signal_struct_t = match find_struct(btf, "signal_struct") {
            Ok((found, _)) => found,
            Err(e) => return Err(format!("BTF: 'struct signal_struct' lookup: {e:#}")),
        };
        let signal_thread_head =
            match nested_member_byte_offset(btf, &signal_struct_t, "thread_head") {
                Ok(off) => off,
                Err(e) => {
                    return Err(format!("BTF: signal_struct.thread_head offset: {e:#}"));
                }
            };

        // Offset of a (possibly nested) member path inside `task_struct`.
        let offset_in_task = |path: &str| -> Result<usize, String> {
            match nested_member_byte_offset(btf, &task_struct_t, path) {
                Ok(off) => Ok(off),
                Err(e) => Err(format!("BTF: task_struct.{path} offset: {e:#}")),
            }
        };

        let pid = offset_in_task("pid")?;
        let start_time = offset_in_task("start_time")?;
        let state = offset_in_task("__state")?;
        let on_rq = offset_in_task("on_rq")?;
        let scx_dsq = offset_in_task("scx.dsq")?;
        let scx_runnable_node = offset_in_task("scx.runnable_node")?;
        let sched_class = offset_in_task("sched_class")?;
        let start_boottime = offset_in_task("start_boottime")?;
        let tasks = offset_in_task("tasks")?;
        let signal = offset_in_task("signal")?;
        let thread_node = offset_in_task("thread_node")?;

        Ok(Self {
            pid,
            start_time,
            state,
            on_rq,
            scx_dsq,
            scx_runnable_node,
            sched_class,
            start_boottime,
            tasks,
            signal,
            signal_thread_head,
            thread_node,
        })
    }
}
/// Walks the guest's task list and returns the kernel virtual address of the
/// task whose `pid` field equals `target_pid`.
///
/// The walk starts at the list node `init_task_kva + offs.tasks`, follows
/// each entry's `tasks.next`, and for every entry also walks the thread list
/// anchored at `signal->thread_head` (linked through `thread_node`) so that
/// threads other than the entry itself are checked too. One `visited`
/// counter, capped at `MAX_TASK_WALKER_NODES`, covers both loops.
///
/// NOTE(review): pointers read from guest memory are compared against
/// `head_kva`, which is derived from the caller-supplied `init_task_kva`.
/// This presumes the caller passes the same address form the guest stores in
/// its list pointers — confirm this holds with a non-zero KASLR offset.
///
/// # Errors
/// Returns a reason string when the list head reads as zero or is empty, a
/// list entry is unmapped, a `tasks.next` pointer reads as zero, the visit
/// cap is hit, or the walk completes without finding `target_pid`.
fn find_task_by_pid(
kernel: &GuestKernel,
init_task_kva: u64,
offs: &TaskValidationOffsets,
target_pid: u32,
) -> Result<u64, String> {
let mem = kernel.mem();
let walk = kernel.walk_context();
let pid_off = offs.pid;
let tasks_off = offs.tasks;
let signal_off = offs.signal;
let signal_thread_head_off = offs.signal_thread_head;
let thread_node_off = offs.thread_node;
let head_kva = init_task_kva.checked_add(tasks_off as u64).ok_or_else(|| {
format!(
"find_task_by_pid: head_kva overflow init_task={init_task_kva:#x} + \
tasks_off={tasks_off}"
)
})?;
// The list head is read through the text mapping, not the page-table walk.
let head_pa = kernel.text_kva_to_pa(head_kva);
let mut node_kva = mem.read_u64(head_pa, 0);
if node_kva == 0 {
return Err(format!(
"find_task_by_pid: init_task.tasks.next read as 0 at head_pa={head_pa:#x} \
— head bytes unmapped or torn read"
));
}
if node_kva == head_kva {
return Err(format!(
"find_task_by_pid: init_task.tasks is empty (head.next == head) — \
no user tasks exist; cannot resolve pid={target_pid}"
));
}
let mut visited: u32 = 0;
// Outer loop: one iteration per entry on the `tasks` list.
while node_kva != head_kva {
if visited >= MAX_TASK_WALKER_NODES {
return Err(format!(
"find_task_by_pid: walker cap {MAX_TASK_WALKER_NODES} exceeded \
scanning for pid={target_pid} (visited={visited}); list may be \
corrupted (cycle) or pid_max exceeded the cap"
));
}
visited += 1;
// Recover the containing task_struct address from its embedded list node.
let leader_kva = node_kva.wrapping_sub(tasks_off as u64);
// NOTE(review): with `head_kva == init_task_kva + tasks_off` and the loop
// condition above, this guard appears unreachable; it acts as a defensive
// check only.
if leader_kva == init_task_kva {
return Err(format!(
"find_task_by_pid: candidate task_kva={leader_kva:#x} equals \
init_task_kva={init_task_kva:#x} (pid=0 swapper); init_task \
is not a writable target"
));
}
let Some(leader_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
leader_kva,
walk.l5,
walk.tcr_el1,
) else {
return Err(format!(
"find_task_by_pid: leader task_kva={leader_kva:#x} unmapped \
(visited={visited}); task_struct slab page not present in guest memory"
));
};
let leader_pid = mem.read_u32(leader_pa, pid_off);
if leader_pid == target_pid {
return Ok(leader_kva);
}
// Inner walk over this entry's thread list. A null `signal` pointer or an
// unmapped list head skips the thread walk without raising an error.
let signal_kva = mem.read_u64(leader_pa, signal_off);
if signal_kva != 0 {
let thread_head_kva = signal_kva.wrapping_add(signal_thread_head_off as u64);
if let Some(thread_head_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
thread_head_kva,
walk.l5,
walk.tcr_el1,
) {
let mut thread_node_kva = mem.read_u64(thread_head_pa, 0);
// Stop on a null link or when the list wraps back to its head.
while thread_node_kva != 0 && thread_node_kva != thread_head_kva {
if visited >= MAX_TASK_WALKER_NODES {
return Err(format!(
"find_task_by_pid: walker cap {MAX_TASK_WALKER_NODES} \
exceeded inside thread-group of leader_pid={leader_pid} \
scanning for pid={target_pid}"
));
}
visited += 1;
let thread_kva = thread_node_kva.wrapping_sub(thread_node_off as u64);
// The leader's pid was already compared above; skip re-reading it.
if thread_kva != leader_kva {
let Some(thread_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
thread_kva,
walk.l5,
walk.tcr_el1,
) else {
// Thread struct unmapped: try to step over it using the node's own
// translation; abandon this thread list if that fails too.
let Some(thread_node_pa) = translate_any_kva(
mem,
walk.cr3_pa,
walk.page_offset,
thread_node_kva,
walk.l5,
walk.tcr_el1,
) else {
break; };
thread_node_kva = mem.read_u64(thread_node_pa, 0);
continue;
};
let thread_pid = mem.read_u32(thread_pa, pid_off);
if thread_pid == target_pid {
return Ok(thread_kva);
}
}
// Read this node's `next` pointer. `thread_pa_or_node` yields 0 when
// neither translation succeeds, in which case the read is issued at
// physical address 0.
let next_kva = mem.read_u64(
thread_pa_or_node(
mem,
walk.cr3_pa,
walk.page_offset,
walk.l5,
walk.tcr_el1,
thread_kva,
thread_node_kva,
thread_node_off,
),
0,
);
if next_kva == 0 {
break; }
thread_node_kva = next_kva;
}
}
}
// Advance to the next entry on the `tasks` list.
let next_kva = mem.read_u64(leader_pa, tasks_off);
if next_kva == 0 {
return Err(format!(
"find_task_by_pid: list_head.next read as 0 at leader_kva={leader_kva:#x} \
(visited={visited}); chain broken before finding pid={target_pid}"
));
}
node_kva = next_kva;
}
Err(format!(
"find_task_by_pid: pid={target_pid} not found in init_task.tasks \
or any leader's signal->thread_head (visited={visited} entries across \
leaders + threads)"
))
}
// The page-walk parameters are passed individually, mirroring
// `translate_any_kva`, which pushes this helper over clippy's argument limit.
#[allow(clippy::too_many_arguments)]
/// Returns the physical address of a thread's list node.
///
/// Prefers translating the task address `thread_kva` and adding
/// `thread_node_off`; if that translation fails, falls back to translating
/// `thread_node_kva` directly. Returns `0` when both translations fail, so
/// callers that must distinguish failure have to treat `0` as "unmapped".
fn thread_pa_or_node(
    mem: &crate::monitor::reader::GuestMem,
    cr3_pa: u64,
    page_offset: u64,
    l5: bool,
    tcr_el1: u64,
    thread_kva: u64,
    thread_node_kva: u64,
    thread_node_off: usize,
) -> u64 {
    let translate = |kva: u64| translate_any_kva(mem, cr3_pa, page_offset, kva, l5, tcr_el1);

    match translate(thread_kva) {
        Some(task_pa) => task_pa + thread_node_off as u64,
        None => translate(thread_node_kva).unwrap_or(0),
    }
}
/// Re-checks a task located by the walker before any of its fields is read
/// or written.
///
/// Checks run in this order; the first failure is returned:
/// 1. the `pid` field at `task_pa` still equals `target_pid`;
/// 2. `start_time` lies in
///    `[expected_start_time_ns, expected_start_time_ns + START_TIME_PROC_TICK_NS)`;
/// 3. the state word does not have the `TASK_DEAD` bit set;
/// 4. `on_rq` is zero;
/// 5. the `scx.dsq` pointer is null;
/// 6. `scx.runnable_node.next` is null or points at the node itself;
/// 7. `sched_class` equals `ext_sched_class_kva`;
/// 8. `start_boottime` is non-zero.
///
/// The upper bound shown in the start-time rejection message is computed
/// with saturating arithmetic: `expected_start_time_ns` comes from the
/// request, and a value near `u64::MAX` must yield a rejection message
/// rather than an arithmetic overflow.
///
/// NOTE(review): the self-pointer for check 6 is rebuilt as
/// `task_pa + kernel.page_offset()`, which presumes the task lives in the
/// direct map — confirm.
///
/// # Errors
/// Returns a `validate_task: ...` reason string for the first failed check.
fn validate_task_for_field_op(
    kernel: &GuestKernel,
    task_pa: u64,
    target_pid: u32,
    expected_start_time_ns: u64,
    offs: &TaskValidationOffsets,
    ext_sched_class_kva: u64,
) -> Result<(), String> {
    let mem = kernel.mem();
    let pid = mem.read_u32(task_pa, offs.pid);
    if pid != target_pid {
        return Err(format!(
            "validate_task: pid mismatch at task_pa={task_pa:#x} — read pid={pid}, \
             expected {target_pid} (likely slab-recycle since walker found this task)"
        ));
    }
    let observed_start_time = mem.read_u64(task_pa, offs.start_time);
    let skew = observed_start_time.saturating_sub(expected_start_time_ns);
    if observed_start_time < expected_start_time_ns || skew >= START_TIME_PROC_TICK_NS {
        // Saturate so a request-supplied value near u64::MAX cannot overflow
        // while the rejection message is being built.
        let window_last = expected_start_time_ns.saturating_add(START_TIME_PROC_TICK_NS - 1);
        return Err(format!(
            "validate_task: task pid={target_pid} start_time identity mismatch — \
             observed={observed_start_time}ns expected in \
             [{expected_start_time_ns}, {}]ns; \
             original task exited and PID was recycled for an unrelated task",
            window_last
        ));
    }
    let state = mem.read_u32(task_pa, offs.state);
    if state & TASK_DEAD != 0 {
        return Err(format!(
            "validate_task: task pid={target_pid} is TASK_DEAD (state={state:#x}); \
             mid-teardown task fields unsafe to write"
        ));
    }
    let on_rq = mem.read_u32(task_pa, offs.on_rq);
    if on_rq != 0 {
        return Err(format!(
            "validate_task: task pid={target_pid} is on_rq={on_rq} (TASK_ON_RQ_QUEUED \
             or MIGRATING); writing scheduler fields would corrupt rb-tree / DSQ \
             ordering. Test author must use a blocking workload pattern \
             (`WorkType::FutexPingPong`, `WorkType::WaitOnFutex`, `WorkType::Sleep`) \
             so the worker is sleeping at cold-op time"
        ));
    }
    let scx_dsq_ptr = mem.read_u64(task_pa, offs.scx_dsq);
    if scx_dsq_ptr != 0 {
        return Err(format!(
            "validate_task: task pid={target_pid} has scx.dsq={scx_dsq_ptr:#x} (queued \
             on an SCX DSQ); modifying ordering keys while queued mangles ordering \
             per include/linux/sched/ext.h:248-254 (dsq_vtime warning). Test author \
             must use a blocking workload pattern \
             (`WorkType::FutexPingPong`, `WorkType::WaitOnFutex`, `WorkType::Sleep`)"
        ));
    }
    // An unlinked list node either is zeroed or points back at itself.
    let task_kva = task_pa.wrapping_add(kernel.page_offset());
    let runnable_node_kva = task_kva.wrapping_add(offs.scx_runnable_node as u64);
    let runnable_node_next = mem.read_u64(task_pa, offs.scx_runnable_node);
    if runnable_node_next != 0 && runnable_node_next != runnable_node_kva {
        return Err(format!(
            "validate_task: task pid={target_pid} scx.runnable_node is linked \
             (next={runnable_node_next:#x} != self={runnable_node_kva:#x}); task is \
             on a per-rq runnable_list. Test author must use a blocking workload \
             pattern (`WorkType::FutexPingPong`, `WorkType::WaitOnFutex`, \
             `WorkType::Sleep`)"
        ));
    }
    let sched_class_kva = mem.read_u64(task_pa, offs.sched_class);
    if sched_class_kva != ext_sched_class_kva {
        return Err(format!(
            "validate_task: task pid={target_pid} sched_class={sched_class_kva:#x} \
             is not ext_sched_class={ext_sched_class_kva:#x}; TaskField writes target \
             SCX-managed tasks only (CFS / RT / DL / stop / idle classes have \
             different vtime semantics — EEVDF's place_entity overwrites se.vruntime \
             on enqueue, RT/DL have RT_BANDWIDTH instant-throttle hazards). Spawn \
             the worker under `SchedPolicy::Ext` to make it SCX-managed"
        ));
    }
    let start_boottime = mem.read_u64(task_pa, offs.start_boottime);
    if start_boottime == 0 {
        return Err(format!(
            "validate_task: task pid={target_pid} start_boottime=0 — possibly a \
             freshly-zeroed slab page mid-slab-recycle; reject rather than risk \
             writing to dead memory"
        ));
    }
    Ok(())
}
/// Locates the task with `pid`, validates it for a field operation, and
/// returns its physical address together with the BTF `task_struct` type.
///
/// Steps, in order: require BTF; look up `init_task` and `ext_sched_class`
/// in the symbol table (the latter is offset by `kaslr_offset`); resolve the
/// validation offsets; walk the task list; translate the task address; run
/// `validate_task_for_field_op`; finally look up `struct task_struct`.
///
/// # Errors
/// Returns the reason string of the first step that fails.
fn resolve_and_validate_task_field(
    kernel: &GuestKernel,
    btf: Option<&Btf>,
    kaslr_offset: u64,
    pid: u32,
    expected_start_time_ns: u64,
) -> Result<(u64, btf_rs::Struct), String> {
    let Some(btf) = btf else {
        return Err(format!(
            "TaskField pid={pid}: BTF not loaded in this coordinator — cannot resolve \
             task_struct layout (vmlinux must carry CONFIG_DEBUG_INFO_BTF=y output)"
        ));
    };
    let Some(init_task_kva) = kernel.symbol_kva("init_task") else {
        return Err(format!(
            "TaskField pid={pid}: init_task symbol absent from vmlinux symtab \
             (heavily stripped vmlinux); cannot anchor the task-list walker"
        ));
    };
    let Some(ext_sched_class_link_kva) = kernel.symbol_kva("ext_sched_class") else {
        return Err(format!(
            "TaskField pid={pid}: ext_sched_class symbol absent from vmlinux symtab \
             (kernel built without CONFIG_SCHED_CLASS_EXT=y); TaskField writes are \
             SCX-only and require sched_ext support"
        ));
    };
    let ext_sched_class_kva = ext_sched_class_link_kva.wrapping_add(kaslr_offset);

    let offsets = TaskValidationOffsets::resolve_from_btf(btf)?;
    let task_kva = find_task_by_pid(kernel, init_task_kva, &offsets, pid)?;

    let ctx = kernel.walk_context();
    let translated = translate_any_kva(
        kernel.mem(),
        ctx.cr3_pa,
        ctx.page_offset,
        task_kva,
        ctx.l5,
        ctx.tcr_el1,
    );
    let Some(task_pa) = translated else {
        return Err(format!(
            "TaskField pid={pid}: task_kva={task_kva:#x} unmapped at validation step \
             (slab page disappeared between walker and validator — extreme race)"
        ));
    };

    validate_task_for_field_op(
        kernel,
        task_pa,
        pid,
        expected_start_time_ns,
        &offsets,
        ext_sched_class_kva,
    )?;

    match find_struct(btf, "task_struct") {
        Ok((task_struct_t, _)) => Ok((task_pa, task_struct_t)),
        Err(e) => Err(format!(
            "TaskField pid={pid}: 'struct task_struct' BTF lookup: {e:#}"
        )),
    }
}
/// Writes `value` into `field` of the task identified by `pid`.
///
/// The task is located and validated first, then the field offset is
/// resolved, and only then is the value kind examined — so task and BTF
/// errors take precedence over an unsupported value kind. `U32` and `U64`
/// are stored at the field offset; `Bytes` and `OrU32` are rejected.
///
/// # Errors
/// Propagates lookup/validation failures and rejects unsupported kinds.
fn dispatch_task_field_write(
    kernel: &GuestKernel,
    btf: Option<&Btf>,
    kaslr_offset: u64,
    pid: u32,
    expected_start_time_ns: u64,
    field: &str,
    value: &KernelOpValue,
) -> Result<(), String> {
    let (task_pa, task_struct_t) =
        resolve_and_validate_task_field(kernel, btf, kaslr_offset, pid, expected_start_time_ns)?;
    // The helper above already returned an error if BTF was missing.
    let btf = btf.expect("checked in resolve_and_validate_task_field");
    let field_off = match nested_member_byte_offset(btf, &task_struct_t, field) {
        Ok(off) => off,
        Err(e) => {
            return Err(format!(
                "TaskField pid={pid} field={field:?}: BTF nested-offset resolution: {e:#}"
            ));
        }
    };

    match value {
        KernelOpValue::OrU32(_) => {
            return Err(format!(
                "TaskField pid={pid} field={field:?}: OrU32 RMW not supported on TaskField \
                 in v1 (no current use case; per-task scheduler fields are scalars not flags)"
            ));
        }
        KernelOpValue::Bytes(_) => {
            return Err(format!(
                "TaskField pid={pid} field={field:?}: Bytes write not supported in v1 — \
                 use U32 or U64 (per-task scheduler fields are scalars)"
            ));
        }
        KernelOpValue::U64(wide) => {
            kernel.mem().write_u64(task_pa, field_off, *wide);
        }
        KernelOpValue::U32(word) => {
            kernel.mem().write_u32(task_pa, field_off, *word);
        }
    }
    Ok(())
}
/// Reads `field` of the task identified by `pid` at the width named by
/// `width_hint`.
///
/// The task is located and validated first, then the field offset is
/// resolved, and only then is the hint examined — so task and BTF errors
/// take precedence over an unsupported hint. Only `U32` and `U64` hints are
/// served; `Bytes` and `OrU32` are rejected.
///
/// # Errors
/// Propagates lookup/validation failures and rejects unsupported hints.
fn dispatch_task_field_read(
    kernel: &GuestKernel,
    btf: Option<&Btf>,
    kaslr_offset: u64,
    pid: u32,
    expected_start_time_ns: u64,
    field: &str,
    width_hint: &KernelOpValue,
) -> Result<KernelOpValue, String> {
    let (task_pa, task_struct_t) =
        resolve_and_validate_task_field(kernel, btf, kaslr_offset, pid, expected_start_time_ns)?;
    // The helper above already returned an error if BTF was missing.
    let btf = btf.expect("checked in resolve_and_validate_task_field");
    let field_off = match nested_member_byte_offset(btf, &task_struct_t, field) {
        Ok(off) => off,
        Err(e) => {
            return Err(format!(
                "TaskField pid={pid} field={field:?}: BTF nested-offset resolution: {e:#}"
            ));
        }
    };

    let fetched = match width_hint {
        KernelOpValue::OrU32(_) => {
            return Err(format!(
                "TaskField pid={pid} field={field:?}: OrU32 has no read semantic (covered \
                 by the dispatcher's read-direction catch-all but explicit here for clarity)"
            ));
        }
        KernelOpValue::Bytes(_) => {
            return Err(format!(
                "TaskField pid={pid} field={field:?}: Bytes read not supported in v1 — \
                 use U32 or U64 width hint"
            ));
        }
        KernelOpValue::U64(_) => KernelOpValue::U64(kernel.mem().read_u64(task_pa, field_off)),
        KernelOpValue::U32(_) => KernelOpValue::U32(kernel.mem().read_u32(task_pa, field_off)),
    };
    Ok(fetched)
}
/// Builds the rejection text for a read request whose width hint is `OrU32`.
/// The message echoes the mask in hexadecimal and names the 32-bit read
/// width to use instead.
pub(super) fn oru32_read_rejection_reason(mask: u32) -> String {
    let mut reason = format!("OrU32(mask={mask:#x}) cannot be used as a Read width — ");
    reason.push_str("RMW is a write operation. For 32-bit reads use ");
    reason.push_str("`KernelValueWidth::u32()` instead.");
    reason
}
/// Builds a failure reply for `request_id` carrying `reason`.
///
/// A reason longer than `KERNEL_OP_REASON_MAX` bytes is shortened to the
/// length returned by `utf8_safe_truncate_len`; shorter reasons pass through
/// unchanged. The reply never carries read values.
fn error_reply(request_id: u32, reason: String) -> KernelOpReplyPayload {
    let reason = if reason.len() <= KERNEL_OP_REASON_MAX {
        reason
    } else {
        let keep = super::utf8_safe_truncate_len(&reason, KERNEL_OP_REASON_MAX);
        let mut clipped = reason;
        clipped.truncate(keep);
        clipped
    };

    KernelOpReplyPayload {
        request_id,
        success: false,
        reason,
        read_values: Vec::new(),
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::vmm::KERNEL_HALF_CANONICAL as KERNEL_HALF_CANONICAL_4LEVEL;
const _: () = assert!(
KERNEL_HALF_CONSERVATIVE_5LEVEL < KERNEL_HALF_CANONICAL_4LEVEL,
"5-level threshold must be permissively lower than 4-level canonical",
);
#[test]
fn error_reply_passes_short_reason_unchanged() {
let reply = error_reply(7, "short".to_string());
assert!(!reply.success);
assert_eq!(reply.reason, "short");
}
#[test]
fn read_direction_with_oru32_value_rejects() {
const MASK: u32 = 1 << 5;
const ENTRY_IDX: usize = 0;
let helper_reason = oru32_read_rejection_reason(MASK);
let batch_reason = format!("entry[{ENTRY_IDX}]: {helper_reason}");
let reply = error_reply(99, batch_reason.clone());
assert!(!reply.success);
assert_eq!(reply.request_id, 99);
assert_eq!(reply.reason, batch_reason);
assert!(helper_reason.contains("KernelValueWidth::u32()"));
assert!(helper_reason.contains("OrU32"));
assert!(helper_reason.contains(&format!("{MASK:#x}")));
}
#[test]
fn per_cpu_field_unknown_symbol_rejected() {
let err = struct_name_for_per_cpu_symbol("not_a_real_per_cpu_symbol")
.expect_err("unknown symbol must reject");
assert!(err.contains("PerCpuField"));
assert!(err.contains("not_a_real_per_cpu_symbol"));
assert!(err.contains("runqueues"));
assert!(err.contains("kernel_cpustat"));
assert!(err.contains("kstat"));
assert!(err.contains("tick_cpu_sched"));
}
#[test]
fn per_cpu_field_known_symbol_mapping() {
assert_eq!(struct_name_for_per_cpu_symbol("runqueues").unwrap(), "rq");
assert_eq!(
struct_name_for_per_cpu_symbol("kernel_cpustat").unwrap(),
"kernel_cpustat"
);
assert_eq!(
struct_name_for_per_cpu_symbol("kstat").unwrap(),
"kernel_stat"
);
assert_eq!(
struct_name_for_per_cpu_symbol("tick_cpu_sched").unwrap(),
"tick_sched"
);
}
#[test]
fn dispatch_arms_call_validate_target_helpers() {
let full_src = include_str!("kernel_op_dispatch.rs");
let test_mod_start = full_src
.find("#[cfg(test)]")
.expect("test module must exist");
let src = &full_src[..test_mod_start];
let direct_arms: Vec<_> = src
.match_indices("KernelOpTarget::Direct(kva), KernelOpValue::")
.collect();
assert_eq!(
direct_arms.len(),
7,
"expected exactly 7 Direct arms (4 write + 3 read); found {}",
direct_arms.len()
);
for (idx, _) in &direct_arms {
let window_end = (idx + 400).min(src.len());
let window = &src[*idx..window_end];
assert!(
window.contains("validate_direct_target("),
"Direct arm at byte offset {idx} is missing validate_direct_target() call; \
window: {window:?}"
);
}
let kva_arms: Vec<_> = src
.match_indices("KernelOpTarget::Kva(kva), KernelOpValue::")
.collect();
assert_eq!(
kva_arms.len(),
7,
"expected exactly 7 Kva arms (4 write + 3 read); found {}",
kva_arms.len()
);
for (idx, _) in &kva_arms {
let window_end = (idx + 400).min(src.len());
let window = &src[*idx..window_end];
assert!(
window.contains("validate_kva_target("),
"Kva arm at byte offset {idx} is missing validate_kva_target() call; \
window: {window:?}"
);
}
}
#[test]
fn error_reply_truncates_at_utf8_boundary_classes() {
for (cp, label, padding) in [
("é", "2byte_U+00E9", 4), ("☃", "3byte_U+2603", 6), ("🦀", "4byte_U+1F980", 8), ("\u{FEFF}", "BOM_U+FEFF", 6), ] {
let mut s = String::new();
while s.len() < KERNEL_OP_REASON_MAX + padding {
s.push_str(cp);
}
let reply = error_reply(42, s);
assert!(
reply.reason.len() <= KERNEL_OP_REASON_MAX,
"{label}: reason.len()={} > cap={KERNEL_OP_REASON_MAX}",
reply.reason.len()
);
assert!(
reply.reason.is_char_boundary(reply.reason.len()),
"{label}: truncation landed mid-codepoint"
);
let _ = reply.reason.as_str();
}
}
#[test]
fn error_reply_truncates_mixed_width_input_at_boundary() {
let pattern = "Aé☃🦀";
let mut s = String::new();
while s.len() < KERNEL_OP_REASON_MAX + 10 {
s.push_str(pattern);
}
let reply = error_reply(99, s);
assert!(reply.reason.len() <= KERNEL_OP_REASON_MAX);
assert!(reply.reason.is_char_boundary(reply.reason.len()));
let _ = reply.reason.as_str();
}
#[test]
fn error_reply_truncates_pure_ascii_no_walkback() {
let s = "A".repeat(KERNEL_OP_REASON_MAX + 16);
let reply = error_reply(1, s);
assert_eq!(reply.reason.len(), KERNEL_OP_REASON_MAX);
assert!(reply.reason.is_char_boundary(reply.reason.len()));
}
#[test]
fn error_reply_zero_length_reason_passes() {
let reply = error_reply(2, String::new());
assert!(!reply.success);
assert_eq!(reply.reason, "");
}
#[test]
fn validate_kva_target_accepts_exact_threshold() {
assert!(validate_kva_target(KERNEL_HALF_CONSERVATIVE_5LEVEL, 4).is_ok());
}
#[test]
fn validate_kva_target_rejects_one_below_threshold() {
let kva = KERNEL_HALF_CONSERVATIVE_5LEVEL - 1;
let err = validate_kva_target(kva, 4).expect_err("must reject");
assert!(
err.contains(&format!("{kva:#x}")),
"error must echo rejected KVA for operator triage; got {err}"
);
}
#[test]
fn validate_kva_target_rejects_zero() {
let err = validate_kva_target(0, 4).expect_err("kva=0 must reject");
assert!(err.contains("0x0"));
}
#[test]
fn validate_kva_target_rejects_user_half_max() {
let kva = 0x0000_7FFF_FFFF_FFFF;
assert!(
validate_kva_target(kva, 4).is_err(),
"canonical user-half max must reject"
);
}
#[test]
fn validate_kva_target_accepts_kernel_typical() {
assert!(validate_kva_target(0xFFFF_FFFF_8100_0000, 4).is_ok());
assert!(validate_kva_target(0xFFFF_C900_0000_0000, 4).is_ok());
assert!(validate_kva_target(0xFF11_0000_0000_0000, 4).is_ok());
}
/// Pins the wording of the user-half rejection reason and checks that it
/// passes through `error_reply` unchanged when prefixed with `entry[0]: `.
#[test]
fn user_half_kva_rejection_reason_format_pin() {
    let kva: u64 = 0x4000_0000_0000;
    let helper_reason = user_half_kva_rejection_reason(kva);

    // The batch-level reason is the helper text behind an entry-index prefix.
    let mut batch_reason = String::from("entry[0]: ");
    batch_reason.push_str(&helper_reason);
    let reply = error_reply(11, batch_reason.clone());
    assert!(!reply.success);
    assert_eq!(reply.reason, batch_reason);

    // Fragments the helper text must carry: both addresses and three fixed phrases.
    let kva_hex = format!("{kva:#x}");
    let threshold_hex = format!("{KERNEL_HALF_CONSERVATIVE_5LEVEL:#x}");
    let required = [
        kva_hex.as_str(),
        threshold_hex.as_str(),
        "kernel-half",
        "5-level conservative",
        "Symbol target",
    ];
    for needle in required {
        assert!(helper_reason.contains(needle));
    }
}
/// Accesses that start at the direct-map base, sit inside it, or end exactly
/// at `page_offset + dram_size` are all accepted.
#[test]
fn validate_direct_target_accepts_in_range() {
    const PAGE_OFFSET: u64 = 0xFFFF_8880_0000_0000;
    const DRAM_SIZE: u64 = 256 << 20; // 256 MiB
    // (kva, len) pairs: first byte, one page in, and flush against the end.
    let in_range: [(u64, u64); 3] = [
        (PAGE_OFFSET, 4),
        (PAGE_OFFSET + 0x1000, 8),
        (PAGE_OFFSET + DRAM_SIZE - 4, 4),
    ];
    for (kva, len) in in_range {
        assert!(validate_direct_target(kva, len, PAGE_OFFSET, DRAM_SIZE).is_ok());
    }
}
/// An address one byte below `page_offset` is rejected, and the message names
/// both addresses plus the "would wrap" explanation.
#[test]
fn validate_direct_target_rejects_below_page_offset() {
    let page_offset: u64 = 0xFFFF_8880_0000_0000;
    let dram_size: u64 = 256 << 20;
    let kva = page_offset - 1;
    let rejection = validate_direct_target(kva, 4, page_offset, dram_size)
        .expect_err("kva below page_offset must reject");
    let kva_hex = format!("{kva:#x}");
    let base_hex = format!("{page_offset:#x}");
    for needle in [kva_hex.as_str(), base_hex.as_str(), "would wrap"] {
        assert!(rejection.contains(needle));
    }
}
/// A 4-byte access starting 3 bytes before the direct-map end overruns it by
/// one byte and is rejected.
#[test]
fn validate_direct_target_rejects_past_end() {
    let page_offset: u64 = 0xFFFF_8880_0000_0000;
    let dram_size: u64 = 256 << 20;
    let direct_map_end = page_offset + dram_size;
    let rejection = validate_direct_target(direct_map_end - 3, 4, page_offset, dram_size)
        .expect_err("kva+len past direct-map end must reject");
    assert!(rejection.contains("overruns direct-map end"));
}
/// `kva + len` wrapping past `u64::MAX` is reported as an overflow rather
/// than silently wrapping.
#[test]
fn validate_direct_target_rejects_kva_len_overflow() {
    let page_offset: u64 = 0xFFFF_8880_0000_0000;
    let dram_size: u64 = 256 << 20;
    let near_top = u64::MAX - 2;
    let rejection = validate_direct_target(near_top, 4, page_offset, dram_size)
        .expect_err("kva+len overflow must reject");
    assert!(rejection.contains("overflow"));
}
/// A kernel-half address whose `kva + len` exceeds `u64::MAX` is rejected
/// with an overflow message.
#[test]
fn validate_kva_target_rejects_kva_len_overflow() {
    let near_top = u64::MAX - 2;
    let rejection =
        validate_kva_target(near_top, 4).expect_err("kva+len overflow must reject");
    assert!(rejection.contains("overflow"));
}
/// Source-level guard on where the `OrU32` read-modify-write lives.
///
/// Scans this file's production text (everything before the first test-module
/// attribute) and checks three things:
/// 1. there are exactly three `OrU32` write arms, each followed within 400
///    bytes by the `rmw-invariant-anchor` marker;
/// 2. every OR-with-mask call (the mask followed by a closing paren) lies
///    inside `dispatch_one_write`;
/// 3. exactly three OR-with-mask sites lie inside `dispatch_one_write`.
#[test]
fn or_u32_rmw_anchors_inside_dispatch_one_write() {
    let full_src = include_str!("kernel_op_dispatch.rs");
    let test_mod_start = full_src
        .find("#[cfg(test)]")
        .expect("test module must exist");
    // Only production code is inspected; the needles below also appear in
    // this test's own text, which the cut excludes.
    let src = &full_src[..test_mod_start];

    let arm_sites: Vec<_> = src
        .match_indices("KernelOpValue::OrU32(mask)) => {")
        .collect();
    assert_eq!(
        arm_sites.len(),
        3,
        "expected exactly 3 OrU32 write arms (Symbol/Direct/Kva); \
         found {} — if a 4th was added, add the rmw-invariant-anchor \
         comment to it AND update this expected count",
        arm_sites.len()
    );
    for (idx, _) in &arm_sites {
        // Clamp to the end of the text, then back off to a char boundary so a
        // multi-byte character straddling the cut cannot panic the slice.
        // `idx` is itself a boundary, so the walk-back stops at or after it.
        let mut window_end = (idx + 400).min(src.len());
        while !src.is_char_boundary(window_end) {
            window_end -= 1;
        }
        let window = &src[*idx..window_end];
        assert!(
            window.contains("rmw-invariant-anchor"),
            "OrU32 arm at byte offset {idx} is missing the \
             // rmw-invariant-anchor comment; window: {window:?}"
        );
    }

    let dow_start = src
        .find("fn dispatch_one_write(")
        .expect("dispatch_one_write must exist");
    // The function is taken to end where the next line-initial `fn` begins,
    // or at the end of the production text if there is none.
    let dow_end = src[dow_start..]
        .find("\nfn ")
        .map(|rel| dow_start + rel)
        .unwrap_or(src.len());
    let is_inside_dow = |offset: usize| (dow_start..dow_end).contains(&offset);

    let global_or_mask: Vec<_> = src.match_indices("| mask").collect();
    let inside_dow = global_or_mask
        .iter()
        .filter(|(idx, _)| is_inside_dow(*idx))
        .count();
    for (idx, _) in &global_or_mask {
        // The match itself is 6 bytes, so the closing paren is the 7th byte.
        // A 6-byte lookahead could never contain it, which left the assertion
        // below unreachable; test the text from the match start instead.
        if src[*idx..].starts_with("| mask)") {
            assert!(
                is_inside_dow(*idx),
                "Production `| mask)` OR-with-mask call at byte offset \
                 {idx} is OUTSIDE dispatch_one_write \
                 [start={dow_start}, end={dow_end}). \
                 A refactor extracted the OrU32 RMW into a helper, \
                 breaking the same-rendezvous-epoch invariant. \
                 Move it back inside dispatch_one_write OR (if \
                 intentional) update this test."
            );
        }
    }
    assert_eq!(
        inside_dow,
        3,
        "expected exactly 3 `| mask` production sites inside \
         dispatch_one_write (one per Symbol/Direct/Kva OrU32 arm); \
         found {}",
        inside_dow
    );
}
/// Byte offsets of the synthetic task layout used by the validation tests.
///
/// These are test-only values, not offsets taken from a real kernel build.
mod synth_task {
    // Fields located relative to the start of the painted task image.
    pub(super) const PID_OFF: usize = 0x10;
    pub(super) const START_TIME_OFF: usize = 0x18;
    pub(super) const STATE_OFF: usize = 0x20;
    pub(super) const ON_RQ_OFF: usize = 0x28;
    pub(super) const SCHED_CLASS_OFF: usize = 0x30;
    pub(super) const START_BOOTTIME_OFF: usize = 0x40;
    pub(super) const SCX_DSQ_OFF: usize = 0x48;
    pub(super) const SCX_RUNNABLE_NODE_OFF: usize = 0x50;
    pub(super) const TASKS_OFF: usize = 0x60;
    pub(super) const SIGNAL_OFF: usize = 0x70;
    pub(super) const THREAD_NODE_OFF: usize = 0x78;

    // NOTE(review): shares its value with PID_OFF, so this is presumably an
    // offset inside the signal structure rather than the task — confirm.
    pub(super) const SIGNAL_THREAD_HEAD_OFF: usize = 0x10;
}
fn synth_validation_offsets() -> TaskValidationOffsets {
TaskValidationOffsets {
pid: synth_task::PID_OFF,
start_time: synth_task::START_TIME_OFF,
state: synth_task::STATE_OFF,
on_rq: synth_task::ON_RQ_OFF,
sched_class: synth_task::SCHED_CLASS_OFF,
start_boottime: synth_task::START_BOOTTIME_OFF,
scx_dsq: synth_task::SCX_DSQ_OFF,
scx_runnable_node: synth_task::SCX_RUNNABLE_NODE_OFF,
tasks: synth_task::TASKS_OFF,
signal: synth_task::SIGNAL_OFF,
signal_thread_head: synth_task::SIGNAL_THREAD_HEAD_OFF,
thread_node: synth_task::THREAD_NODE_OFF,
}
}
/// Start time, in nanoseconds, that `paint_valid_task` writes into the
/// synthetic task (1700 s).
const DEFAULT_START_TIME: u64 = 1_700 * 1_000_000_000;
/// Address painted into the task's `sched_class` slot and passed to
/// `validate_task_for_field_op` as the class the task is expected to have.
const EXT_KVA: u64 = 0xFFFF_FFFF_8200_0100;
/// Paints a synthetic task image into `buf` at byte offset `pa`, using the
/// `synth_task` layout.
///
/// The image carries `pid`, `DEFAULT_START_TIME`, a zero state, a zero
/// `on_rq`, `EXT_KVA` as the scheduling class, a non-zero boot-relative start
/// time, a null DSQ pointer, and a runnable-node word holding that field's
/// own direct-map address.
///
/// Panics if any field falls outside `buf`.
fn paint_valid_task(buf: &mut [u8], pa: usize, pid: u32) {
    const PAGE_OFFSET: u64 = 0xFFFF_8880_0000_0000;

    /// Copies `bytes` into `buf` starting at `at`.
    fn put(buf: &mut [u8], at: usize, bytes: &[u8]) {
        buf[at..at + bytes.len()].copy_from_slice(bytes);
    }

    put(buf, pa + synth_task::PID_OFF, &pid.to_le_bytes());
    put(
        buf,
        pa + synth_task::START_TIME_OFF,
        &DEFAULT_START_TIME.to_le_bytes(),
    );
    put(buf, pa + synth_task::STATE_OFF, &0u32.to_le_bytes());
    put(buf, pa + synth_task::ON_RQ_OFF, &0u32.to_le_bytes());
    put(buf, pa + synth_task::SCHED_CLASS_OFF, &EXT_KVA.to_le_bytes());
    put(
        buf,
        pa + synth_task::START_BOOTTIME_OFF,
        &3_600_000_000_000u64.to_le_bytes(),
    );
    put(buf, pa + synth_task::SCX_DSQ_OFF, &0u64.to_le_bytes());

    // The runnable-node word stores the direct-map address of the field itself.
    let runnable_node_kva = (pa as u64)
        .wrapping_add(PAGE_OFFSET)
        .wrapping_add(synth_task::SCX_RUNNABLE_NODE_OFF as u64);
    put(
        buf,
        pa + synth_task::SCX_RUNNABLE_NODE_OFF,
        &runnable_node_kva.to_le_bytes(),
    );
}
/// Wraps `buf` as guest memory and returns a test `GuestKernel` over it with
/// the given symbol table and a fixed direct-map base.
///
/// The returned kernel refers to `buf` through a raw pointer, so `buf` must
/// stay alive for as long as the kernel is used.
fn build_test_kernel(
    buf: &mut [u8],
    symbols: std::collections::HashMap<String, u64>,
) -> crate::monitor::guest::GuestKernel {
    use crate::monitor::guest::GuestKernel;
    use crate::monitor::reader::GuestMem;
    use std::sync::Arc;

    const PAGE_OFFSET: u64 = 0xFFFF_8880_0000_0000;

    let base = buf.as_mut_ptr();
    let len = buf.len() as u64;
    // SAFETY: `base` points at `len` initialised bytes owned by the caller.
    // NOTE(review): `GuestMem::new`'s full safety contract is not visible
    // from this block — confirm nothing else is required.
    let guest_mem = unsafe { GuestMem::new(base, len) };
    let mem = Arc::new(guest_mem);

    // NOTE(review): the meaning of the trailing `0` and `false` arguments is
    // defined by `new_for_test` and is not visible here.
    GuestKernel::new_for_test(mem, symbols, PAGE_OFFSET, 0, false)
}
/// Test shorthand for `validate_task_for_field_op`.
///
/// Forwards every argument unchanged and supplies `EXT_KVA` as the final
/// argument, so callers only state what varies per test. Returns whatever
/// `validate_task_for_field_op` returns: `Ok(())` on acceptance, or the
/// rejection text as `Err`.
fn validate(
kernel: &crate::monitor::guest::GuestKernel,
task_pa: u64,
pid: u32,
expected_start_time_ns: u64,
offs: &TaskValidationOffsets,
) -> Result<(), String> {
validate_task_for_field_op(kernel, task_pa, pid, expected_start_time_ns, offs, EXT_KVA)
}
/// A freshly painted task whose pid and start time match the request is
/// accepted.
#[test]
fn validate_task_happy_path_accepts() {
    const PID: u32 = 12345;
    let mut guest_ram = vec![0u8; 4096];
    paint_valid_task(&mut guest_ram, 0, PID);
    let offsets = synth_validation_offsets();
    let kernel = build_test_kernel(&mut guest_ram, std::collections::HashMap::new());
    let verdict = validate(&kernel, 0, PID, DEFAULT_START_TIME, &offsets);
    assert!(verdict.is_ok());
}
/// A task painted with pid 99 is rejected when pid 12345 is requested, and
/// the message reports both values.
#[test]
fn validate_task_rejects_pid_mismatch() {
    const PAINTED_PID: u32 = 99;
    const REQUESTED_PID: u32 = 12345;
    let mut guest_ram = vec![0u8; 4096];
    paint_valid_task(&mut guest_ram, 0, PAINTED_PID);
    let offsets = synth_validation_offsets();
    let kernel = build_test_kernel(&mut guest_ram, std::collections::HashMap::new());
    let err = validate(&kernel, 0, REQUESTED_PID, DEFAULT_START_TIME, &offsets)
        .expect_err("pid mismatch must reject");
    assert!(err.contains("pid mismatch"), "must name layer: {err}");
    for needle in ["read pid=99", "expected 12345"] {
        assert!(err.contains(needle));
    }
}
/// The task's start time is 1 ms earlier than the expected value, which puts
/// it below the accepted window; the rejection reports the observed value and
/// the window's lower bound.
#[test]
fn validate_task_rejects_start_time_below_window() {
    const PID: u32 = 12345;
    let mut guest_ram = vec![0u8; 4096];
    paint_valid_task(&mut guest_ram, 0, PID);
    let offsets = synth_validation_offsets();
    let kernel = build_test_kernel(&mut guest_ram, std::collections::HashMap::new());

    let expected_ns = DEFAULT_START_TIME + 1_000_000;
    let rejection = validate(&kernel, 0, PID, expected_ns, &offsets)
        .expect_err("start_time below window must reject");

    let observed_echo = format!("observed={DEFAULT_START_TIME}");
    let window_echo = format!("expected in [{expected_ns}");
    let required = [
        "start_time identity mismatch",
        observed_echo.as_str(),
        window_echo.as_str(),
        "recycled",
    ];
    for needle in required {
        assert!(rejection.contains(needle));
    }
}
/// A task whose start time is 5 ms later than the expected value is still
/// accepted.
#[test]
fn validate_task_accepts_start_time_within_tick_window() {
    const PID: u32 = 12345;
    let mut guest_ram = vec![0u8; 4096];
    paint_valid_task(&mut guest_ram, 0, PID);
    let offsets = synth_validation_offsets();
    let kernel = build_test_kernel(&mut guest_ram, std::collections::HashMap::new());
    let expected_ns = DEFAULT_START_TIME - 5_000_000;
    let verdict = validate(&kernel, 0, PID, expected_ns, &offsets);
    verdict.expect("start_time within tick window must accept");
}
/// A task whose start time is 20 ms later than the expected value falls
/// outside the accepted window and is rejected.
#[test]
fn validate_task_rejects_start_time_above_window() {
    const PID: u32 = 12345;
    let mut guest_ram = vec![0u8; 4096];
    paint_valid_task(&mut guest_ram, 0, PID);
    let offsets = synth_validation_offsets();
    let kernel = build_test_kernel(&mut guest_ram, std::collections::HashMap::new());
    let expected_ns = DEFAULT_START_TIME - 20_000_000;
    let rejection = validate(&kernel, 0, PID, expected_ns, &offsets)
        .expect_err("start_time above window must reject");
    for needle in ["start_time identity mismatch", "recycled"] {
        assert!(rejection.contains(needle));
    }
}
/// A task whose state word is `0x80` is rejected as dead, and the message
/// echoes the state.
#[test]
fn validate_task_rejects_task_dead() {
    const PID: u32 = 12345;
    let mut guest_ram = vec![0u8; 4096];
    paint_valid_task(&mut guest_ram, 0, PID);
    // Overwrite the 4-byte state field of the task painted at offset 0.
    let state_field = &mut guest_ram[synth_task::STATE_OFF..][..4];
    state_field.copy_from_slice(&0x80u32.to_le_bytes());
    let offsets = synth_validation_offsets();
    let kernel = build_test_kernel(&mut guest_ram, std::collections::HashMap::new());
    let rejection = validate(&kernel, 0, PID, DEFAULT_START_TIME, &offsets)
        .expect_err("TASK_DEAD must reject");
    for needle in ["TASK_DEAD", "state=0x80"] {
        assert!(rejection.contains(needle));
    }
}
/// A task with `on_rq` set to 1 is rejected; the message names the value and
/// carries the rb-tree and `WorkType::FutexPingPong` wording.
#[test]
fn validate_task_rejects_on_rq_queued() {
    const PID: u32 = 12345;
    let mut guest_ram = vec![0u8; 4096];
    paint_valid_task(&mut guest_ram, 0, PID);
    // Overwrite the 4-byte on_rq field of the task painted at offset 0.
    let on_rq_field = &mut guest_ram[synth_task::ON_RQ_OFF..][..4];
    on_rq_field.copy_from_slice(&1u32.to_le_bytes());
    let offsets = synth_validation_offsets();
    let kernel = build_test_kernel(&mut guest_ram, std::collections::HashMap::new());
    let rejection = validate(&kernel, 0, PID, DEFAULT_START_TIME, &offsets)
        .expect_err("on_rq=1 must reject");
    for needle in ["on_rq=1", "rb-tree", "WorkType::FutexPingPong"] {
        assert!(rejection.contains(needle));
    }
}
/// A task with a non-null DSQ pointer is rejected, and the message echoes the
/// pointer in lowercase hex.
#[test]
fn validate_task_rejects_scx_dsq_populated() {
    const PID: u32 = 12345;
    let mut guest_ram = vec![0u8; 4096];
    paint_valid_task(&mut guest_ram, 0, PID);
    // Overwrite the 8-byte DSQ pointer of the task painted at offset 0.
    let dsq_field = &mut guest_ram[synth_task::SCX_DSQ_OFF..][..8];
    dsq_field.copy_from_slice(&0xFFFF_DEAD_BEEFu64.to_le_bytes());
    let offsets = synth_validation_offsets();
    let kernel = build_test_kernel(&mut guest_ram, std::collections::HashMap::new());
    let rejection = validate(&kernel, 0, PID, DEFAULT_START_TIME, &offsets)
        .expect_err("scx.dsq non-NULL must reject");
    for needle in ["scx.dsq=0xffffdeadbeef", "SCX DSQ", "WorkType::FutexPingPong"] {
        assert!(rejection.contains(needle));
    }
}
/// A runnable-node word that no longer holds the self-address written by
/// `paint_valid_task` is treated as linked and rejected.
#[test]
fn validate_task_rejects_scx_runnable_node_linked() {
    const PID: u32 = 12345;
    let mut guest_ram = vec![0u8; 4096];
    paint_valid_task(&mut guest_ram, 0, PID);
    // Replace the first 8 bytes of the runnable node with a foreign pointer.
    let node_field = &mut guest_ram[synth_task::SCX_RUNNABLE_NODE_OFF..][..8];
    node_field.copy_from_slice(&0xFFFF_8881_DEAD_C0DEu64.to_le_bytes());
    let offsets = synth_validation_offsets();
    let kernel = build_test_kernel(&mut guest_ram, std::collections::HashMap::new());
    let rejection = validate(&kernel, 0, PID, DEFAULT_START_TIME, &offsets)
        .expect_err("runnable_node linked must reject");
    for needle in ["scx.runnable_node is linked", "WorkType::FutexPingPong"] {
        assert!(rejection.contains(needle));
    }
}
/// A task whose scheduling-class pointer differs from `EXT_KVA` is rejected,
/// and the message echoes the pointer that was found.
#[test]
fn validate_task_rejects_non_ext_sched_class() {
    const PID: u32 = 12345;
    let fair_kva: u64 = 0xFFFF_FFFF_8200_0000;
    let mut guest_ram = vec![0u8; 4096];
    paint_valid_task(&mut guest_ram, 0, PID);
    // Overwrite the 8-byte scheduling-class pointer of the task at offset 0.
    let class_field = &mut guest_ram[synth_task::SCHED_CLASS_OFF..][..8];
    class_field.copy_from_slice(&fair_kva.to_le_bytes());
    let offsets = synth_validation_offsets();
    let kernel = build_test_kernel(&mut guest_ram, std::collections::HashMap::new());
    let rejection = validate(&kernel, 0, PID, DEFAULT_START_TIME, &offsets)
        .expect_err("non-ext sched_class must reject");
    let class_echo = format!("sched_class={fair_kva:#x}");
    let required = [
        class_echo.as_str(),
        "SCX-managed tasks only",
        "SchedPolicy::Ext",
    ];
    for needle in required {
        assert!(rejection.contains(needle));
    }
}
/// A task whose boot-relative start time is zero is rejected with the
/// slab-recycle wording.
#[test]
fn validate_task_rejects_zero_start_boottime() {
    const PID: u32 = 12345;
    let mut guest_ram = vec![0u8; 4096];
    paint_valid_task(&mut guest_ram, 0, PID);
    // Zero the 8-byte boot-relative start time of the task at offset 0.
    let boottime_field = &mut guest_ram[synth_task::START_BOOTTIME_OFF..][..8];
    boottime_field.copy_from_slice(&0u64.to_le_bytes());
    let offsets = synth_validation_offsets();
    let kernel = build_test_kernel(&mut guest_ram, std::collections::HashMap::new());
    let rejection = validate(&kernel, 0, PID, DEFAULT_START_TIME, &offsets)
        .expect_err("start_boottime=0 must reject");
    for needle in ["start_boottime=0", "slab-recycle"] {
        assert!(rejection.contains(needle));
    }
}
/// When both the pid and the start time are wrong, the pid mismatch is the
/// rejection that gets reported.
#[test]
fn validate_task_layer_order_pid_before_start_time() {
    const PAINTED_PID: u32 = 99;
    const REQUESTED_PID: u32 = 12345;
    let mut guest_ram = vec![0u8; 4096];
    paint_valid_task(&mut guest_ram, 0, PAINTED_PID);
    let offsets = synth_validation_offsets();
    let kernel = build_test_kernel(&mut guest_ram, std::collections::HashMap::new());
    let skewed_start_ns = DEFAULT_START_TIME + 1;
    let err = validate(&kernel, 0, REQUESTED_PID, skewed_start_ns, &offsets)
        .expect_err("must reject");
    assert!(err.contains("pid mismatch"), "L1 must fire first: {err}");
    assert!(!err.contains("start_time identity mismatch"));
}
/// When the start time is wrong and the task is also dead, the start-time
/// mismatch is the rejection that gets reported.
#[test]
fn validate_task_layer_order_start_time_before_dead() {
    const PID: u32 = 12345;
    let mut guest_ram = vec![0u8; 4096];
    paint_valid_task(&mut guest_ram, 0, PID);
    // Mark the task dead by writing 0x80 into its 4-byte state field.
    let state_field = &mut guest_ram[synth_task::STATE_OFF..][..4];
    state_field.copy_from_slice(&0x80u32.to_le_bytes());
    let offsets = synth_validation_offsets();
    let kernel = build_test_kernel(&mut guest_ram, std::collections::HashMap::new());
    let skewed_start_ns = DEFAULT_START_TIME + 1;
    let err = validate(&kernel, 0, PID, skewed_start_ns, &offsets).expect_err("must reject");
    assert!(
        err.contains("start_time identity mismatch"),
        "L2 must fire first: {err}"
    );
    assert!(!err.contains("TASK_DEAD"));
}
/// An `OrU32` write against a symbol sets the mask bit and leaves every other
/// bit of the stored word as it was.
#[test]
fn oru32_sets_target_bits_preserves_others() {
    const SYMBOL_NAME: &str = "test_flags";
    const SYMBOL_PA: u64 = 0x40;
    const SYMBOL_KVA: u64 = 0xFFFF_FFFF_8000_0040;
    const INITIAL_FLAGS: u32 = 0xAAAA_AAAA;
    const OR_MASK: u32 = 0x0000_0001;

    // Seed the word the symbol resolves to with the initial pattern.
    let mut guest_ram = vec![0u8; 4096];
    let flags_field = &mut guest_ram[SYMBOL_PA as usize..][..4];
    flags_field.copy_from_slice(&INITIAL_FLAGS.to_le_bytes());

    let mut symbols = std::collections::HashMap::new();
    symbols.insert(SYMBOL_NAME.to_string(), SYMBOL_KVA);
    let kernel = build_test_kernel(&mut guest_ram, symbols);

    let target = KernelOpTarget::Symbol(SYMBOL_NAME.into());
    let value = KernelOpValue::OrU32(OR_MASK);
    dispatch_one_write(&kernel, None, 0, &target, &value)
        .expect("OrU32 RMW dispatch must succeed against painted symbol");

    let observed = kernel
        .read_symbol_u32(SYMBOL_NAME)
        .expect("read-back must succeed");
    assert_eq!(
        observed,
        INITIAL_FLAGS | OR_MASK,
        "OrU32 must set 0x{OR_MASK:08x} without clobbering 0x{INITIAL_FLAGS:08x}"
    );
}
/// An `OrU32` write whose mask bit is already set leaves the stored word
/// unchanged.
#[test]
fn oru32_idempotent_on_already_set_bit() {
    const SYMBOL_NAME: &str = "test_flags";
    const SYMBOL_PA: u64 = 0x40;
    const SYMBOL_KVA: u64 = 0xFFFF_FFFF_8000_0040;
    const INITIAL_FLAGS: u32 = 0xAAAA_AAAA;
    const ALREADY_SET: u32 = 0x0000_0002;

    // Seed the word the symbol resolves to with the initial pattern.
    let mut guest_ram = vec![0u8; 4096];
    let flags_field = &mut guest_ram[SYMBOL_PA as usize..][..4];
    flags_field.copy_from_slice(&INITIAL_FLAGS.to_le_bytes());

    let mut symbols = std::collections::HashMap::new();
    symbols.insert(SYMBOL_NAME.to_string(), SYMBOL_KVA);
    let kernel = build_test_kernel(&mut guest_ram, symbols);

    // Guard the premise: the chosen bit really is set in the initial pattern.
    assert_eq!(
        INITIAL_FLAGS & ALREADY_SET,
        ALREADY_SET,
        "test setup bug: chose a bit that is not pre-set"
    );

    let target = KernelOpTarget::Symbol(SYMBOL_NAME.into());
    let value = KernelOpValue::OrU32(ALREADY_SET);
    dispatch_one_write(&kernel, None, 0, &target, &value)
        .expect("OrU32 with already-set bit must succeed");

    let observed = kernel
        .read_symbol_u32(SYMBOL_NAME)
        .expect("read-back must succeed");
    assert_eq!(
        observed, INITIAL_FLAGS,
        "OrU32 of already-set bit must leave value unchanged \
         (regression: bit was toggled or cleared instead of OR'd)"
    );
}
/// A request payload carrying one `OrU32` entry survives a postcard
/// encode/decode cycle with its variant and mask intact.
#[test]
fn oru32_postcard_round_trip_through_payload() {
    use crate::vmm::wire::{
        KernelOpDirection, KernelOpEntry, KernelOpMode, KernelOpRequestPayload,
    };

    const MASK: u32 = 0xDEAD_BEEF;

    let entry = KernelOpEntry {
        target: KernelOpTarget::Symbol("any_symbol".into()),
        value: KernelOpValue::OrU32(MASK),
    };
    let payload = KernelOpRequestPayload {
        request_id: 0xABCD,
        mode: KernelOpMode::Cold,
        direction: KernelOpDirection::Write,
        tag: "oru32_roundtrip_pin".into(),
        entries: vec![entry],
    };

    let bytes = postcard::to_allocvec(&payload).expect("encode");
    let back: KernelOpRequestPayload = postcard::from_bytes(&bytes).expect("decode");

    assert_eq!(back.entries.len(), 1);
    let decoded_value = &back.entries[0].value;
    match decoded_value {
        KernelOpValue::OrU32(observed_mask) => {
            assert_eq!(*observed_mask, MASK, "OrU32 mask must survive round-trip");
        }
        other => panic!("expected OrU32 variant after round-trip, got {other:?}"),
    }
}
}