use anyhow::{Context, Result};
use std::path::Path;
/// Virtual base that `text_kva_to_pa` subtracts from a kernel-image address to
/// obtain a guest physical address (x86_64 value).
// NOTE(review): presumably mirrors the kernel's `__START_KERNEL_map` — confirm.
#[cfg(target_arch = "x86_64")]
pub(crate) const START_KERNEL_MAP: u64 = 0xffff_ffff_8000_0000;
/// Virtual base that `text_kva_to_pa` subtracts from a kernel-image address to
/// obtain a guest physical address (aarch64 value).
// NOTE(review): presumably the kernel image base for the expected VA layout — confirm.
#[cfg(target_arch = "aarch64")]
pub(crate) const START_KERNEL_MAP: u64 = 0xffff_8000_8000_0000;
/// Page offset that `resolve_page_offset` returns when the `page_offset_base`
/// symbol is missing or the value read from the guest fails its sanity check
/// (x86_64 value).
#[cfg(target_arch = "x86_64")]
pub(crate) const DEFAULT_PAGE_OFFSET: u64 = 0xffff_8880_0000_0000;
/// Page offset that `resolve_page_offset` returns when the `page_offset_base`
/// symbol is missing or the value read from the guest fails its sanity check
/// (aarch64 value).
#[cfg(target_arch = "aarch64")]
pub(crate) const DEFAULT_PAGE_OFFSET: u64 = 0xffff_0000_0000_0000;
/// Symbol addresses (`st_value`) extracted from a vmlinux ELF image by
/// [`KernelSymbols::from_vmlinux`].
///
/// The two plain `u64` fields are mandatory: loading fails when they are
/// missing. The `Option` fields are `None` when the symbol is not present in
/// the image (or its value is zero).
#[derive(Debug, Clone)]
pub(crate) struct KernelSymbols {
/// Value of the `runqueues` symbol.
pub runqueues: u64,
/// Value of the `__per_cpu_offset` symbol.
pub per_cpu_offset: u64,
/// Value of the `page_offset_base` symbol; `resolve_page_offset` reads the
/// actual page offset from guest memory at this address.
pub page_offset_base_kva: Option<u64>,
/// Value of the `scx_root` symbol.
pub scx_root: Option<u64>,
/// Value of the `scx_watchdog_timeout` symbol; target of
/// `write_watchdog_timeout`.
pub scx_watchdog_timeout: Option<u64>,
/// Value of the `init_top_pgt` symbol, or of `swapper_pg_dir` when
/// `init_top_pgt` is not found.
pub init_top_pgt: Option<u64>,
/// Value of the `__pgtable_l5_enabled` symbol; `resolve_pgtable_l5` reads
/// the flag from guest memory at this address.
pub pgtable_l5_enabled: Option<u64>,
/// Value of the `prog_idr` symbol.
pub prog_idr: Option<u64>,
}
impl KernelSymbols {
pub fn from_vmlinux(path: &Path) -> Result<Self> {
let data =
std::fs::read(path).with_context(|| format!("read vmlinux: {}", path.display()))?;
let elf = goblin::elf::Elf::parse(&data).context("parse vmlinux ELF")?;
let sym_addr = |name: &str| -> Option<u64> {
elf.syms
.iter()
.find(|s| s.st_value != 0 && elf.strtab.get_at(s.st_name) == Some(name))
.map(|s| s.st_value)
};
let runqueues = sym_addr("runqueues").context("symbol 'runqueues' not found in vmlinux")?;
let per_cpu_offset = sym_addr("__per_cpu_offset")
.context("symbol '__per_cpu_offset' not found in vmlinux")?;
let page_offset_base_kva = sym_addr("page_offset_base");
let scx_root = sym_addr("scx_root");
let scx_watchdog_timeout = sym_addr("scx_watchdog_timeout");
let init_top_pgt = sym_addr("init_top_pgt").or_else(|| sym_addr("swapper_pg_dir"));
let pgtable_l5_enabled = sym_addr("__pgtable_l5_enabled");
let prog_idr = sym_addr("prog_idr");
Ok(Self {
runqueues,
per_cpu_offset,
page_offset_base_kva,
scx_root,
scx_watchdog_timeout,
init_top_pgt,
pgtable_l5_enabled,
prog_idr,
})
}
}
/// Determines the page offset to use for KVA-to-PA translation.
///
/// When the `page_offset_base` symbol is known, its u64 value is read from
/// guest memory (via the kernel-image translation) and used if bit 63 is set.
/// In every other case — symbol absent, or a stored value with bit 63 clear
/// such as zero — [`DEFAULT_PAGE_OFFSET`] is returned.
pub(crate) fn resolve_page_offset(mem: &super::reader::GuestMem, symbols: &KernelSymbols) -> u64 {
    /// Bit that must be set for a stored value to be accepted.
    const TOP_BIT: u64 = 1 << 63;

    match symbols.page_offset_base_kva {
        None => DEFAULT_PAGE_OFFSET,
        Some(base_kva) => {
            let stored = mem.read_u64(text_kva_to_pa(base_kva), 0);
            if stored & TOP_BIT == 0 {
                DEFAULT_PAGE_OFFSET
            } else {
                stored
            }
        }
    }
}
/// Reports whether the guest's `__pgtable_l5_enabled` flag is non-zero.
///
/// Returns `false` without touching guest memory when the symbol is absent.
/// Otherwise the flag is read as a u32 at the symbol's kernel-image physical
/// address.
pub(crate) fn resolve_pgtable_l5(mem: &super::reader::GuestMem, symbols: &KernelSymbols) -> bool {
    match symbols.pgtable_l5_enabled {
        Some(flag_kva) => mem.read_u32(text_kva_to_pa(flag_kva), 0) != 0,
        None => false,
    }
}
/// Converts a kernel virtual address to a physical address by subtracting
/// `page_offset`.
///
/// The subtraction wraps on underflow rather than panicking, so an address
/// below `page_offset` yields a large wrapped value instead of an error.
pub(crate) fn kva_to_pa(kva: u64, page_offset: u64) -> u64 {
    u64::wrapping_sub(kva, page_offset)
}
/// Converts a kernel-image virtual address to a physical address by
/// subtracting [`START_KERNEL_MAP`].
///
/// The subtraction wraps on underflow rather than panicking.
pub(crate) fn text_kva_to_pa(kva: u64) -> u64 {
    u64::wrapping_sub(kva, START_KERNEL_MAP)
}
/// Reads `num_cpus` consecutive u64 entries starting at guest physical
/// address `per_cpu_offset_pa`, where `host_base` is the host pointer that
/// corresponds to guest physical address 0.
///
/// Each entry is read with volatile semantics. A naturally aligned entry is
/// fetched with a single volatile u64 load. An entry whose host address is
/// not 8-byte aligned is assembled from byte-wise volatile loads instead,
/// because `ptr::read_volatile::<u64>` on an unaligned pointer is undefined
/// behaviour.
///
/// Returns the entries in CPU order; the result is empty when `num_cpus` is 0.
///
/// # Safety
///
/// The caller must guarantee that every byte in
/// `host_base + per_cpu_offset_pa .. host_base + per_cpu_offset_pa + 8 * num_cpus`
/// is readable and belongs to a single allocation/mapping that stays valid
/// for the duration of the call.
pub(crate) unsafe fn read_per_cpu_offsets(
    host_base: *const u8,
    per_cpu_offset_pa: u64,
    num_cpus: u32,
) -> Vec<u64> {
    const ENTRY_SIZE: usize = std::mem::size_of::<u64>();

    let mut offsets = Vec::with_capacity(num_cpus as usize);
    for cpu in 0..num_cpus {
        let addr = per_cpu_offset_pa + u64::from(cpu) * ENTRY_SIZE as u64;
        // SAFETY: the caller guarantees that the entry at `addr` lies inside
        // the mapping that starts at `host_base`.
        let entry = unsafe { host_base.add(addr as usize) };

        let val = if (entry as usize) % std::mem::align_of::<u64>() == 0 {
            // SAFETY: `entry` is aligned for u64 (checked above) and the
            // caller guarantees the 8 bytes behind it are readable.
            unsafe { std::ptr::read_volatile(entry as *const u64) }
        } else {
            // Unaligned entry: a u64 volatile load would be UB, so read the
            // bytes individually and reassemble in native byte order.
            let mut raw = [0u8; ENTRY_SIZE];
            for (i, byte) in raw.iter_mut().enumerate() {
                // SAFETY: `entry + i` stays within the 8 readable bytes of
                // this entry, and u8 has no alignment requirement.
                *byte = unsafe { std::ptr::read_volatile(entry.add(i)) };
            }
            u64::from_ne_bytes(raw)
        };
        offsets.push(val);
    }
    offsets
}
/// Computes one physical address per CPU for the `runqueues` per-CPU variable.
///
/// For each entry of `per_cpu_offsets` the offset is added (wrapping) to
/// `runqueues_kva`, and the resulting virtual address is translated with
/// [`kva_to_pa`] using `page_offset`. The output has the same length and
/// order as `per_cpu_offsets`.
pub(crate) fn compute_rq_pas(
    runqueues_kva: u64,
    per_cpu_offsets: &[u64],
    page_offset: u64,
) -> Vec<u64> {
    let mut rq_pas = Vec::with_capacity(per_cpu_offsets.len());
    for &cpu_offset in per_cpu_offsets {
        let rq_kva = runqueues_kva.wrapping_add(cpu_offset);
        rq_pas.push(kva_to_pa(rq_kva, page_offset));
    }
    rq_pas
}
/// Stores `val` as a u64 into the guest's `scx_watchdog_timeout` variable.
///
/// Returns `true` after issuing the write, or `false` (without touching guest
/// memory) when the symbol was not found in vmlinux. The target address is
/// derived with the kernel-image translation, [`text_kva_to_pa`].
#[allow(dead_code)]
pub(crate) fn write_watchdog_timeout(
    mem: &super::reader::GuestMem,
    symbols: &KernelSymbols,
    val: u64,
) -> bool {
    match symbols.scx_watchdog_timeout {
        None => false,
        Some(timeout_kva) => {
            mem.write_u64(text_kva_to_pa(timeout_kva), 0, val);
            true
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::monitor::reader::GuestMem;

    /// Guest physical address at which the fixtures place the value under test.
    const SLOT_PA: usize = 0x1000;

    /// Symbol table with the mandatory addresses zeroed and every optional
    /// symbol absent; tests override the one field they exercise.
    fn blank_symbols() -> KernelSymbols {
        KernelSymbols {
            runqueues: 0,
            per_cpu_offset: 0,
            page_offset_base_kva: None,
            scx_root: None,
            scx_watchdog_timeout: None,
            init_top_pgt: None,
            pgtable_l5_enabled: None,
            prog_idr: None,
        }
    }

    /// Runs `resolve_page_offset` against guest memory whose
    /// `page_offset_base` slot holds `stored`.
    fn page_offset_with_stored(stored: u64) -> u64 {
        let mut buf = [0u8; 0x2000];
        buf[SLOT_PA..SLOT_PA + 8].copy_from_slice(&stored.to_ne_bytes());
        let mem = GuestMem::new(buf.as_mut_ptr(), buf.len() as u64);
        let symbols = KernelSymbols {
            page_offset_base_kva: Some(START_KERNEL_MAP + SLOT_PA as u64),
            ..blank_symbols()
        };
        resolve_page_offset(&mem, &symbols)
    }

    /// Runs `resolve_pgtable_l5` against guest memory whose
    /// `__pgtable_l5_enabled` slot holds `flag`.
    fn pgtable_l5_with_flag(flag: u32) -> bool {
        let mut buf = [0u8; 0x2000];
        buf[SLOT_PA..SLOT_PA + 4].copy_from_slice(&flag.to_ne_bytes());
        let mem = GuestMem::new(buf.as_mut_ptr(), buf.len() as u64);
        let symbols = KernelSymbols {
            pgtable_l5_enabled: Some(START_KERNEL_MAP + SLOT_PA as u64),
            ..blank_symbols()
        };
        resolve_pgtable_l5(&mem, &symbols)
    }

    #[test]
    fn find_runqueues_symbol() {
        // Silently skipped when no test vmlinux is available.
        let Some(path) = crate::monitor::find_test_vmlinux() else {
            return;
        };
        // NOTE(review): paths under /sys/ are skipped, presumably because they
        // are not ELF images that `from_vmlinux` can parse — confirm.
        if path.starts_with("/sys/") {
            return;
        }
        let syms = KernelSymbols::from_vmlinux(&path).unwrap();
        assert_ne!(syms.runqueues, 0);
        assert_ne!(syms.per_cpu_offset, 0);
        assert!(syms.runqueues > 0xffff_0000_0000_0000);
    }

    #[test]
    fn from_vmlinux_nonexistent() {
        let path = std::path::Path::new("/nonexistent/vmlinux");
        assert!(KernelSymbols::from_vmlinux(path).is_err());
    }

    #[test]
    fn kva_to_pa_basic() {
        let page_offset = DEFAULT_PAGE_OFFSET;
        let dram_kva = page_offset.wrapping_add(0x10_0000);
        assert_eq!(kva_to_pa(dram_kva, page_offset), 0x10_0000);
        assert_eq!(kva_to_pa(page_offset, page_offset), 0);
    }

    #[test]
    fn kva_to_pa_wrapping() {
        // An address below the page offset must wrap instead of panicking.
        let page_offset = DEFAULT_PAGE_OFFSET;
        let kva = 0x0000_0000_0001_0000u64;
        let pa = kva_to_pa(kva, page_offset);
        assert_eq!(pa, kva.wrapping_sub(page_offset));
    }

    #[test]
    fn text_kva_to_pa_basic() {
        assert_eq!(text_kva_to_pa(START_KERNEL_MAP + 0x10_0000), 0x10_0000);
        assert_eq!(text_kva_to_pa(START_KERNEL_MAP), 0);
    }

    #[test]
    fn compute_rq_pas_two_cpus() {
        let page_offset = DEFAULT_PAGE_OFFSET;
        let runqueues = page_offset.wrapping_add(0x20_0000);
        let offsets = vec![0, 0x4_0000];
        let pas = compute_rq_pas(runqueues, &offsets, page_offset);
        assert_eq!(pas[0], 0x20_0000);
        assert_eq!(pas[1], 0x24_0000);
    }

    #[test]
    fn compute_rq_pas_empty_offsets() {
        let page_offset = DEFAULT_PAGE_OFFSET;
        let runqueues = page_offset.wrapping_add(0x20_0000);
        let pas = compute_rq_pas(runqueues, &[], page_offset);
        assert!(pas.is_empty());
    }

    #[test]
    fn compute_rq_pas_single_cpu() {
        let page_offset = DEFAULT_PAGE_OFFSET;
        let runqueues = page_offset.wrapping_add(0x20_0000);
        let pas = compute_rq_pas(runqueues, &[0], page_offset);
        assert_eq!(pas.len(), 1);
        assert_eq!(pas[0], 0x20_0000);
    }

    #[test]
    fn read_per_cpu_offsets_zero_cpus() {
        let buf = [0u8; 64];
        // SAFETY: with zero CPUs nothing is read.
        let result = unsafe { read_per_cpu_offsets(buf.as_ptr(), 0, 0) };
        assert!(result.is_empty());
    }

    #[test]
    fn read_per_cpu_offsets_known_buffer() {
        let offsets: [u64; 3] = [0x1000, 0x2000, 0x3000];
        // SAFETY: `offsets` is 24 readable, u64-aligned bytes; three entries
        // are read starting at offset 0.
        let result = unsafe { read_per_cpu_offsets(offsets.as_ptr() as *const u8, 0, 3) };
        assert_eq!(result.len(), 3);
        assert_eq!(result[0], 0x1000);
        assert_eq!(result[1], 0x2000);
        assert_eq!(result[2], 0x3000);
    }

    #[test]
    fn read_per_cpu_offsets_nonzero_pa() {
        // Backed by u64 storage so that byte offset 16 is 8-byte aligned; a
        // plain `[u8; N]` gives no such guarantee for the u64 reads.
        let mut backing = [0u64; 5];
        backing[2..5].copy_from_slice(&[0xAA, 0xBB, 0xCC]);
        // SAFETY: `backing` is 40 readable bytes; three aligned entries are
        // read from byte offsets 16..40.
        let result = unsafe { read_per_cpu_offsets(backing.as_ptr() as *const u8, 16, 3) };
        assert_eq!(result, vec![0xAA, 0xBB, 0xCC]);
    }

    #[test]
    fn write_watchdog_timeout_writes_value() {
        let symbols = KernelSymbols {
            scx_watchdog_timeout: Some(START_KERNEL_MAP + SLOT_PA as u64),
            ..blank_symbols()
        };
        let mut buf = [0u8; 0x2000];
        let mem = GuestMem::new(buf.as_mut_ptr(), buf.len() as u64);
        assert!(write_watchdog_timeout(&mem, &symbols, 30_000));
        assert_eq!(mem.read_u64(SLOT_PA as u64, 0), 30_000);
    }

    #[test]
    fn write_watchdog_timeout_returns_false_when_absent() {
        let symbols = blank_symbols();
        let buf = [0u8; 64];
        // The buffer is never written: the symbol is absent, so the call
        // returns before touching memory.
        let mem = GuestMem::new(buf.as_ptr() as *mut u8, buf.len() as u64);
        assert!(!write_watchdog_timeout(&mem, &symbols, 30_000));
    }

    #[test]
    fn resolve_page_offset_with_symbol() {
        let expected_page_offset = 0xffff_8880_0000_0000u64;
        assert_eq!(
            page_offset_with_stored(expected_page_offset),
            expected_page_offset
        );
    }

    #[test]
    fn resolve_page_offset_without_symbol() {
        let buf = [0u8; 64];
        let mem = GuestMem::new(buf.as_ptr() as *mut u8, buf.len() as u64);
        let symbols = blank_symbols();
        assert_eq!(resolve_page_offset(&mem, &symbols), DEFAULT_PAGE_OFFSET);
    }

    #[test]
    fn resolve_page_offset_zero_value_falls_back() {
        assert_eq!(page_offset_with_stored(0), DEFAULT_PAGE_OFFSET);
    }

    #[test]
    fn resolve_page_offset_garbage_value_falls_back() {
        // Bit 63 is clear, so the stored value must be rejected.
        let garbage: u64 = 0x1234_5678_DEAD_BEEF;
        assert_eq!(page_offset_with_stored(garbage), DEFAULT_PAGE_OFFSET);
    }

    #[test]
    fn resolve_page_offset_randomized_memory() {
        // Any value with bit 63 set is accepted, even if it differs from the
        // default.
        let randomized_page_offset = 0xff11_0000_0000_0000u64;
        assert_eq!(
            page_offset_with_stored(randomized_page_offset),
            randomized_page_offset
        );
    }

    #[test]
    fn resolve_pgtable_l5_enabled() {
        assert!(pgtable_l5_with_flag(1));
    }

    #[test]
    fn resolve_pgtable_l5_disabled() {
        assert!(!pgtable_l5_with_flag(0));
    }

    #[test]
    fn resolve_pgtable_l5_absent_symbol() {
        let buf = [0u8; 64];
        let mem = GuestMem::new(buf.as_ptr() as *mut u8, buf.len() as u64);
        let symbols = blank_symbols();
        assert!(!resolve_pgtable_l5(&mem, &symbols));
    }
}