use std::path::Path;
use std::sync::{Arc, OnceLock, RwLock};
use anyhow::{Context, Result, bail};
use btf_rs::{Btf, Type};
mod local_storage;
pub use local_storage::TaskStorageOffsets;
use local_storage::resolve_task_storage_offsets;
mod ringbuf_stackmap;
pub use ringbuf_stackmap::{BpfRingbufOffsets, BpfStackmapOffsets};
use ringbuf_stackmap::{resolve_ringbuf_offsets, resolve_stackmap_offsets};
mod struct_ops;
pub use struct_ops::StructOpsOffsets;
use struct_ops::resolve_struct_ops_offsets;
mod htab;
pub use htab::HtabOffsets;
use htab::resolve_htab_offsets;
mod cpu_time;
pub use cpu_time::{
CPUTIME_IDLE, CPUTIME_IOWAIT, CPUTIME_IRQ, CPUTIME_NICE, CPUTIME_SOFTIRQ, CPUTIME_STEAL,
CPUTIME_SYSTEM, CPUTIME_USER, CpuTimeOffsets, NR_SOFTIRQS,
};
#[allow(unused_imports)]
pub use cpu_time::SOFTIRQ_NAMES;
mod numa;
#[allow(unused_imports)]
pub use numa::{
NR_VM_NUMA_EVENT_ITEMS, NUMA_EVENT_NAMES, NUMA_FOREIGN, NUMA_HIT, NUMA_INTERLEAVE_HIT,
NUMA_LOCAL, NUMA_MISS, NUMA_OTHER, NumaStatsOffsets,
};
mod sched_domain;
use sched_domain::resolve_sched_domain_offsets;
pub use sched_domain::{CPU_MAX_IDLE_TYPES, SchedDomainOffsets, SchedDomainStatsOffsets};
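/// Reads a file from disk and parses it as BTF. Accepts either a raw BTF blob
/// (e.g. `/sys/kernel/btf/vmlinux`) or an ELF vmlinux with a `.BTF` section.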
pub(crate) fn load_btf_from_path(path: &Path) -> Result<Btf> {
let data = std::fs::read(path).context("read file")?;
load_btf_from_bytes(&data, path)
}
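/// Process-global cache for the parsed `/sys/kernel/btf/vmlinux` BTF.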
static VMLINUX_BTF_CACHE: OnceLock<RwLock<Option<Arc<Btf>>>> = OnceLock::new();
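/// Returns the shared `/sys/kernel/btf/vmlinux` BTF, parsing it on first use.
/// Load failures are logged and not cached, so later callers retry.
///
/// A minimal usage sketch (assumes the kernel exposes BTF at the usual path):
///
/// ```ignore
/// if let Some(btf) = cached_vmlinux_btf() {
///     let offsets = KernelOffsets::from_btf(&btf)?;
/// }
/// ```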
pub(crate) fn cached_vmlinux_btf() -> Option<Arc<Btf>> {
let slot = VMLINUX_BTF_CACHE.get_or_init(|| RwLock::new(None));
{
let read = slot.read().unwrap_or_else(|e| e.into_inner());
if let Some(btf) = read.as_ref() {
return Some(Arc::clone(btf));
}
}
let path = Path::new("/sys/kernel/btf/vmlinux");
let btf = match load_btf_from_path(path) {
Ok(b) => Arc::new(b),
Err(e) => {
tracing::warn!(
%e,
"btf_offsets: failed to load /sys/kernel/btf/vmlinux for \
process-global cache; falling back to None for this caller",
);
return None;
}
};
let mut write = slot.write().unwrap_or_else(|e| e.into_inner());
if write.is_none() {
*write = Some(Arc::clone(&btf));
}
Some(Arc::clone(write.as_ref().unwrap()))
}
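/// Parses BTF from an in-memory buffer; `path` is only used for diagnostics
/// and for locating the `.btf` sidecar.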
pub(crate) fn load_btf_from_bytes(data: &[u8], path: &Path) -> Result<Btf> {
load_btf_from_bytes_inner(data, None, path)
}
pub(crate) fn load_btf_from_elf(
elf: &goblin::elf::Elf<'_>,
data: &[u8],
path: &Path,
) -> Result<Btf> {
load_btf_from_bytes_inner(data, Some(elf), path)
}
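/// Core loader. Raw BTF blobs are parsed directly; anything else is treated as
/// an ELF vmlinux and the `.BTF` section is extracted. When the input lives
/// inside the cache root, the extracted section is written to a `.btf` sidecar
/// next to it and reused on later loads while it stays at least as new as the
/// vmlinux.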
fn load_btf_from_bytes_inner(
data: &[u8],
elf: Option<&goblin::elf::Elf<'_>>,
path: &Path,
) -> Result<Btf> {
if is_raw_btf(data) {
return Btf::from_bytes(data).map_err(|e| anyhow::anyhow!("{e}"));
}
let (canon_path, sidecar_allowed) = match std::fs::canonicalize(path) {
Ok(c) => {
let inside = crate::cache::path_inside_cache_root(&c);
(c, inside)
}
Err(e) => {
tracing::debug!(
path = %path.display(),
err = %e,
"btf input path canonicalize failed; sidecar suppressed for this load",
);
(path.to_path_buf(), false)
}
};
let sidecar = btf_sidecar_path(&canon_path);
if sidecar_allowed {
if sidecar_fresh(&sidecar, &canon_path) {
match std::fs::read(&sidecar) {
Ok(cached) if is_raw_btf(&cached) => {
match Btf::from_bytes(&cached) {
Ok(btf) => return Ok(btf),
Err(e) => {
tracing::warn!(
path = %sidecar.display(),
err = %e,
"btf sidecar parse failed; falling back to ELF extraction",
);
}
}
}
Ok(_) => {
tracing::warn!(
path = %sidecar.display(),
"btf sidecar lacks 0x9FEB magic; falling back to ELF extraction",
);
}
Err(e) => {
tracing::warn!(
path = %sidecar.display(),
err = %e,
"btf sidecar read failed; falling back to ELF extraction",
);
}
}
}
} else {
tracing::debug!(
path = %canon_path.display(),
"btf sidecar suppressed: vmlinux path is outside the cache root",
);
}
let parsed_elf;
let elf_ref = match elf {
Some(e) => e,
None => {
parsed_elf = goblin::elf::Elf::parse(data).map_err(|_| {
anyhow::anyhow!(
"{}: not recognized as raw BTF (missing 0x9FEB magic) or ELF vmlinux",
path.display()
)
})?;
&parsed_elf
}
};
let btf_shdr = elf_ref
.section_headers
.iter()
.find(|shdr| elf_ref.shdr_strtab.get_at(shdr.sh_name) == Some(".BTF"));
let shdr = match btf_shdr {
Some(s) => s,
None => bail!("vmlinux ELF has no .BTF section"),
};
let offset = shdr.sh_offset as usize;
let size = shdr.sh_size as usize;
let btf_data = offset
.checked_add(size)
.and_then(|end| data.get(offset..end))
.context(".BTF section data out of bounds")?;
let btf = Btf::from_bytes(btf_data).map_err(|e| anyhow::anyhow!("{e}"))?;
if sidecar_allowed && let Err(e) = write_btf_sidecar(&sidecar, btf_data) {
tracing::warn!(
path = %sidecar.display(),
err = %e,
"btf sidecar write failed; BTF will be re-extracted from ELF on next load",
);
}
Ok(btf)
}
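/// Sidecar path for a vmlinux image: the input path with `.btf` appended
/// (e.g. `vmlinux` becomes `vmlinux.btf`).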
fn btf_sidecar_path(path: &Path) -> std::path::PathBuf {
let mut name = path.as_os_str().to_os_string();
name.push(".btf");
std::path::PathBuf::from(name)
}
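/// True if `data` starts with the BTF magic (0xEB9F stored little-endian,
/// i.e. leading bytes `9F EB`).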
fn is_raw_btf(data: &[u8]) -> bool {
data.len() >= 2 && data[0] == 0x9F && data[1] == 0xEB
}
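/// A sidecar is usable only if both mtimes are readable and the sidecar is at
/// least as new as the vmlinux it was extracted from.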
fn sidecar_fresh(sidecar: &Path, vmlinux: &Path) -> bool {
let Ok(sidecar_mtime) = std::fs::metadata(sidecar).and_then(|m| m.modified()) else {
return false;
};
let Ok(vmlinux_mtime) = std::fs::metadata(vmlinux).and_then(|m| m.modified()) else {
return false;
};
sidecar_mtime >= vmlinux_mtime
}
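/// Writes the sidecar atomically: tempfile in the same directory, fsync, then
/// rename over the final path.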
fn write_btf_sidecar(sidecar: &Path, bytes: &[u8]) -> Result<()> {
use std::io::Write;
let parent = sidecar
.parent()
.context("btf sidecar path has no parent directory")?;
let mut tmp =
tempfile::NamedTempFile::new_in(parent).context("create tempfile for btf sidecar")?;
tmp.write_all(bytes).context("write btf sidecar contents")?;
tmp.as_file()
.sync_all()
.context("fsync btf sidecar before rename")?;
tmp.persist(sidecar)
.map_err(|e| anyhow::anyhow!("persist btf sidecar: {}", e.error))?;
Ok(())
}
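/// Byte offsets into `struct rq`, `scx_rq`, and the local `scx_dispatch_q`
/// resolved from kernel BTF, plus optional groups (events, schedstats, sched
/// domains, watchdog) that are resolved best-effort and left `None` when the
/// kernel lacks the corresponding fields.
///
/// A minimal sketch of resolving offsets against the running kernel (assumes
/// `/sys/kernel/btf/vmlinux` is readable):
///
/// ```ignore
/// let btf = load_btf_from_path(Path::new("/sys/kernel/btf/vmlinux"))?;
/// let offsets = KernelOffsets::from_btf(&btf)?;
/// println!("rq.nr_running is at byte offset {}", offsets.rq_nr_running);
/// ```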
#[derive(Debug, Clone)]
pub struct KernelOffsets {
pub rq_nr_running: usize,
pub rq_clock: usize,
pub rq_scx: usize,
pub scx_rq_nr_running: usize,
pub scx_rq_local_dsq: usize,
pub scx_rq_flags: usize,
pub dsq_nr: usize,
pub event_offsets: Option<ScxEventOffsets>,
pub schedstat_offsets: Option<SchedstatOffsets>,
pub sched_domain_offsets: Option<SchedDomainOffsets>,
pub watchdog_offsets: Option<ScxWatchdogOffsets>,
}
#[derive(Debug, Clone)]
pub struct ScxWatchdogOffsets {
pub scx_sched_watchdog_timeout_off: usize,
}
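/// Offsets for the sched_ext event counters: `percpu_ptr_off` locates the
/// per-CPU pointer inside `scx_sched`, `event_stats_off` the stats struct
/// inside the pointed-to object, and the `ev_*` fields the individual
/// counters. Counters that only exist on newer kernels are `Option`.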
#[derive(Debug, Clone)]
pub struct ScxEventOffsets {
pub percpu_ptr_off: usize,
pub event_stats_off: usize,
pub ev_select_cpu_fallback: usize,
pub ev_dispatch_local_dsq_offline: usize,
pub ev_dispatch_keep_last: usize,
pub ev_enq_skip_exiting: usize,
pub ev_enq_skip_migration_disabled: usize,
pub ev_reenq_immed: Option<usize>,
pub ev_reenq_local_repeat: Option<usize>,
pub ev_refill_slice_dfl: Option<usize>,
pub ev_bypass_duration: Option<usize>,
pub ev_bypass_dispatch: Option<usize>,
pub ev_bypass_activate: Option<usize>,
pub ev_insert_not_owned: Option<usize>,
pub ev_sub_bypass_dispatch: Option<usize>,
}
impl KernelOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (rq_struct, _) = find_struct(btf, "rq")?;
let rq_nr_running = member_byte_offset(btf, &rq_struct, "nr_running")?;
let rq_clock = member_byte_offset(btf, &rq_struct, "clock")?;
let (rq_scx, scx_member) = member_byte_offset_with_member(btf, &rq_struct, "scx")?;
let scx_rq_struct =
resolve_member_struct(btf, &scx_member).context("btf: resolve type of rq.scx")?;
let scx_rq_nr_running = member_byte_offset(btf, &scx_rq_struct, "nr_running")?;
let (scx_rq_local_dsq, local_dsq_member) =
member_byte_offset_with_member(btf, &scx_rq_struct, "local_dsq")?;
let scx_rq_flags = member_byte_offset(btf, &scx_rq_struct, "flags")?;
let dsq_struct = resolve_member_struct(btf, &local_dsq_member)
.context("btf: resolve type of scx_rq.local_dsq")?;
let dsq_nr = member_byte_offset(btf, &dsq_struct, "nr")?;
let event_offsets = resolve_event_offsets(btf).ok();
let schedstat_offsets = resolve_schedstat_offsets(btf).ok();
let sched_domain_offsets = resolve_sched_domain_offsets(btf, &rq_struct).ok();
let watchdog_offsets = resolve_watchdog_offsets(btf).ok();
Ok(Self {
rq_nr_running,
rq_clock,
rq_scx,
scx_rq_nr_running,
scx_rq_local_dsq,
scx_rq_flags,
dsq_nr,
event_offsets,
schedstat_offsets,
sched_domain_offsets,
watchdog_offsets,
})
}
#[allow(dead_code)]
pub fn from_vmlinux(path: &Path) -> Result<Self> {
let btf =
load_btf_from_path(path).with_context(|| format!("btf: open {}", path.display()))?;
Self::from_btf(&btf)
}
}
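/// Resolves the event-counter offsets, handling both known layouts: newer
/// kernels nest the stats as `scx_sched.pcpu` -> `scx_sched_pcpu.event_stats`,
/// while older kernels expose a per-CPU `scx_sched.event_stats_cpu` pointer
/// directly, in which case the inner stats offset is 0. Some counters also
/// changed names (`SCX_EV_REFILL_SLICE_DFL` vs `SCX_EV_ENQ_SLICE_DFL`), so
/// both spellings are tried.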
fn resolve_event_offsets(btf: &Btf) -> Result<ScxEventOffsets> {
let (scx_sched_struct, _) = find_struct(btf, "scx_sched")?;
let pcpu_path = member_byte_offset(btf, &scx_sched_struct, "pcpu")
.ok()
.and_then(|pcpu_off| {
let (pcpu_struct, _) = find_struct(btf, "scx_sched_pcpu").ok()?;
let (stats_off, stats_member) =
member_byte_offset_with_member(btf, &pcpu_struct, "event_stats").ok()?;
let stats_struct = resolve_member_struct(btf, &stats_member).ok()?;
Some((pcpu_off, stats_off, stats_struct))
});
let (percpu_ptr_off, event_stats_off, event_stats_struct) = match pcpu_path {
Some(resolved) => resolved,
None => {
let (esc_off, esc_member) =
member_byte_offset_with_member(btf, &scx_sched_struct, "event_stats_cpu")
.context("btf: neither scx_sched.pcpu nor scx_sched.event_stats_cpu found")?;
let stats_struct = resolve_member_struct(btf, &esc_member)
.context("btf: resolve type of scx_sched.event_stats_cpu")?;
(esc_off, 0, stats_struct)
}
};
let ev_select_cpu_fallback =
member_byte_offset(btf, &event_stats_struct, "SCX_EV_SELECT_CPU_FALLBACK")?;
let ev_dispatch_local_dsq_offline = member_byte_offset(
btf,
&event_stats_struct,
"SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE",
)?;
let ev_dispatch_keep_last =
member_byte_offset(btf, &event_stats_struct, "SCX_EV_DISPATCH_KEEP_LAST")?;
let ev_enq_skip_exiting =
member_byte_offset(btf, &event_stats_struct, "SCX_EV_ENQ_SKIP_EXITING")?;
let ev_enq_skip_migration_disabled = member_byte_offset(
btf,
&event_stats_struct,
"SCX_EV_ENQ_SKIP_MIGRATION_DISABLED",
)?;
let ev_reenq_immed = member_byte_offset(btf, &event_stats_struct, "SCX_EV_REENQ_IMMED").ok();
let ev_reenq_local_repeat =
member_byte_offset(btf, &event_stats_struct, "SCX_EV_REENQ_LOCAL_REPEAT").ok();
let ev_refill_slice_dfl =
member_byte_offset(btf, &event_stats_struct, "SCX_EV_REFILL_SLICE_DFL")
.or_else(|_| member_byte_offset(btf, &event_stats_struct, "SCX_EV_ENQ_SLICE_DFL"))
.ok();
let ev_bypass_duration =
member_byte_offset(btf, &event_stats_struct, "SCX_EV_BYPASS_DURATION").ok();
let ev_bypass_dispatch =
member_byte_offset(btf, &event_stats_struct, "SCX_EV_BYPASS_DISPATCH").ok();
let ev_bypass_activate =
member_byte_offset(btf, &event_stats_struct, "SCX_EV_BYPASS_ACTIVATE").ok();
let ev_insert_not_owned =
member_byte_offset(btf, &event_stats_struct, "SCX_EV_INSERT_NOT_OWNED").ok();
let ev_sub_bypass_dispatch =
member_byte_offset(btf, &event_stats_struct, "SCX_EV_SUB_BYPASS_DISPATCH").ok();
Ok(ScxEventOffsets {
percpu_ptr_off,
event_stats_off,
ev_select_cpu_fallback,
ev_dispatch_local_dsq_offline,
ev_dispatch_keep_last,
ev_enq_skip_exiting,
ev_enq_skip_migration_disabled,
ev_reenq_immed,
ev_reenq_local_repeat,
ev_refill_slice_dfl,
ev_bypass_duration,
ev_bypass_dispatch,
ev_bypass_activate,
ev_insert_not_owned,
ev_sub_bypass_dispatch,
})
}
fn resolve_watchdog_offsets(btf: &Btf) -> Result<ScxWatchdogOffsets> {
let (scx_sched_struct, _) = find_struct(btf, "scx_sched")?;
let scx_sched_watchdog_timeout_off =
member_byte_offset(btf, &scx_sched_struct, "watchdog_timeout")?;
Ok(ScxWatchdogOffsets {
scx_sched_watchdog_timeout_off,
})
}
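/// Finds the first BTF `struct` type with the given name, skipping same-named
/// non-struct entries such as typedefs or forward declarations.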
pub(crate) fn find_struct(btf: &Btf, name: &str) -> Result<(btf_rs::Struct, String)> {
let types = btf
.resolve_types_by_name(name)
.with_context(|| format!("btf: type '{name}' not found"))?;
for t in &types {
if let Type::Struct(s) = t {
let resolved_name = btf.resolve_name(s).unwrap_or_default();
return Ok((s.clone(), resolved_name));
}
}
bail!("btf: '{name}' exists but is not a struct");
}
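/// Lookup result for structs that some BTF blobs only carry as a forward
/// declaration.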
pub(crate) enum StructOrFwd {
Full(btf_rs::Struct),
Fwd,
}
pub(crate) fn find_struct_or_fwd(btf: &Btf, name: &str) -> Result<StructOrFwd> {
let types = btf
.resolve_types_by_name(name)
.with_context(|| format!("btf: type '{name}' not found"))?;
let mut saw_fwd_struct = false;
for t in &types {
match t {
Type::Struct(s) => return Ok(StructOrFwd::Full(s.clone())),
Type::Fwd(f) if f.is_struct() => saw_fwd_struct = true,
_ => {}
}
}
if saw_fwd_struct {
return Ok(StructOrFwd::Fwd);
}
bail!("btf: '{name}' exists but is neither a struct nor a struct-flavour fwd")
}
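/// Resolves the offset of `var_name` within the DATASEC named `section_name`
/// (e.g. a percpu data section), if both exist.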
pub(crate) fn resolve_var_offset_in_section(
btf: &Btf,
section_name: &str,
var_name: &str,
) -> Option<u32> {
let candidates = btf.resolve_types_by_name(section_name).ok()?;
for ty in candidates {
let Type::Datasec(ds) = ty else { continue };
for var_info in &ds.variables {
let Ok(chained) = btf.resolve_chained_type(var_info) else {
continue;
};
let Type::Var(var) = chained else { continue };
let Ok(name) = btf.resolve_name(&var) else {
continue;
};
if name == var_name {
return Some(var_info.offset());
}
}
}
None
}
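/// Returns the byte offset of `field` within `s`, descending into anonymous
/// struct/union members. Fails for bitfields (non-byte-aligned offsets) and
/// for fields that are not present.
///
/// A small sketch, assuming a `Btf` for the target kernel is already loaded:
///
/// ```ignore
/// let (rq, _) = find_struct(&btf, "rq")?;
/// let clock_off = member_byte_offset(&btf, &rq, "clock")?;
/// ```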
pub(crate) fn member_byte_offset(btf: &Btf, s: &btf_rs::Struct, field: &str) -> Result<usize> {
member_byte_offset_recursive(btf, s, field, 0)
}
fn member_byte_offset_recursive(
btf: &Btf,
s: &btf_rs::Struct,
field: &str,
base_offset: usize,
) -> Result<usize> {
for member in &s.members {
let name = btf.resolve_name(member).unwrap_or_default();
let bits = member.bit_offset();
if bits % 8 != 0 {
if name == field {
bail!("btf: field '{field}' has non-byte-aligned offset ({bits} bits)");
}
continue;
}
let member_offset = base_offset + (bits / 8) as usize;
if name == field {
return Ok(member_offset);
}
if name.is_empty()
&& let Ok(inner) = resolve_member_composite(btf, member)
&& let Ok(offset) = member_byte_offset_recursive(btf, &inner, field, member_offset)
{
return Ok(offset);
}
}
bail!("btf: field '{field}' not found in struct");
}
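/// Follows a member's type chain through const/volatile/typedef/restrict/
/// type-tag wrappers until a struct or union is reached (bounded to 20 hops).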
fn resolve_member_composite(btf: &Btf, member: &btf_rs::Member) -> Result<btf_rs::Struct> {
let mut t = btf.resolve_chained_type(member)?;
for _ in 0..20 {
match t {
Type::Struct(s) | Type::Union(s) => return Ok(s),
Type::Const(_)
| Type::Volatile(_)
| Type::Typedef(_)
| Type::Restrict(_)
| Type::TypeTag(_) => {
t = btf.resolve_chained_type(t.as_btf_type().unwrap())?;
}
_ => bail!("btf: not a composite type"),
}
}
bail!("btf: type chain too deep")
}
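/// Like `member_byte_offset`, but also returns the matching `Member` so the
/// caller can resolve its type. Unlike `member_byte_offset`, this does not
/// descend into anonymous members.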
pub(super) fn member_byte_offset_with_member(
btf: &Btf,
s: &btf_rs::Struct,
field: &str,
) -> Result<(usize, btf_rs::Member)> {
for member in &s.members {
let name = btf.resolve_name(member).unwrap_or_default();
if name == field {
let bits = member.bit_offset();
if bits % 8 != 0 {
bail!("btf: field '{field}' has non-byte-aligned offset ({bits} bits)");
}
return Ok(((bits / 8) as usize, member.clone()));
}
}
bail!("btf: field '{field}' not found in struct");
}
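/// Resolves a member's type id to the struct it ultimately refers to, via the
/// shared `bpf_map` helper.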
pub(super) fn resolve_member_struct(btf: &Btf, member: &btf_rs::Member) -> Result<btf_rs::Struct> {
use btf_rs::BtfType;
let tid = member.get_type_id().context("btf: member type_id")?;
super::bpf_map::resolve_to_struct(btf, tid).context("btf: could not resolve member to struct")
}
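/// Byte offsets for per-runqueue schedstats (`rq.rq_sched_info` plus the
/// yield/schedule/wakeup counters). These fields are typically only present
/// on kernels built with CONFIG_SCHEDSTATS.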
#[derive(Debug, Clone)]
pub struct SchedstatOffsets {
pub rq_sched_info: usize,
pub sched_info_run_delay: usize,
pub sched_info_pcount: usize,
pub rq_yld_count: usize,
pub rq_sched_count: usize,
pub rq_sched_goidle: usize,
pub rq_ttwu_count: usize,
pub rq_ttwu_local: usize,
}
fn resolve_schedstat_offsets(btf: &Btf) -> Result<SchedstatOffsets> {
let (rq_struct, _) = find_struct(btf, "rq")?;
let (rq_sched_info, sched_info_member) =
member_byte_offset_with_member(btf, &rq_struct, "rq_sched_info")?;
let sched_info_struct = resolve_member_struct(btf, &sched_info_member)
.context("btf: resolve type of rq.rq_sched_info")?;
let sched_info_run_delay = member_byte_offset(btf, &sched_info_struct, "run_delay")?;
let sched_info_pcount = member_byte_offset(btf, &sched_info_struct, "pcount")?;
let rq_yld_count = member_byte_offset(btf, &rq_struct, "yld_count")?;
let rq_sched_count = member_byte_offset(btf, &rq_struct, "sched_count")?;
let rq_sched_goidle = member_byte_offset(btf, &rq_struct, "sched_goidle")?;
let rq_ttwu_count = member_byte_offset(btf, &rq_struct, "ttwu_count")?;
let rq_ttwu_local = member_byte_offset(btf, &rq_struct, "ttwu_local")?;
Ok(SchedstatOffsets {
rq_sched_info,
sched_info_run_delay,
sched_info_pcount,
rq_yld_count,
rq_sched_count,
rq_sched_goidle,
rq_ttwu_count,
rq_ttwu_local,
})
}
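/// Offsets needed to walk a kernel `idr` (backed by an xarray): the `xa_node`
/// slot array and shift, the flattened `idr.idr_rt.xa_head` offset, and
/// `idr_next`.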
#[derive(Debug, Clone, Copy)]
pub struct IdrOffsets {
pub xa_node_slots: usize,
pub xa_node_shift: usize,
pub idr_xa_head: usize,
pub idr_next: usize,
}
impl IdrOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (xa_node, _) = find_struct(btf, "xa_node")?;
let xa_node_slots = member_byte_offset(btf, &xa_node, "slots")?;
let xa_node_shift = member_byte_offset(btf, &xa_node, "shift")?;
let (idr_struct, _) = find_struct(btf, "idr")?;
let (idr_rt_off, idr_rt_member) =
member_byte_offset_with_member(btf, &idr_struct, "idr_rt")?;
let xa_struct = resolve_member_struct(btf, &idr_rt_member)
.context("btf: resolve type of idr.idr_rt")?;
let xa_head_off = member_byte_offset(btf, &xa_struct, "xa_head")?;
let idr_xa_head = idr_rt_off + xa_head_off;
let idr_next = member_byte_offset(btf, &idr_struct, "idr_next")?;
Ok(Self {
xa_node_slots,
xa_node_shift,
idr_xa_head,
idr_next,
})
}
}
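/// Byte offsets into `struct bpf_map` and related structures, used to read
/// map metadata (and, for supported map types, contents) from kernel memory.
/// Map-type-specific offset groups are resolved best-effort and stored as
/// `Option`.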
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct BpfMapOffsets {
pub map_name: usize,
pub map_type: usize,
pub map_flags: usize,
pub key_size: usize,
pub value_size: usize,
pub max_entries: usize,
pub array_value: usize,
pub xa_node_slots: usize,
pub xa_node_shift: usize,
pub idr_xa_head: usize,
pub idr_next: usize,
pub map_btf: usize,
pub map_btf_value_type_id: usize,
pub map_btf_vmlinux_value_type_id: usize,
pub map_btf_key_type_id: usize,
pub btf_data: usize,
pub btf_data_size: usize,
pub btf_base_btf: usize,
pub htab_offsets: Option<HtabOffsets>,
pub task_storage_offsets: Option<TaskStorageOffsets>,
pub struct_ops_offsets: Option<StructOpsOffsets>,
pub ringbuf_offsets: Option<BpfRingbufOffsets>,
pub stackmap_offsets: Option<BpfStackmapOffsets>,
}
impl BpfMapOffsets {
#[cfg(test)]
pub(crate) const EMPTY: Self = Self {
map_name: 0,
map_type: 0,
map_flags: 0,
key_size: 0,
value_size: 0,
max_entries: 0,
array_value: 0,
xa_node_slots: 0,
xa_node_shift: 0,
idr_xa_head: 0,
idr_next: 0,
map_btf: 0,
map_btf_value_type_id: 0,
map_btf_vmlinux_value_type_id: 0,
map_btf_key_type_id: 0,
btf_data: 0,
btf_data_size: 0,
btf_base_btf: 0,
htab_offsets: None,
task_storage_offsets: None,
struct_ops_offsets: None,
ringbuf_offsets: None,
stackmap_offsets: None,
};
#[allow(dead_code)]
pub fn from_vmlinux(path: &Path) -> Result<Self> {
let btf =
load_btf_from_path(path).with_context(|| format!("btf: open {}", path.display()))?;
Self::from_btf(&btf)
}
#[allow(dead_code)]
pub fn from_vmlinux_bytes(data: &[u8], path: &Path) -> Result<Self> {
let btf = load_btf_from_bytes(data, path)?;
Self::from_btf(&btf)
}
pub fn from_elf(elf: &goblin::elf::Elf<'_>, data: &[u8], path: &Path) -> Result<Self> {
let btf = load_btf_from_elf(elf, data, path)?;
Self::from_btf(&btf)
}
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (bpf_map, _) = find_struct(btf, "bpf_map")?;
let map_name = member_byte_offset(btf, &bpf_map, "name")?;
let map_type = member_byte_offset(btf, &bpf_map, "map_type")?;
let map_flags = member_byte_offset(btf, &bpf_map, "map_flags")?;
let key_size = member_byte_offset(btf, &bpf_map, "key_size")?;
let value_size = member_byte_offset(btf, &bpf_map, "value_size")?;
let max_entries = member_byte_offset(btf, &bpf_map, "max_entries")?;
let (bpf_array, _) = find_struct(btf, "bpf_array")?;
let array_value = member_byte_offset(btf, &bpf_array, "value")?;
let idr = IdrOffsets::from_btf(btf)?;
let map_btf = member_byte_offset(btf, &bpf_map, "btf")?;
let map_btf_value_type_id = member_byte_offset(btf, &bpf_map, "btf_value_type_id")?;
let map_btf_vmlinux_value_type_id =
member_byte_offset(btf, &bpf_map, "btf_vmlinux_value_type_id").unwrap_or(0);
let map_btf_key_type_id = member_byte_offset(btf, &bpf_map, "btf_key_type_id")?;
let (btf_struct, _) = find_struct(btf, "btf")?;
let btf_data = member_byte_offset(btf, &btf_struct, "data")?;
let btf_data_size = member_byte_offset(btf, &btf_struct, "data_size")?;
let btf_base_btf = member_byte_offset(btf, &btf_struct, "base_btf")?;
let htab_offsets = resolve_htab_offsets(btf).ok();
let task_storage_offsets = resolve_task_storage_offsets(btf).ok();
let struct_ops_offsets = resolve_struct_ops_offsets(btf).ok();
let ringbuf_offsets = resolve_ringbuf_offsets(btf).ok();
let stackmap_offsets = resolve_stackmap_offsets(btf).ok();
Ok(Self {
map_name,
map_type,
map_flags,
key_size,
value_size,
max_entries,
array_value,
xa_node_slots: idr.xa_node_slots,
xa_node_shift: idr.xa_node_shift,
idr_xa_head: idr.idr_xa_head,
idr_next: idr.idr_next,
map_btf,
map_btf_value_type_id,
map_btf_vmlinux_value_type_id,
map_btf_key_type_id,
btf_data,
btf_data_size,
btf_base_btf,
htab_offsets,
task_storage_offsets,
struct_ops_offsets,
ringbuf_offsets,
stackmap_offsets,
})
}
}
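/// Byte offsets into `struct bpf_prog`, `bpf_prog_aux`, and `bpf_prog_stats`,
/// plus the IDR offsets needed to enumerate loaded programs.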
#[derive(Debug, Clone)]
pub struct BpfProgOffsets {
pub prog_type: usize,
pub prog_aux: usize,
pub aux_verified_insns: usize,
pub aux_name: usize,
pub xa_node_slots: usize,
pub xa_node_shift: usize,
pub idr_xa_head: usize,
pub idr_next: usize,
pub prog_stats: usize,
pub stats_cnt: usize,
pub stats_nsecs: usize,
pub stats_misses: usize,
}
impl BpfProgOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (bpf_prog, _) = find_struct(btf, "bpf_prog")?;
let prog_type = member_byte_offset(btf, &bpf_prog, "type")?;
let prog_aux = member_byte_offset(btf, &bpf_prog, "aux")?;
let (bpf_prog_aux, _) = find_struct(btf, "bpf_prog_aux")?;
let aux_verified_insns = member_byte_offset(btf, &bpf_prog_aux, "verified_insns")?;
let aux_name = member_byte_offset(btf, &bpf_prog_aux, "name")?;
let idr = IdrOffsets::from_btf(btf)?;
let prog_stats = member_byte_offset(btf, &bpf_prog, "stats")?;
let (bpf_prog_stats, _) = find_struct(btf, "bpf_prog_stats")?;
let stats_cnt = member_byte_offset(btf, &bpf_prog_stats, "cnt")?;
let stats_nsecs = member_byte_offset(btf, &bpf_prog_stats, "nsecs")?;
let stats_misses = member_byte_offset(btf, &bpf_prog_stats, "misses")?;
Ok(Self {
prog_type,
prog_aux,
aux_verified_insns,
aux_name,
xa_node_slots: idr.xa_node_slots,
xa_node_shift: idr.xa_node_shift,
idr_xa_head: idr.idr_xa_head,
idr_next: idr.idr_next,
prog_stats,
stats_cnt,
stats_nsecs,
stats_misses,
})
}
#[allow(dead_code)]
pub fn from_vmlinux(path: &Path) -> Result<Self> {
let data =
std::fs::read(path).with_context(|| format!("read vmlinux: {}", path.display()))?;
Self::from_vmlinux_bytes(&data, path)
}
#[allow(dead_code)]
pub fn from_vmlinux_bytes(data: &[u8], path: &Path) -> Result<Self> {
let btf = load_btf_from_bytes(data, path)?;
Self::from_btf(&btf)
}
pub fn from_elf(elf: &goblin::elf::Elf<'_>, data: &[u8], path: &Path) -> Result<Self> {
let btf = load_btf_from_elf(elf, data, path)?;
Self::from_btf(&btf)
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct RqStructOffsets {
pub scx: usize,
pub curr: usize,
}
impl RqStructOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (rq, _) = find_struct(btf, "rq")?;
Ok(Self {
scx: member_byte_offset(btf, &rq, "scx")?,
curr: member_byte_offset(btf, &rq, "curr")?,
})
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct ScxRqOffsets {
pub local_dsq: usize,
pub runnable_list: usize,
pub nr_running: usize,
pub flags: usize,
pub cpu_released: usize,
pub ops_qseq: usize,
pub kick_sync: Option<usize>,
pub nr_immed: Option<usize>,
pub clock: Option<usize>,
}
impl ScxRqOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (scx_rq, _) = find_struct(btf, "scx_rq")?;
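// Some kernels name this field `pnt_seq` instead of `kick_sync`; accept either.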
let kick_sync = member_byte_offset(btf, &scx_rq, "kick_sync")
.ok()
.or_else(|| member_byte_offset(btf, &scx_rq, "pnt_seq").ok());
Ok(Self {
local_dsq: member_byte_offset(btf, &scx_rq, "local_dsq")?,
runnable_list: member_byte_offset(btf, &scx_rq, "runnable_list")?,
nr_running: member_byte_offset(btf, &scx_rq, "nr_running")?,
flags: member_byte_offset(btf, &scx_rq, "flags")?,
cpu_released: member_byte_offset(btf, &scx_rq, "cpu_released")?,
ops_qseq: member_byte_offset(btf, &scx_rq, "ops_qseq")?,
kick_sync,
nr_immed: member_byte_offset(btf, &scx_rq, "nr_immed").ok(),
clock: member_byte_offset(btf, &scx_rq, "clock").ok(),
})
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct TaskStructCoreOffsets {
pub comm: usize,
pub pid: usize,
pub scx: usize,
}
impl TaskStructCoreOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (task_struct, _) = find_struct(btf, "task_struct")?;
Ok(Self {
comm: member_byte_offset(btf, &task_struct, "comm")?,
pid: member_byte_offset(btf, &task_struct, "pid")?,
scx: member_byte_offset(btf, &task_struct, "scx")?,
})
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct TaskStructEnrichmentOffsets {
pub tgid: usize,
pub prio: usize,
pub static_prio: usize,
pub normal_prio: usize,
pub rt_priority: usize,
pub sched_class: usize,
pub core_cookie: Option<usize>,
pub real_parent: usize,
pub group_leader: usize,
pub signal: usize,
pub stack: usize,
pub nvcsw: usize,
pub nivcsw: usize,
}
impl TaskStructEnrichmentOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (task_struct, _) = find_struct(btf, "task_struct")?;
Ok(Self {
tgid: member_byte_offset(btf, &task_struct, "tgid")?,
prio: member_byte_offset(btf, &task_struct, "prio")?,
static_prio: member_byte_offset(btf, &task_struct, "static_prio")?,
normal_prio: member_byte_offset(btf, &task_struct, "normal_prio")?,
rt_priority: member_byte_offset(btf, &task_struct, "rt_priority")?,
sched_class: member_byte_offset(btf, &task_struct, "sched_class")?,
core_cookie: member_byte_offset(btf, &task_struct, "core_cookie").ok(),
real_parent: member_byte_offset(btf, &task_struct, "real_parent")?,
group_leader: member_byte_offset(btf, &task_struct, "group_leader")?,
signal: member_byte_offset(btf, &task_struct, "signal")?,
stack: member_byte_offset(btf, &task_struct, "stack")?,
nvcsw: member_byte_offset(btf, &task_struct, "nvcsw")?,
nivcsw: member_byte_offset(btf, &task_struct, "nivcsw")?,
})
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct SchedExtEntityOffsets {
pub runnable_node: usize,
pub runnable_at: usize,
pub weight: usize,
pub slice: usize,
pub dsq_vtime: usize,
pub dsq: usize,
pub dsq_list: usize,
pub flags: usize,
pub dsq_flags: usize,
pub sticky_cpu: usize,
pub holding_cpu: usize,
pub tasks_node: usize,
}
impl SchedExtEntityOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (see, _) = find_struct(btf, "sched_ext_entity")?;
Ok(Self {
runnable_node: member_byte_offset(btf, &see, "runnable_node")?,
runnable_at: member_byte_offset(btf, &see, "runnable_at")?,
weight: member_byte_offset(btf, &see, "weight")?,
slice: member_byte_offset(btf, &see, "slice")?,
dsq_vtime: member_byte_offset(btf, &see, "dsq_vtime")?,
dsq: member_byte_offset(btf, &see, "dsq")?,
dsq_list: member_byte_offset(btf, &see, "dsq_list")?,
flags: member_byte_offset(btf, &see, "flags")?,
dsq_flags: member_byte_offset(btf, &see, "dsq_flags")?,
sticky_cpu: member_byte_offset(btf, &see, "sticky_cpu")?,
holding_cpu: member_byte_offset(btf, &see, "holding_cpu")?,
tasks_node: member_byte_offset(btf, &see, "tasks_node")?,
})
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct ScxDsqListNodeOffsets {
pub node: usize,
pub flags: usize,
}
impl ScxDsqListNodeOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (lnode, _) = find_struct(btf, "scx_dsq_list_node")?;
Ok(Self {
node: member_byte_offset(btf, &lnode, "node")?,
flags: member_byte_offset(btf, &lnode, "flags")?,
})
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct ScxDispatchQOffsets {
pub list: usize,
pub nr: usize,
pub seq: usize,
pub id: usize,
pub hash_node: usize,
}
impl ScxDispatchQOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (dsq, _) = find_struct(btf, "scx_dispatch_q")?;
Ok(Self {
list: member_byte_offset(btf, &dsq, "list")?,
nr: member_byte_offset(btf, &dsq, "nr")?,
seq: member_byte_offset(btf, &dsq, "seq")?,
id: member_byte_offset(btf, &dsq, "id")?,
hash_node: member_byte_offset(btf, &dsq, "hash_node")?,
})
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct ScxSchedOffsets {
pub dsq_hash: usize,
pub pnode: Option<usize>,
pub pcpu: Option<usize>,
pub aborting: Option<usize>,
pub bypass_depth: Option<usize>,
pub exit_kind: usize,
}
impl ScxSchedOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (sched, _) = find_struct(btf, "scx_sched")?;
Ok(Self {
dsq_hash: member_byte_offset(btf, &sched, "dsq_hash")?,
pnode: member_byte_offset(btf, &sched, "pnode").ok(),
pcpu: member_byte_offset(btf, &sched, "pcpu").ok(),
aborting: member_byte_offset(btf, &sched, "aborting").ok(),
bypass_depth: member_byte_offset(btf, &sched, "bypass_depth").ok(),
exit_kind: member_byte_offset(btf, &sched, "exit_kind")?,
})
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct ScxSchedPnodeOffsets {
pub global_dsq: Option<usize>,
}
impl ScxSchedPnodeOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (pnode, _) = find_struct(btf, "scx_sched_pnode")?;
Ok(Self {
global_dsq: member_byte_offset(btf, &pnode, "global_dsq").ok(),
})
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct ScxSchedPcpuOffsets {
pub bypass_dsq: Option<usize>,
}
impl ScxSchedPcpuOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (pcpu, _) = find_struct(btf, "scx_sched_pcpu")?;
Ok(Self {
bypass_dsq: member_byte_offset(btf, &pcpu, "bypass_dsq").ok(),
})
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct RhashtableOffsets {
pub tbl: usize,
pub nelems: usize,
pub bucket_table_size: usize,
pub bucket_table_buckets: usize,
pub rhash_head_next: usize,
}
impl RhashtableOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (rht, _) = find_struct(btf, "rhashtable")?;
let tbl = member_byte_offset(btf, &rht, "tbl")?;
let nelems = member_byte_offset(btf, &rht, "nelems")?;
let (btab, _) = find_struct(btf, "bucket_table")?;
let bucket_table_size = member_byte_offset(btf, &btab, "size")?;
let bucket_table_buckets = member_byte_offset(btf, &btab, "buckets")?;
let (rhead, _) = find_struct(btf, "rhash_head")?;
let rhash_head_next = member_byte_offset(btf, &rhead, "next")?;
Ok(Self {
tbl,
nelems,
bucket_table_size,
bucket_table_buckets,
rhash_head_next,
})
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct SignalStructOffsets {
pub nr_threads: usize,
pub pids: usize,
pub nvcsw: usize,
pub nivcsw: usize,
}
impl SignalStructOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (signal, _) = find_struct(btf, "signal_struct")?;
Ok(Self {
nr_threads: member_byte_offset(btf, &signal, "nr_threads")?,
pids: member_byte_offset(btf, &signal, "pids")?,
nvcsw: member_byte_offset(btf, &signal, "nvcsw")?,
nivcsw: member_byte_offset(btf, &signal, "nivcsw")?,
})
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct PidStructOffsets {
pub numbers: usize,
pub size: usize,
}
impl PidStructOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (pid, _) = find_struct(btf, "pid")?;
let numbers = member_byte_offset(btf, &pid, "numbers")?;
Ok(Self {
numbers,
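// `struct pid` stores its per-level `upid` entries in the trailing
// `numbers[]` array, so that array's offset is reused here as the size of
// the fixed part of the struct.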
size: numbers,
})
}
}
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct UpidStructOffsets {
pub nr: usize,
pub size: usize,
}
impl UpidStructOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let (upid, _) = find_struct(btf, "upid")?;
Ok(Self {
nr: member_byte_offset(btf, &upid, "nr")?,
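// sizeof(struct upid), assuming a 64-bit kernel: `int nr` (4) + padding (4)
// + `struct pid_namespace *ns` (8).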
size: 16,
})
}
}
#[derive(Debug, Clone, Copy)]
pub struct RunnableScanOffsets {
pub task_struct_scx: usize,
pub sched_ext_entity_tasks_node: usize,
pub sched_ext_entity_flags: usize,
pub sched_ext_entity_runnable_at: usize,
pub sched_ext_entity_runnable_node: usize,
pub rq_scx: usize,
pub scx_rq_runnable_list: usize,
}
impl RunnableScanOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let task_core = TaskStructCoreOffsets::from_btf(btf)?;
let see = SchedExtEntityOffsets::from_btf(btf)?;
let rq = RqStructOffsets::from_btf(btf)?;
let scx_rq = ScxRqOffsets::from_btf(btf)?;
Ok(Self {
task_struct_scx: task_core.scx,
sched_ext_entity_tasks_node: see.tasks_node,
sched_ext_entity_flags: see.flags,
sched_ext_entity_runnable_at: see.runnable_at,
sched_ext_entity_runnable_node: see.runnable_node,
rq_scx: rq.scx,
scx_rq_runnable_list: scx_rq.runnable_list,
})
}
}
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct TaskEnrichmentOffsets {
pub task_struct_comm: usize,
pub task_struct_pid: usize,
pub task_struct_tgid: usize,
pub task_struct_prio: usize,
pub task_struct_static_prio: usize,
pub task_struct_normal_prio: usize,
pub task_struct_rt_priority: usize,
pub task_struct_sched_class: usize,
pub task_struct_scx: usize,
pub task_struct_core_cookie: Option<usize>,
pub task_struct_real_parent: usize,
pub task_struct_group_leader: usize,
pub task_struct_signal: usize,
pub task_struct_stack: usize,
pub see_weight: usize,
pub signal_struct_nr_threads: usize,
pub signal_struct_pids: usize,
pub signal_struct_nvcsw: usize,
pub signal_struct_nivcsw: usize,
pub task_struct_nvcsw: usize,
pub task_struct_nivcsw: usize,
pub pid_numbers: usize,
pub pid_size: usize,
pub upid_nr: usize,
pub upid_size: usize,
}
#[allow(dead_code)]
impl TaskEnrichmentOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
let task_core = TaskStructCoreOffsets::from_btf(btf)?;
let task_ext = TaskStructEnrichmentOffsets::from_btf(btf)?;
let see = SchedExtEntityOffsets::from_btf(btf)?;
let signal = SignalStructOffsets::from_btf(btf)?;
let pid_offs = PidStructOffsets::from_btf(btf)?;
let upid = UpidStructOffsets::from_btf(btf)?;
Ok(Self {
task_struct_comm: task_core.comm,
task_struct_pid: task_core.pid,
task_struct_tgid: task_ext.tgid,
task_struct_prio: task_ext.prio,
task_struct_static_prio: task_ext.static_prio,
task_struct_normal_prio: task_ext.normal_prio,
task_struct_rt_priority: task_ext.rt_priority,
task_struct_sched_class: task_ext.sched_class,
task_struct_scx: task_core.scx,
task_struct_core_cookie: task_ext.core_cookie,
task_struct_real_parent: task_ext.real_parent,
task_struct_group_leader: task_ext.group_leader,
task_struct_signal: task_ext.signal,
task_struct_stack: task_ext.stack,
see_weight: see.weight,
signal_struct_nr_threads: signal.nr_threads,
signal_struct_pids: signal.pids,
signal_struct_nvcsw: signal.nvcsw,
signal_struct_nivcsw: signal.nivcsw,
task_struct_nvcsw: task_ext.nvcsw,
task_struct_nivcsw: task_ext.nivcsw,
pid_numbers: pid_offs.numbers,
pid_size: pid_offs.size,
upid_nr: upid.nr,
upid_size: upid.size,
})
}
}
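/// Indices into per-type pid arrays (e.g. `signal_struct.pids`), mirroring
/// the kernel's `enum pid_type` ordering.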
#[allow(dead_code)]
pub mod pid_type {
pub const PID: usize = 0;
pub const TGID: usize = 1;
pub const PGID: usize = 2;
pub const SID: usize = 3;
}
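/// Best-effort bundle of every offset group needed to walk sched_ext state in
/// kernel memory; groups that fail to resolve are `None` and are reported by
/// `missing_groups`.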
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ScxWalkerOffsets {
pub rq: Option<RqStructOffsets>,
pub scx_rq: Option<ScxRqOffsets>,
pub task: Option<TaskStructCoreOffsets>,
pub see: Option<SchedExtEntityOffsets>,
pub dsq_lnode: Option<ScxDsqListNodeOffsets>,
pub dsq: Option<ScxDispatchQOffsets>,
pub sched: Option<ScxSchedOffsets>,
pub sched_pnode: Option<ScxSchedPnodeOffsets>,
pub sched_pcpu: Option<ScxSchedPcpuOffsets>,
pub rht: Option<RhashtableOffsets>,
}
#[allow(dead_code)]
impl ScxWalkerOffsets {
pub fn from_btf(btf: &Btf) -> Result<Self> {
Ok(Self {
rq: RqStructOffsets::from_btf(btf).ok(),
scx_rq: ScxRqOffsets::from_btf(btf).ok(),
task: TaskStructCoreOffsets::from_btf(btf).ok(),
see: SchedExtEntityOffsets::from_btf(btf).ok(),
dsq_lnode: ScxDsqListNodeOffsets::from_btf(btf).ok(),
dsq: ScxDispatchQOffsets::from_btf(btf).ok(),
sched: ScxSchedOffsets::from_btf(btf).ok(),
sched_pnode: ScxSchedPnodeOffsets::from_btf(btf).ok(),
sched_pcpu: ScxSchedPcpuOffsets::from_btf(btf).ok(),
rht: RhashtableOffsets::from_btf(btf).ok(),
})
}
pub fn missing_groups(&self) -> Vec<&'static str> {
let mut missing = Vec::new();
if self.rq.is_none() {
missing.push("rq");
}
if self.scx_rq.is_none() {
missing.push("scx_rq");
}
if self.task.is_none() {
missing.push("task_struct");
}
if self.see.is_none() {
missing.push("sched_ext_entity");
}
if self.dsq_lnode.is_none() {
missing.push("scx_dsq_list_node");
}
if self.dsq.is_none() {
missing.push("scx_dispatch_q");
}
if self.sched.is_none() {
missing.push("scx_sched");
}
if self.sched_pnode.is_none() {
missing.push("scx_sched_pnode");
}
if self.sched_pcpu.is_none() {
missing.push("scx_sched_pcpu");
}
if self.rht.is_none() {
missing.push("rhashtable/bucket_table/rhash_head");
}
missing
}
}
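/// Flag bit in `scx_dsq_list_node.flags` marking DSQ iterator cursor nodes,
/// which should be skipped when walking a DSQ list.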
#[allow(dead_code)]
pub const SCX_DSQ_LNODE_ITER_CURSOR: u32 = 1;
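/// Low bit used by the kernel to lock `rhashtable` bucket-head pointers; mask
/// it off before following a bucket entry.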
#[allow(dead_code)]
pub const RHT_PTR_LOCK_BIT: u64 = 1;
#[cfg(test)]
mod tests;