use std::collections::{BTreeSet, HashSet};
use std::hash::{BuildHasher as _, Hash as _, Hasher as _};
use std::path::{Path, PathBuf};
use std::process::Command;
/// Generates the BTF anchor header at `anchor_path`, or reuses it when it is
/// still current.
///
/// Steps, in order:
/// 1. Discover `.bpf.c` sources from the objects in `bpf_object_dir`.
/// 2. Fingerprint the inputs: crate version, sorted source paths, `cflags`,
///    and the sorted `(name, size)` of every `*.bpf.o` in `bpf_object_dir`.
/// 3. If the hash stored in the existing anchor matches, return it untouched.
/// 4. Otherwise collect the sources' dependencies via `clang`, extract the
///    struct names defined in them, and rewrite the anchor header.
///
/// Returns the anchor path (canonicalized when possible, otherwise as given),
/// or `None` when no sources, dependency files, or struct definitions are
/// found, or when the header cannot be written.
pub(crate) fn generate_btf_anchor(
    bpf_object_dir: &Path,
    clang: &str,
    cflags: &[String],
    anchor_path: &Path,
) -> Option<PathBuf> {
    // Canonical form of the anchor path, falling back to the path as given.
    let resolved_anchor =
        || std::fs::canonicalize(anchor_path).unwrap_or_else(|_| anchor_path.to_path_buf());

    let mut bpf_sources = discover_sources_from_objects(bpf_object_dir);
    if bpf_sources.is_empty() {
        tracing::debug!("btf_anchor: no .bpf.c sources found via BTF");
        return None;
    }
    tracing::debug!(
        sources = bpf_sources.len(),
        "btf_anchor: discovered BPF sources via BTF"
    );
    // Discovery order is unspecified (it comes out of a HashSet); sort so the
    // fingerprint below is stable.
    bpf_sources.sort();

    // Fixed seeds keep the fingerprint comparable across runs. The feed order
    // below is part of the on-disk cache key and must not change.
    let mut hasher = ahash::RandomState::with_seeds(0x6b74, 0x7374, 0x7200, 0x616e).build_hasher();
    env!("CARGO_PKG_VERSION").hash(&mut hasher);
    for source in &bpf_sources {
        source.to_string_lossy().hash(&mut hasher);
    }
    for flag in cflags {
        flag.hash(&mut hasher);
    }
    if let Ok(listing) = std::fs::read_dir(bpf_object_dir) {
        let mut object_sizes: Vec<(String, u64)> = Vec::new();
        for entry in listing.flatten() {
            let file_name = entry.file_name().to_string_lossy().to_string();
            if !file_name.ends_with(".bpf.o") {
                continue;
            }
            if let Ok(meta) = entry.metadata() {
                object_sizes.push((file_name, meta.len()));
            }
        }
        // Directory iteration order is not guaranteed; sort before hashing.
        object_sizes.sort();
        for (file_name, byte_len) in &object_sizes {
            file_name.hash(&mut hasher);
            byte_len.hash(&mut hasher);
        }
    }
    let input_hash = hasher.finish();

    // Cache hit: the existing header was generated from the same inputs.
    if read_anchor_hash(anchor_path) == Some(input_hash) {
        tracing::debug!("btf_anchor: cached anchor is current");
        return Some(resolved_anchor());
    }

    let dep_files = collect_dep_files(&bpf_sources, clang, cflags);
    if dep_files.is_empty() {
        tracing::debug!("btf_anchor: clang -M produced no dep files");
        return None;
    }
    tracing::debug!(
        files = dep_files.len(),
        "btf_anchor: collected dep files via clang -M"
    );

    let struct_names = extract_struct_names(&dep_files);
    if struct_names.is_empty() {
        tracing::debug!("btf_anchor: no struct definitions found");
        return None;
    }
    tracing::debug!(
        structs = struct_names.len(),
        "btf_anchor: extracted struct definitions"
    );

    // Best effort: if the directory cannot be created the write below fails
    // and reports the problem by returning `None`.
    if let Some(parent_dir) = anchor_path.parent() {
        let _ = std::fs::create_dir_all(parent_dir);
    }
    write_anchor_header(anchor_path, &struct_names, input_hash)?;
    Some(resolved_anchor())
}
fn discover_sources_from_objects(dir: &Path) -> Vec<PathBuf> {
let mut sources: HashSet<PathBuf> = HashSet::new();
let Ok(entries) = std::fs::read_dir(dir) else {
return Vec::new();
};
for entry in entries.flatten() {
let path = entry.path();
let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
continue;
};
if !name.ends_with(".bpf.o") || name == "bpf.bpf.o" {
continue;
}
let Ok(bytes) = std::fs::read(&path) else {
continue;
};
if let Some(btf_data) = find_btf_section_raw(&bytes) {
for s in btf_strings(btf_data) {
if s.ends_with(".bpf.c") {
let p = PathBuf::from(s);
if p.is_file()
&& let Ok(canonical) = std::fs::canonicalize(&p)
{
sources.insert(canonical);
}
}
}
}
}
sources.into_iter().collect()
}
/// Locates the raw contents of the `.BTF` section in an ELF64 little-endian
/// object image.
///
/// Only the fields needed for the lookup are parsed: `e_shoff`, `e_shentsize`,
/// `e_shnum` and `e_shstrndx` from the ELF header, plus `sh_name`, `sh_offset`
/// and `sh_size` from each section header.
///
/// Returns `None` when:
/// - the image is shorter than an ELF64 header or is not an ELF64
///   little-endian file,
/// - `e_shstrndx` is out of range or `e_shentsize` is smaller than 64,
/// - the section-name string table lies outside the image,
/// - no section is named exactly `.BTF` (so `.BTF.ext` does not match), or
/// - the `.BTF` section lies outside the image or is shorter than the 24-byte
///   BTF header.
///
/// All offsets come from the file and are untrusted, so every computation is
/// overflow-checked and every slice access is bounds-checked. Malformed input
/// yields `None` rather than a panic.
fn find_btf_section_raw(bytes: &[u8]) -> Option<&[u8]> {
    /// Size of the ELF64 file header.
    const EHDR_LEN: usize = 64;
    /// Size of one ELF64 section header.
    const SHDR_LEN: usize = 64;
    /// Minimum size of a BTF section (the fixed BTF header).
    const BTF_HDR_MIN: usize = 24;

    /// Reads a little-endian `u16` at `at`, widened to `usize`.
    fn le_u16(buf: &[u8], at: usize) -> Option<usize> {
        let field = buf.get(at..at.checked_add(2)?)?;
        let mut raw = [0u8; 2];
        raw.copy_from_slice(field);
        Some(usize::from(u16::from_le_bytes(raw)))
    }

    /// Reads a little-endian `u32` at `at`; `None` if it does not fit `usize`.
    fn le_u32(buf: &[u8], at: usize) -> Option<usize> {
        let field = buf.get(at..at.checked_add(4)?)?;
        let mut raw = [0u8; 4];
        raw.copy_from_slice(field);
        usize::try_from(u32::from_le_bytes(raw)).ok()
    }

    /// Reads a little-endian `u64` at `at`; `None` if it does not fit `usize`.
    fn le_u64(buf: &[u8], at: usize) -> Option<usize> {
        let field = buf.get(at..at.checked_add(8)?)?;
        let mut raw = [0u8; 8];
        raw.copy_from_slice(field);
        usize::try_from(u64::from_le_bytes(raw)).ok()
    }

    /// Returns the first `SHDR_LEN` bytes of section header `index`, or `None`
    /// when the header would overflow or fall outside the image.
    fn section_header(
        bytes: &[u8],
        e_shoff: usize,
        e_shentsize: usize,
        index: usize,
    ) -> Option<&[u8]> {
        let start = index.checked_mul(e_shentsize)?.checked_add(e_shoff)?;
        bytes.get(start..start.checked_add(SHDR_LEN)?)
    }

    if bytes.len() < EHDR_LEN {
        return None;
    }
    // The field offsets used below are only valid for ELF64 (EI_CLASS == 2)
    // in little-endian byte order (EI_DATA == 1).
    if !bytes.starts_with(b"\x7fELF") || bytes[4] != 2 || bytes[5] != 1 {
        return None;
    }

    let e_shoff = le_u64(bytes, 40)?;
    let e_shentsize = le_u16(bytes, 58)?;
    let e_shnum = le_u16(bytes, 60)?;
    let e_shstrndx = le_u16(bytes, 62)?;
    if e_shstrndx >= e_shnum || e_shentsize < SHDR_LEN {
        return None;
    }

    // Section-name string table: needed to resolve each `sh_name`.
    let strtab_hdr = section_header(bytes, e_shoff, e_shentsize, e_shstrndx)?;
    let strtab_off = le_u64(strtab_hdr, 24)?;
    let strtab_size = le_u64(strtab_hdr, 32)?;
    let strtab = bytes.get(strtab_off..strtab_off.checked_add(strtab_size)?)?;

    for index in 0..e_shnum {
        // A header past the end of the image means the table is truncated;
        // nothing after it can be valid either.
        let Some(hdr) = section_header(bytes, e_shoff, e_shentsize, index) else {
            break;
        };
        let sh_name = le_u32(hdr, 0)?;
        // Requiring the trailing NUL makes this an exact-name match and
        // rejects names that merely start with `.BTF`.
        let is_btf = strtab
            .get(sh_name..)
            .is_some_and(|tail| tail.starts_with(b".BTF\0"));
        if !is_btf {
            continue;
        }
        let payload = le_u64(hdr, 24)
            .zip(le_u64(hdr, 32))
            .and_then(|(off, size)| bytes.get(off..off.checked_add(size)?));
        match payload {
            Some(data) if data.len() >= BTF_HDR_MIN => return Some(data),
            // Out-of-range or undersized: keep looking at later sections.
            _ => continue,
        }
    }
    None
}
/// Returns every non-empty, valid-UTF-8 string in a BTF blob's string section.
///
/// The section is located from three little-endian `u32` header fields:
/// `hdr_len` (bytes 4..8), `str_off` (bytes 16..20) and `str_len`
/// (bytes 20..24); it starts at `hdr_len + str_off`. Strings are separated by
/// NUL bytes. A blob shorter than 24 bytes, or a string section extending past
/// the end of the blob, yields an empty vector.
fn btf_strings(btf: &[u8]) -> Vec<&str> {
    if btf.len() < 24 {
        return Vec::new();
    }
    // Little-endian u32 header field at byte offset `at`.
    let field =
        |at: usize| u32::from_le_bytes([btf[at], btf[at + 1], btf[at + 2], btf[at + 3]]) as usize;
    let (hdr_len, str_off, str_len) = (field(4), field(16), field(20));

    let first = hdr_len + str_off;
    let Some(table) = btf.get(first..first + str_len) else {
        return Vec::new();
    };
    table
        .split(|&byte| byte == 0)
        .filter_map(|raw| std::str::from_utf8(raw).ok())
        .filter(|text| !text.is_empty())
        .collect()
}
fn collect_dep_files(sources: &[PathBuf], clang: &str, cflags: &[String]) -> Vec<PathBuf> {
let all_deps = std::sync::Mutex::new(HashSet::<PathBuf>::new());
std::thread::scope(|s| {
for source in sources {
let deps_ref = &all_deps;
s.spawn(move || {
let output = Command::new(clang)
.arg("-M")
.arg("-MG")
.arg("-target")
.arg("bpf")
.args(cflags)
.arg(source)
.output();
let Ok(output) = output else { return };
if !output.status.success() {
return;
}
let mut local = HashSet::new();
let stdout = String::from_utf8_lossy(&output.stdout);
let joined = stdout.replace("\\\n", " ");
for line in joined.lines() {
let deps_part = match line.split_once(':') {
Some((_, deps)) => deps,
None => line,
};
for token in deps_part.split_whitespace() {
let p = PathBuf::from(token);
if p.is_file()
&& let Ok(canonical) = std::fs::canonicalize(&p)
&& !is_system_header(&canonical)
{
local.insert(canonical);
}
}
}
deps_ref.lock().unwrap().extend(local);
});
}
});
all_deps.into_inner().unwrap().into_iter().collect()
}
/// Reports whether `path` is treated as a system/toolchain header.
///
/// A path qualifies when its lossy string form contains `/usr/include/`,
/// `/usr/lib/` or `scx_utils-bpf_h/`, or when its file name is exactly
/// `vmlinux.h` or `vmlinux.bpf.h`.
fn is_system_header(path: &Path) -> bool {
    const EXCLUDED_FRAGMENTS: [&str; 3] = ["/usr/include/", "/usr/lib/", "scx_utils-bpf_h/"];

    let text = path.to_string_lossy();
    let under_excluded_dir = EXCLUDED_FRAGMENTS
        .iter()
        .any(|fragment| text.contains(*fragment));
    let is_vmlinux_header = matches!(
        path.file_name().and_then(|n| n.to_str()),
        Some("vmlinux.h" | "vmlinux.bpf.h")
    );
    under_excluded_dir || is_vmlinux_header
}
fn extract_struct_names(files: &[PathBuf]) -> BTreeSet<String> {
let mut names = BTreeSet::new();
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_c::LANGUAGE.into())
.expect("tree-sitter-c language");
for file in files {
let Ok(content) = std::fs::read_to_string(file) else {
continue;
};
let Some(tree) = parser.parse(&content, None) else {
continue;
};
collect_structs(tree.root_node(), content.as_bytes(), &mut names);
}
names
}
fn collect_structs(node: tree_sitter::Node, source: &[u8], names: &mut BTreeSet<String>) {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if child.kind() == "struct_specifier"
&& child.child_by_field_name("body").is_some()
&& let Some(name_node) = child.child_by_field_name("name")
&& let Ok(name) = std::str::from_utf8(&source[name_node.byte_range()])
&& !name.is_empty()
&& !name.starts_with("__")
{
names.insert(name.to_string());
}
collect_structs(child, source, names);
}
}
/// Reads the input hash recorded in an anchor header.
///
/// Looks at the first line that starts with `/* ktstr_hash=`; that line must
/// end with ` */` and carry a hexadecimal `u64` in between. Returns `None`
/// when the file cannot be read, no such line exists, the suffix is missing,
/// or the payload is not valid hex.
fn read_anchor_hash(path: &Path) -> Option<u64> {
    let text = std::fs::read_to_string(path).ok()?;
    let payload = text
        .lines()
        .find_map(|line| line.strip_prefix("/* ktstr_hash="))?
        .strip_suffix(" */")?;
    u64::from_str_radix(payload, 16).ok()
}
/// Writes the anchor header to `path`, replacing any existing file.
///
/// The file consists of a `/* ktstr_hash=<16 hex digits> */` line, an include
/// guard, and one weak pointer declaration `__ktstr_keep_<i>` per entry of
/// `structs`, numbered in the set's iteration order. Returns `None` when the
/// file cannot be written.
fn write_anchor_header(path: &Path, structs: &BTreeSet<String>, hash: u64) -> Option<()> {
    let mut lines: Vec<String> = Vec::with_capacity(structs.len() + 4);
    lines.push(format!("/* ktstr_hash={hash:016x} */\n"));
    lines.push("#ifndef __KTSTR_BTF_ANCHOR_H\n".to_string());
    lines.push("#define __KTSTR_BTF_ANCHOR_H\n".to_string());
    lines.extend(
        structs
            .iter()
            .enumerate()
            .map(|(i, s)| format!("struct {s} __attribute__((weak)) *__ktstr_keep_{i};\n")),
    );
    lines.push("#endif\n".to_string());
    std::fs::write(path, lines.concat()).ok()
}
// Unit tests for the parsing and header-generation helpers in this file.
// They use synthetic BTF blobs and ELF images built in memory, plus temporary
// directories for the helpers that touch the filesystem.
#[cfg(test)]
mod tests {
use super::*;
// Builds a minimal BTF blob: a 24-byte zeroed header carrying `hdr_len`
// (bytes 4..8), `str_off` (bytes 16..20) and the string-section length
// (bytes 20..24), zero padding up to `hdr_len + str_off`, then `str_section`.
fn btf_blob(hdr_len: u32, str_off: u32, str_section: &[u8]) -> Vec<u8> {
let mut v = vec![0u8; 24];
v[4..8].copy_from_slice(&hdr_len.to_le_bytes());
v[16..20].copy_from_slice(&str_off.to_le_bytes());
v[20..24].copy_from_slice(&(str_section.len() as u32).to_le_bytes());
let start = hdr_len as usize + str_off as usize;
// Pad so the string section really begins at `hdr_len + str_off`.
if v.len() < start {
v.resize(start, 0);
}
v.extend_from_slice(str_section);
v
}
// Size of one section header in the synthetic ELF images.
const SHENTSIZE: usize = 64;
// One section of a synthetic ELF image: its name and raw contents.
struct Sect {
name: &'static str,
payload: Vec<u8>,
}
// Builds an in-memory ELF64 little-endian image containing `sections`.
// Layout: 64-byte ELF header, then the section header table, then the section
// payloads back to back. The payload of the section at index `shstrndx` is
// replaced by a generated string table holding every section name.
fn build_elf(sections: &[Sect], shstrndx: u16) -> Vec<u8> {
let shnum = sections.len();
let ehdr_size = 64usize;
let e_shoff = ehdr_size;
let sht_size = shnum * SHENTSIZE;
// The string table starts with a NUL byte; `name_off[i]` is the offset of
// section i's NUL-terminated name within it.
let mut shstrtab = vec![0u8]; let mut name_off = vec![0u32; shnum];
for (i, s) in sections.iter().enumerate() {
name_off[i] = shstrtab.len() as u32;
shstrtab.extend_from_slice(s.name.as_bytes());
shstrtab.push(0);
}
let payload_base = e_shoff + sht_size;
let mut payloads: Vec<Vec<u8>> = Vec::with_capacity(shnum);
for (i, s) in sections.iter().enumerate() {
// The designated string-table section carries the generated name table.
if i == shstrndx as usize {
payloads.push(shstrtab.clone());
} else {
payloads.push(s.payload.clone());
}
}
// Assign consecutive file offsets to the payloads.
let mut sh_offsets = vec![0usize; shnum];
let mut cur = payload_base;
for i in 0..shnum {
sh_offsets[i] = cur;
cur += payloads[i].len();
}
let total = cur;
let mut img = vec![0u8; total];
// ELF ident: magic, EI_CLASS = 2 (64-bit), EI_DATA = 1 (little-endian);
// then e_shoff, e_shentsize, e_shnum and e_shstrndx.
img[0..4].copy_from_slice(b"\x7fELF");
img[4] = 2; img[5] = 1; img[40..48].copy_from_slice(&(e_shoff as u64).to_le_bytes());
img[58..60].copy_from_slice(&(SHENTSIZE as u16).to_le_bytes());
img[60..62].copy_from_slice(&(shnum as u16).to_le_bytes());
img[62..64].copy_from_slice(&shstrndx.to_le_bytes());
// Section headers: only sh_name (0..4), sh_offset (24..32) and
// sh_size (32..40) are filled in; every other field stays zero.
for i in 0..shnum {
let base = e_shoff + i * SHENTSIZE;
img[base..base + 4].copy_from_slice(&name_off[i].to_le_bytes());
img[base + 24..base + 32].copy_from_slice(&(sh_offsets[i] as u64).to_le_bytes());
img[base + 32..base + 40].copy_from_slice(&(payloads[i].len() as u64).to_le_bytes());
}
// Copy each payload to its assigned offset.
for i in 0..shnum {
let off = sh_offsets[i];
img[off..off + payloads[i].len()].copy_from_slice(&payloads[i]);
}
img
}
// NUL bytes separate strings; empty chunks (leading, doubled, trailing NULs)
// are dropped.
#[test]
fn btf_strings_splits_on_nul_and_drops_empties() {
let section = b"\0foo\0bar.bpf.c\0\0baz\0";
let blob = btf_blob(24, 0, section);
assert_eq!(btf_strings(&blob), vec!["foo", "bar.bpf.c", "baz"]);
}
// The string section is located at `hdr_len + str_off`, not at a fixed offset.
#[test]
fn btf_strings_honors_hdr_len_plus_str_off() {
let section = b"\0alpha\0beta\0";
let blob = btf_blob(32, 4, section);
assert_eq!(btf_strings(&blob), vec!["alpha", "beta"]);
}
// A blob one byte shorter than the 24-byte header yields nothing.
#[test]
fn btf_strings_too_short_returns_empty() {
assert!(btf_strings(&[0u8; 23]).is_empty());
}
// A declared string-section length reaching past the end of the blob is rejected.
#[test]
fn btf_strings_section_past_end_returns_empty() {
let mut blob = btf_blob(24, 0, b"\0only\0");
let bogus_len = (blob.len() as u32) + 100;
blob[20..24].copy_from_slice(&bogus_len.to_le_bytes());
assert!(btf_strings(&blob).is_empty());
}
// A chunk that is not valid UTF-8 is skipped without affecting its neighbours.
#[test]
fn btf_strings_skips_invalid_utf8_chunk() {
let section = b"\0good\0\xff\xfe\0also_good\0";
let blob = btf_blob(24, 0, section);
assert_eq!(btf_strings(&blob), vec!["good", "also_good"]);
}
// Happy path: the `.BTF` payload is returned byte-for-byte.
#[test]
fn find_btf_section_raw_returns_btf_payload() {
let btf_payload: Vec<u8> = (0u8..40).collect(); let img = build_elf(
&[
Sect {
name: "",
payload: vec![],
}, Sect {
name: ".text",
payload: vec![0xab; 8],
},
Sect {
name: ".BTF",
payload: btf_payload.clone(),
},
Sect {
name: ".shstrtab",
payload: vec![],
},
],
3,
);
let found = find_btf_section_raw(&img).expect("should find .BTF");
assert_eq!(found, btf_payload.as_slice());
}
// An image shorter than the 64-byte ELF header is rejected.
#[test]
fn find_btf_section_raw_too_short_is_none() {
assert!(find_btf_section_raw(&[0u8; 63]).is_none());
}
// A well-formed image without a `.BTF` section yields `None`.
#[test]
fn find_btf_section_raw_absent_section_is_none() {
let img = build_elf(
&[
Sect {
name: "",
payload: vec![],
},
Sect {
name: ".data",
payload: vec![1, 2, 3],
},
Sect {
name: ".shstrtab",
payload: vec![],
},
],
2,
);
assert!(find_btf_section_raw(&img).is_none());
}
// `.BTF.ext` shares the `.BTF` prefix but must not be mistaken for it.
#[test]
fn find_btf_section_raw_rejects_btf_ext_prefix_collision() {
let payload: Vec<u8> = (0u8..40).collect();
let img = build_elf(
&[
Sect {
name: "",
payload: vec![],
},
Sect {
name: ".BTF.ext",
payload,
},
Sect {
name: ".shstrtab",
payload: vec![],
},
],
2,
);
assert!(find_btf_section_raw(&img).is_none());
}
// A `.BTF` section of 23 bytes is below the 24-byte minimum and is rejected.
#[test]
fn find_btf_section_raw_rejects_undersized_btf() {
let img = build_elf(
&[
Sect {
name: "",
payload: vec![],
},
Sect {
name: ".BTF",
payload: vec![0u8; 23],
}, Sect {
name: ".shstrtab",
payload: vec![],
},
],
2,
);
assert!(find_btf_section_raw(&img).is_none());
}
// `e_shstrndx` equal to the section count (3 of 3) is out of range.
#[test]
fn find_btf_section_raw_rejects_shstrndx_out_of_range() {
let mut img = build_elf(
&[
Sect {
name: "",
payload: vec![],
},
Sect {
name: ".BTF",
payload: (0u8..40).collect(),
},
Sect {
name: ".shstrtab",
payload: vec![],
},
],
2,
);
// Patch e_shstrndx in the ELF header after the image is built.
img[62..64].copy_from_slice(&3u16.to_le_bytes());
assert!(find_btf_section_raw(&img).is_none());
}
// `e_shentsize` of 63 is smaller than the 64 bytes the parser requires.
#[test]
fn find_btf_section_raw_rejects_small_shentsize() {
let mut img = build_elf(
&[
Sect {
name: "",
payload: vec![],
},
Sect {
name: ".BTF",
payload: (0u8..40).collect(),
},
Sect {
name: ".shstrtab",
payload: vec![],
},
],
2,
);
// Patch e_shentsize in the ELF header after the image is built.
img[58..60].copy_from_slice(&63u16.to_le_bytes());
assert!(find_btf_section_raw(&img).is_none());
}
// End-to-end over both parsers: ELF image -> `.BTF` payload -> strings.
#[test]
fn find_btf_section_raw_then_btf_strings_recovers_source_path() {
let btf = btf_blob(24, 0, b"\0/abs/sched.bpf.c\0task_struct\0");
let img = build_elf(
&[
Sect {
name: "",
payload: vec![],
},
Sect {
name: ".BTF",
payload: btf.clone(),
},
Sect {
name: ".shstrtab",
payload: vec![],
},
],
2,
);
let raw = find_btf_section_raw(&img).expect(".BTF present");
assert_eq!(raw, btf.as_slice());
assert_eq!(btf_strings(raw), vec!["/abs/sched.bpf.c", "task_struct"],);
}
// Table-driven check of `is_system_header`, including near misses
// (`vmlinux.h.bak`, `/opt/myusr/include/`) that must not match.
#[test]
fn is_system_header_matrix() {
let cases: &[(&str, bool)] = &[
("/usr/include/stdio.h", true),
("/usr/lib/clang/18/include/stddef.h", true),
("/home/x/.cache/vmlinux.h", true),
("/some/dir/vmlinux.bpf.h", true),
("/build/scx_utils-bpf_h/bpf/common.bpf.h", true),
("/home/x/scheds/scx_simple.bpf.c", false),
("/home/x/scheds/intf.h", false),
("/home/x/vmlinux.h.bak", false),
("/opt/myusr/include/foo.h", false),
];
for (p, expected) in cases {
assert_eq!(
is_system_header(Path::new(p)),
*expected,
"is_system_header({p:?}) should be {expected}",
);
}
}
// Only named struct definitions (with a body) whose name does not start with
// `__` are collected; the result is sorted because it comes from a BTreeSet.
#[test]
fn extract_struct_names_collects_defined_named_structs() {
let dir = tempfile::tempdir().expect("tempdir");
let src = r#"
struct foo { int a; }; /* defined, named -> kept */
struct bar; /* forward decl -> no body, skip */
struct { int x; } anon_var; /* anonymous -> no name, skip */
struct __internal { int y; }; /* __-prefixed -> skip */
typedef struct baz { int z; } baz_t;/* named body -> baz kept */
void use_bar(struct bar *p) {} /* reference only -> no body */
"#;
let file = dir.path().join("types.h");
std::fs::write(&file, src).expect("write src");
let got = extract_struct_names(&[file]);
let names: Vec<&str> = got.iter().map(String::as_str).collect();
assert_eq!(names, vec!["baz", "foo"]);
}
// The same struct defined in two files appears once in the result.
#[test]
fn extract_struct_names_dedups_across_files() {
let dir = tempfile::tempdir().expect("tempdir");
let a = dir.path().join("a.h");
let b = dir.path().join("b.h");
std::fs::write(&a, "struct shared { int a; };\nstruct only_a { int x; };").unwrap();
std::fs::write(&b, "struct shared { int a; };\nstruct only_b { int y; };").unwrap();
let got = extract_struct_names(&[a, b]);
let names: Vec<&str> = got.iter().map(String::as_str).collect();
assert_eq!(names, vec!["only_a", "only_b", "shared"]);
}
// A missing file is skipped; the remaining files are still processed.
#[test]
fn extract_struct_names_skips_unreadable_file() {
let dir = tempfile::tempdir().expect("tempdir");
let good = dir.path().join("good.h");
std::fs::write(&good, "struct kept { int a; };").unwrap();
let missing = dir.path().join("does_not_exist.h");
let got = extract_struct_names(&[missing, good]);
let names: Vec<&str> = got.iter().map(String::as_str).collect();
assert_eq!(names, vec!["kept"]);
}
// A struct defined inside another struct's body is found as well.
#[test]
fn collect_structs_recurses_into_nested_definitions() {
let dir = tempfile::tempdir().expect("tempdir");
let file = dir.path().join("nested.h");
std::fs::write(&file, "struct outer { struct inner { int a; } i; int b; };").unwrap();
let got = extract_struct_names(&[file]);
let names: Vec<&str> = got.iter().map(String::as_str).collect();
assert_eq!(names, vec!["inner", "outer"]);
}
// Written hashes are read back unchanged at both extremes of the u64 range
// and for a mixed-digit value.
#[test]
fn anchor_hash_round_trips_full_u64_range() {
let dir = tempfile::tempdir().expect("tempdir");
let mut structs = BTreeSet::new();
structs.insert("task_struct".to_string());
for hash in [0u64, 0x0123_4567_89ab_cdef, u64::MAX] {
let path = dir.path().join(format!("anchor_{hash:x}.h"));
write_anchor_header(&path, &structs, hash).expect("write");
assert_eq!(
read_anchor_hash(&path),
Some(hash),
"round-trip failed for hash {hash:#018x}",
);
}
}
// Pins the exact header text: zero-padded hash line, include guard, and one
// declaration per struct numbered in sorted order.
#[test]
fn write_anchor_header_renders_exact_body() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("anchor.h");
let mut structs = BTreeSet::new();
structs.insert("zeta".to_string());
structs.insert("alpha".to_string());
write_anchor_header(&path, &structs, 0xdead_beef).expect("write");
let content = std::fs::read_to_string(&path).expect("read back");
let expected = "\
/* ktstr_hash=00000000deadbeef */
#ifndef __KTSTR_BTF_ANCHOR_H
#define __KTSTR_BTF_ANCHOR_H
struct alpha __attribute__((weak)) *__ktstr_keep_0;
struct zeta __attribute__((weak)) *__ktstr_keep_1;
#endif
";
assert_eq!(content, expected);
}
// With no structs the header still has the hash line and the include guard,
// and the hash remains readable.
#[test]
fn write_anchor_header_empty_set_emits_guarded_header() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("empty.h");
let structs = BTreeSet::new();
write_anchor_header(&path, &structs, 0x42).expect("write");
let content = std::fs::read_to_string(&path).expect("read");
let expected = "\
/* ktstr_hash=0000000000000042 */
#ifndef __KTSTR_BTF_ANCHOR_H
#define __KTSTR_BTF_ANCHOR_H
#endif
";
assert_eq!(content, expected);
assert_eq!(read_anchor_hash(&path), Some(0x42));
}
// A nonexistent anchor file yields `None`.
#[test]
fn read_anchor_hash_missing_file_is_none() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("nope.h");
assert_eq!(read_anchor_hash(&path), None);
}
// A file without any `ktstr_hash` marker line yields `None`.
#[test]
fn read_anchor_hash_no_marker_line_is_none() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("plain.h");
std::fs::write(&path, "#ifndef X\n#define X\n#endif\n").unwrap();
assert_eq!(read_anchor_hash(&path), None);
}
// A marker whose payload contains non-hex characters yields `None`.
#[test]
fn read_anchor_hash_non_hex_payload_is_none() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("bad.h");
std::fs::write(&path, "/* ktstr_hash=00zz0000deadbeef */\n").unwrap();
assert_eq!(read_anchor_hash(&path), None);
}
// A marker line missing the closing ` */` yields `None`.
#[test]
fn read_anchor_hash_missing_suffix_is_none() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("nosuffix.h");
std::fs::write(&path, "/* ktstr_hash=00000000deadbeef\n").unwrap();
assert_eq!(read_anchor_hash(&path), None);
}
}