use std::borrow::Cow;
use std::fs::File;
use std::path::{Path, PathBuf};
use anyhow::{anyhow, Context, Result};
use gimli::{
AttributeValue, DebuggingInformationEntry, Dwarf, EndianSlice, RunTimeEndian, Unit,
};
use memmap2::Mmap;
use object::{Object, ObjectSection};
use serde::{Deserialize, Serialize};
/// One function-level symbol recovered from a binary's DWARF debug info.
///
/// Instances are produced by `extract_symbols` from `DW_TAG_subprogram`
/// entries whose declaration file lies under the workspace root.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Symbol {
/// Demangled name with every `<...>` group removed (see `strip_generics`).
pub name: String,
/// Demangled form of `name_mangled` (formatted with `{:#}`), or a copy of
/// `name_mangled` when it cannot be demangled.
pub name_demangled: String,
/// `DW_AT_linkage_name` when present, otherwise `DW_AT_name`.
pub name_mangled: String,
/// Declaration file path, relative to the workspace root.
pub file: String,
/// Declaration line taken from `DW_AT_decl_line`, when available.
pub line: Option<u32>,
/// Code size in bytes derived from `DW_AT_high_pc` / `DW_AT_low_pc`, when available.
pub size_bytes: Option<u64>,
/// First path component of `file`.
/// NOTE(review): presumably the workspace has one top-level directory per
/// crate, so this doubles as the crate name — confirm against the layout.
pub krate: String,
}
/// Extracts function symbols from the DWARF debug info of `binary_path`.
///
/// Every `DW_TAG_subprogram` entry of every compilation unit is visited; an
/// entry yields a [`Symbol`] only when it has a name and its declaration file
/// resolves to a path underneath `workspace_root` (see `extract_one`).
///
/// `workspace_root` is canonicalized when possible; if canonicalization fails
/// the path is used as given.
///
/// # Errors
///
/// Returns an error when the binary cannot be opened, mapped or parsed, when
/// a debug section cannot be decompressed, or when the DWARF data itself is
/// malformed.
pub fn extract_symbols(binary_path: &Path, workspace_root: &Path) -> Result<Vec<Symbol>> {
    let file = File::open(binary_path)
        .with_context(|| format!("open binary {}", binary_path.display()))?;
    // SAFETY: the mapping is read-only and never outlives this function. As
    // with any file mapping, soundness relies on the binary not being
    // truncated or rewritten by another process while `mmap` is alive, which
    // cannot be enforced from here.
    let mmap = unsafe { Mmap::map(&file) }
        .with_context(|| format!("mmap {}", binary_path.display()))?;
    let object = object::File::parse(&*mmap)
        .with_context(|| format!("parse object {}", binary_path.display()))?;
    let endian = if object.is_little_endian() {
        RunTimeEndian::Little
    } else {
        RunTimeEndian::Big
    };

    // A missing section is normal and is represented as empty data. A section
    // that exists but cannot be decompressed is reported instead of being
    // treated as empty: silently dropping it would produce a misleadingly
    // empty or partial symbol list.
    let load_section = |id: gimli::SectionId| -> Result<Cow<[u8]>> {
        Ok(match object.section_by_name(id.name()) {
            Some(section) => section.uncompressed_data().with_context(|| {
                format!(
                    "decompress section {} of {}",
                    id.name(),
                    binary_path.display()
                )
            })?,
            None => Cow::Borrowed(&[]),
        })
    };
    let dwarf_sections = gimli::DwarfSections::load(load_section)?;
    let dwarf = dwarf_sections.borrow(|section| EndianSlice::new(section, endian));

    let abs_root = workspace_root
        .canonicalize()
        .unwrap_or_else(|_| workspace_root.to_path_buf());

    let mut symbols = Vec::new();
    let mut units = dwarf.units();
    while let Some(header) = units.next()? {
        let unit = dwarf.unit(header)?;
        let comp_dir = unit
            .comp_dir
            .as_ref()
            .map(|dir| PathBuf::from(dir.to_string_lossy().into_owned()));
        // The file table is per unit: DW_AT_decl_file indices are only
        // meaningful relative to the unit's own line program.
        let file_table = build_file_table(&dwarf, &unit, comp_dir.as_deref())?;
        let mut entries = unit.entries();
        while let Some(entry) = entries.next_dfs()? {
            if entry.tag() != gimli::DW_TAG_subprogram {
                continue;
            }
            if let Some(sym) = extract_one(&dwarf, &unit, entry, &file_table, &abs_root)? {
                symbols.push(sym);
            }
        }
    }
    Ok(symbols)
}
/// Source file paths of one compilation unit, indexed directly by the DWARF
/// file index (the value carried by `DW_AT_decl_file`). Slots that do not
/// correspond to a file hold an empty path.
type FileTable = Vec<PathBuf>;

/// Builds the [`FileTable`] for `unit` from its line-program header.
///
/// Each file entry is resolved as `directory/name`; a relative (or missing)
/// directory is anchored at `comp_dir` when one is given. Directory or file
/// names that cannot be read are skipped, leaving that part of the path out.
///
/// Returns an empty table when the unit has no line program.
fn build_file_table(
    dwarf: &Dwarf<EndianSlice<RunTimeEndian>>,
    unit: &Unit<EndianSlice<RunTimeEndian>>,
    comp_dir: Option<&Path>,
) -> Result<FileTable> {
    let Some(program) = unit.line_program.clone() else {
        return Ok(FileTable::new());
    };
    let header = program.header();
    let entries = header.file_names();

    // `file_names()` is a plain 0-based slice, but DWARF <= 4 numbers files
    // starting at 1 (index 0 means "no file"). Reserve an empty slot 0 for
    // those versions so the table can be indexed with the raw DWARF index.
    // DWARF 5 indices are 0-based and map onto the slice directly.
    let first_index: usize = if header.version() <= 4 { 1 } else { 0 };
    let mut out: FileTable = Vec::with_capacity(first_index + entries.len());
    out.resize(first_index, PathBuf::new());

    for file in entries {
        let mut path = PathBuf::new();
        match file.directory(header) {
            Some(dir_attr) => {
                if let Ok(dir) = dwarf.attr_string(unit, dir_attr) {
                    let dir = dir.to_string_lossy().into_owned();
                    if !Path::new(&dir).is_absolute() {
                        // Relative include directories are relative to the
                        // compilation directory.
                        if let Some(cd) = comp_dir {
                            path.push(cd);
                        }
                    }
                    path.push(dir);
                }
            }
            None => {
                if let Some(cd) = comp_dir {
                    path.push(cd);
                }
            }
        }
        if let Ok(name) = dwarf.attr_string(unit, file.path_name()) {
            path.push(name.to_string_lossy().into_owned());
        }
        out.push(path);
    }
    Ok(out)
}
fn extract_one(
dwarf: &Dwarf<EndianSlice<RunTimeEndian>>,
unit: &Unit<EndianSlice<RunTimeEndian>>,
entry: &DebuggingInformationEntry<EndianSlice<RunTimeEndian>>,
files: &FileTable,
abs_root: &Path,
) -> Result<Option<Symbol>> {
let linkage = attr_string(dwarf, unit, entry, gimli::DW_AT_linkage_name)?;
let plain = attr_string(dwarf, unit, entry, gimli::DW_AT_name)?;
let mangled = match (linkage, plain.clone()) {
(Some(l), _) => l,
(None, Some(n)) => n,
(None, None) => return Ok(None),
};
let demangled = rustc_demangle::try_demangle(&mangled)
.map(|d| format!("{:#}", d))
.unwrap_or_else(|_| mangled.clone());
let stripped = strip_generics(&demangled);
let file_idx = match entry.attr_value(gimli::DW_AT_decl_file) {
Some(AttributeValue::FileIndex(n)) => Some(n as usize),
_ => None,
};
let line = match entry.attr_value(gimli::DW_AT_decl_line) {
Some(AttributeValue::Udata(n)) => Some(n as u32),
_ => None,
};
let file_path = file_idx.and_then(|i| files.get(i)).cloned();
let Some(fp) = file_path else { return Ok(None) };
if fp.as_os_str().is_empty() {
return Ok(None);
}
let canon = fp.canonicalize().unwrap_or(fp.clone());
let Ok(rel) = canon.strip_prefix(abs_root) else {
return Ok(None);
};
let krate = rel
.components()
.next()
.map(|c| c.as_os_str().to_string_lossy().into_owned())
.unwrap_or_default();
let file = rel.to_string_lossy().into_owned();
let low = match entry.attr_value(gimli::DW_AT_low_pc) {
Some(AttributeValue::Addr(a)) => Some(a),
_ => None,
};
let size_bytes = match (low, entry.attr_value(gimli::DW_AT_high_pc)) {
(Some(_), Some(AttributeValue::Udata(n))) => Some(n),
(Some(a), Some(AttributeValue::Addr(b))) => Some(b.saturating_sub(a)),
_ => None,
};
Ok(Some(Symbol {
name: stripped,
name_demangled: demangled,
name_mangled: mangled,
file,
line,
size_bytes,
krate,
}))
}
/// Reads attribute `name` of `entry` as an owned, lossily-decoded string.
///
/// Yields `Ok(None)` when the entry does not carry the attribute, and an
/// error when the attribute exists but cannot be resolved to a string.
fn attr_string(
    dwarf: &Dwarf<EndianSlice<RunTimeEndian>>,
    unit: &Unit<EndianSlice<RunTimeEndian>>,
    entry: &DebuggingInformationEntry<EndianSlice<RunTimeEndian>>,
    name: gimli::DwAt,
) -> Result<Option<String>> {
    match entry.attr_value(name) {
        None => Ok(None),
        Some(value) => dwarf
            .attr_string(unit, value)
            .map(|raw| Some(raw.to_string_lossy().into_owned()))
            .map_err(|e| anyhow!("attr_string: {e}")),
    }
}
/// Removes every `<...>` group (including nested ones) from `s`.
///
/// Used to turn a demangled name such as `foo::Bar<X, Y<Z>>::baz` into
/// `foo::Bar::baz`. Note that a leading `<T as Trait>` qualifier is an angle
/// group too and is removed as well.
///
/// The `>` of a `->` arrow is not a closing bracket: inside a group it is
/// skipped without changing the nesting depth, and outside any group the
/// arrow is kept intact. A stray `>` with no matching `<` is dropped.
fn strip_generics(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut depth = 0usize;
    let mut prev: Option<char> = None;
    for ch in s.chars() {
        match ch {
            // Second half of `->`: part of a function type, not a bracket.
            '>' if prev == Some('-') => {
                if depth == 0 {
                    out.push(ch);
                }
            }
            '<' => depth += 1,
            '>' => depth = depth.saturating_sub(1),
            _ if depth == 0 => out.push(ch),
            _ => {}
        }
        prev = Some(ch);
    }
    out
}
/// Returns, in input order, every symbol for which `pattern` occurs as a
/// substring of its stripped, demangled or mangled name.
pub fn lookup<'a>(symbols: &'a [Symbol], pattern: &str) -> Vec<&'a Symbol> {
    let mut hits = Vec::new();
    for sym in symbols {
        let name_forms = [&sym.name, &sym.name_demangled, &sym.name_mangled];
        if name_forms.iter().any(|form| form.contains(pattern)) {
            hits.push(sym);
        }
    }
    hits
}
/// Returns, in input order, every symbol whose `file` path ends with `suffix`.
pub fn defined_in<'a>(symbols: &'a [Symbol], suffix: &str) -> Vec<&'a Symbol> {
    let mut matches = Vec::new();
    for sym in symbols {
        if sym.file.ends_with(suffix) {
            matches.push(sym);
        }
    }
    matches
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test against this crate's own debug binary. The workspace root
    /// is taken to be the parent of the manifest directory. The test is
    /// skipped (with a note on stderr) when the binary has not been built.
    #[test]
    fn extract_own_binary() {
        let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
        let bin = manifest_dir.join("target/debug/nornir");
        if !bin.exists() {
            eprintln!("skipping: {} not built", bin.display());
            return;
        }
        let root = manifest_dir.parent().unwrap();

        let syms = extract_symbols(&bin, root).expect("extract");
        assert!(!syms.is_empty(), "expected symbols in {}", bin.display());

        let from_nornir = syms.iter().filter(|s| s.krate == "nornir").count();
        assert!(
            from_nornir > 0,
            "expected some symbols from crate `nornir` (got {} total)",
            syms.len()
        );

        let index_hits = lookup(&syms, "Index");
        assert!(!index_hits.is_empty(), "expected `Index` symbols");
    }

    /// Generic argument lists, including nested ones, are removed; names
    /// without angle brackets pass through unchanged.
    #[test]
    fn strip_generics_basic() {
        let cases = [
            ("Vec<u32>::new", "Vec::new"),
            ("foo::Bar<X, Y<Z>>::baz", "foo::Bar::baz"),
            ("plain", "plain"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_generics(input), expected);
        }
    }
}