use std::borrow::Cow;
use std::collections::HashMap;
use std::fs::File;
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use gimli::{
AttributeValue, DwTag, Dwarf, EndianSlice, EntriesTreeNode, RunTimeEndian, Unit, UnitOffset,
};
use memmap2::Mmap;
use object::{Object, ObjectSection};
use petgraph::algo::dijkstra;
use petgraph::graph::{DiGraph, NodeIndex};
use petgraph::visit::EdgeRef;
use petgraph::Direction;
use serde::{Deserialize, Serialize};
use super::artifact::{self, Symbol};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CallKind {
Inline,
Direct,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CallEdge {
pub caller: String,
pub callee: String,
pub kind: CallKind,
}
pub fn extract_callgraph(binary_path: &Path, workspace_root: &Path) -> Result<Vec<CallEdge>> {
let file = File::open(binary_path)
.with_context(|| format!("open binary {}", binary_path.display()))?;
let mmap = unsafe { Mmap::map(&file) }
.with_context(|| format!("mmap {}", binary_path.display()))?;
let object = object::File::parse(&*mmap)
.with_context(|| format!("parse object {}", binary_path.display()))?;
let endian = if object.is_little_endian() {
RunTimeEndian::Little
} else {
RunTimeEndian::Big
};
let load_section = |id: gimli::SectionId| -> Result<Cow<[u8]>> {
Ok(match object.section_by_name(id.name()) {
Some(s) => s.uncompressed_data().unwrap_or(Cow::Borrowed(&[])),
None => Cow::Borrowed(&[]),
})
};
let dwarf_sections = gimli::DwarfSections::load(load_section)?;
let dwarf = dwarf_sections.borrow(|section| EndianSlice::new(section, endian));
let abs_root = workspace_root
.canonicalize()
.unwrap_or_else(|_| workspace_root.to_path_buf());
let mut edges = Vec::new();
let mut units = dwarf.units();
while let Some(header) = units.next()? {
let unit = dwarf.unit(header)?;
let comp_dir = unit
.comp_dir
.as_ref()
.map(|s| PathBuf::from(s.to_string_lossy().into_owned()));
let files = build_file_table(&dwarf, &unit, comp_dir.as_deref())?;
let names = build_name_map(&dwarf, &unit, &files, &abs_root)?;
let mut tree = unit.entries_tree(None)?;
let root = tree.root()?;
walk(root, None, &names, &mut edges, &dwarf, &unit)?;
}
Ok(edges)
}
type NameMap = HashMap<UnitOffset, String>;
fn walk(
node: EntriesTreeNode<EndianSlice<RunTimeEndian>>,
enclosing: Option<&str>,
names: &NameMap,
out: &mut Vec<CallEdge>,
dwarf: &Dwarf<EndianSlice<RunTimeEndian>>,
unit: &Unit<EndianSlice<RunTimeEndian>>,
) -> Result<()> {
let entry = node.entry();
let tag = entry.tag();
let mut current_caller = enclosing.map(|s| s.to_string());
if tag == gimli::DW_TAG_subprogram {
if let Some(name) = names.get(&entry.offset()) {
current_caller = Some(name.clone());
}
} else if tag == gimli::DW_TAG_inlined_subroutine {
if let Some(caller) = enclosing {
if let Some(origin) = abstract_origin(entry) {
if let Some(callee) = resolve_name(origin, names, dwarf, unit)? {
if caller != callee {
out.push(CallEdge {
caller: caller.to_string(),
callee,
kind: CallKind::Inline,
});
}
}
}
}
}
let mut children = node.children();
while let Some(child) = children.next()? {
walk(child, current_caller.as_deref(), names, out, dwarf, unit)?;
}
Ok(())
}
fn abstract_origin(
entry: &gimli::DebuggingInformationEntry<EndianSlice<RunTimeEndian>>,
) -> Option<UnitOffset> {
match entry.attr_value(gimli::DW_AT_abstract_origin) {
Some(AttributeValue::UnitRef(offset)) => Some(offset),
_ => None,
}
}
fn resolve_name(
offset: UnitOffset,
cache: &NameMap,
dwarf: &Dwarf<EndianSlice<RunTimeEndian>>,
unit: &Unit<EndianSlice<RunTimeEndian>>,
) -> Result<Option<String>> {
if let Some(n) = cache.get(&offset) {
return Ok(Some(n.clone()));
}
let entry = unit.entry(offset)?;
Ok(read_subprogram_name(&entry, dwarf, unit))
}
fn read_subprogram_name(
entry: &gimli::DebuggingInformationEntry<EndianSlice<RunTimeEndian>>,
dwarf: &Dwarf<EndianSlice<RunTimeEndian>>,
unit: &Unit<EndianSlice<RunTimeEndian>>,
) -> Option<String> {
let raw = if let Some(v) = entry.attr_value(gimli::DW_AT_linkage_name) {
dwarf.attr_string(unit, v).ok().map(|s| s.to_string_lossy().into_owned())
} else if let Some(v) = entry.attr_value(gimli::DW_AT_name) {
dwarf.attr_string(unit, v).ok().map(|s| s.to_string_lossy().into_owned())
} else {
None
}?;
let demangled = rustc_demangle::try_demangle(&raw)
.map(|d| format!("{:#}", d))
.unwrap_or(raw);
Some(strip_generics(&demangled))
}
type FileTable = Vec<PathBuf>;
fn build_file_table(
dwarf: &Dwarf<EndianSlice<RunTimeEndian>>,
unit: &Unit<EndianSlice<RunTimeEndian>>,
comp_dir: Option<&Path>,
) -> Result<FileTable> {
let mut out: FileTable = Vec::new();
let Some(program) = unit.line_program.clone() else { return Ok(out) };
let header = program.header();
for (idx, file) in header.file_names().iter().enumerate() {
let mut path = PathBuf::new();
if let Some(dir_idx) = file.directory(header) {
if let Ok(dir) = dwarf.attr_string(unit, dir_idx) {
let dir_str = dir.to_string_lossy().into_owned();
if Path::new(&dir_str).is_absolute() {
path.push(dir_str);
} else {
if let Some(cd) = comp_dir { path.push(cd); }
path.push(dir_str);
}
}
} else if let Some(cd) = comp_dir {
path.push(cd);
}
if let Ok(name) = dwarf.attr_string(unit, file.path_name()) {
path.push(name.to_string_lossy().into_owned());
}
while out.len() <= idx { out.push(PathBuf::new()); }
out[idx] = path;
}
Ok(out)
}
fn build_name_map(
dwarf: &Dwarf<EndianSlice<RunTimeEndian>>,
unit: &Unit<EndianSlice<RunTimeEndian>>,
files: &FileTable,
abs_root: &Path,
) -> Result<NameMap> {
let mut out = HashMap::new();
let mut tree = unit.entries_tree(None)?;
let root = tree.root()?;
collect_names(root, &mut out, files, abs_root, dwarf, unit)?;
Ok(out)
}
fn collect_names(
node: EntriesTreeNode<EndianSlice<RunTimeEndian>>,
out: &mut NameMap,
files: &FileTable,
abs_root: &Path,
dwarf: &Dwarf<EndianSlice<RunTimeEndian>>,
unit: &Unit<EndianSlice<RunTimeEndian>>,
) -> Result<()> {
{
let entry = node.entry();
if entry.tag() == gimli::DW_TAG_subprogram && in_workspace(entry, files, abs_root) {
if let Some(name) = read_subprogram_name(entry, dwarf, unit) {
out.insert(entry.offset(), name);
}
}
}
let mut children = node.children();
while let Some(child) = children.next()? {
collect_names(child, out, files, abs_root, dwarf, unit)?;
}
Ok(())
}
fn in_workspace(
entry: &gimli::DebuggingInformationEntry<EndianSlice<RunTimeEndian>>,
files: &FileTable,
abs_root: &Path,
) -> bool {
let Some(AttributeValue::FileIndex(idx)) = entry.attr_value(gimli::DW_AT_decl_file) else {
return false;
};
let Some(fp) = files.get(idx as usize) else { return false };
if fp.as_os_str().is_empty() {
return false;
}
let canon = fp.canonicalize().unwrap_or(fp.clone());
canon.starts_with(abs_root)
}
fn strip_generics(s: &str) -> String {
let mut out = String::with_capacity(s.len());
let mut depth = 0i32;
for ch in s.chars() {
match ch {
'<' => depth += 1,
'>' => { if depth > 0 { depth -= 1; } }
_ if depth == 0 => out.push(ch),
_ => {}
}
}
out
}
#[allow(dead_code)]
fn _touch(_t: DwTag, _s: Option<Symbol>) {}
pub struct Callgraph {
g: DiGraph<String, CallKind>,
by_name: HashMap<String, NodeIndex>,
}
impl Callgraph {
pub fn from_edges(edges: &[CallEdge]) -> Self {
let mut g: DiGraph<String, CallKind> = DiGraph::new();
let mut by_name: HashMap<String, NodeIndex> = HashMap::new();
let ensure = |g: &mut DiGraph<String, CallKind>,
by_name: &mut HashMap<String, NodeIndex>,
name: &str|
-> NodeIndex {
if let Some(idx) = by_name.get(name) {
return *idx;
}
let idx = g.add_node(name.to_string());
by_name.insert(name.to_string(), idx);
idx
};
for e in edges {
let a = ensure(&mut g, &mut by_name, &e.caller);
let b = ensure(&mut g, &mut by_name, &e.callee);
g.add_edge(a, b, e.kind);
}
Self { g, by_name }
}
pub fn node_count(&self) -> usize { self.g.node_count() }
pub fn edge_count(&self) -> usize { self.g.edge_count() }
pub fn callers_of(&self, name: &str) -> Vec<String> {
self.neighbors(name, Direction::Incoming)
}
pub fn callees_of(&self, name: &str) -> Vec<String> {
self.neighbors(name, Direction::Outgoing)
}
fn neighbors(&self, name: &str, dir: Direction) -> Vec<String> {
let Some(&idx) = self.by_name.get(name) else { return Vec::new() };
let mut out: Vec<String> = self
.g
.edges_directed(idx, dir)
.map(|e| {
let other = if dir == Direction::Outgoing { e.target() } else { e.source() };
self.g[other].clone()
})
.collect();
out.sort();
out.dedup();
out
}
pub fn path_between(&self, from: &str, to: &str) -> Option<Vec<String>> {
let &start = self.by_name.get(from)?;
let &goal = self.by_name.get(to)?;
if start == goal {
return Some(vec![self.g[start].clone()]);
}
let dist = dijkstra(&self.g, start, Some(goal), |_| 1usize);
if !dist.contains_key(&goal) {
return None;
}
let mut path = vec![goal];
let mut current = goal;
while current != start {
let d = dist[¤t];
let mut next = None;
for e in self.g.edges_directed(current, Direction::Incoming) {
let pred = e.source();
if dist.get(&pred) == Some(&(d - 1)) {
next = Some(pred);
break;
}
}
let Some(p) = next else { return None };
path.push(p);
current = p;
}
path.reverse();
Some(path.into_iter().map(|n| self.g[n].clone()).collect())
}
}
pub use artifact::extract_symbols;
#[cfg(test)]
mod tests {
use super::*;
fn e(caller: &str, callee: &str) -> CallEdge {
CallEdge { caller: caller.into(), callee: callee.into(), kind: CallKind::Inline }
}
#[test]
fn callers_callees_and_path() {
let cg = Callgraph::from_edges(&[
e("a", "b"), e("b", "c"), e("c", "d"), e("a", "e"), e("e", "d"),
]);
assert_eq!(cg.callees_of("a"), vec!["b".to_string(), "e".to_string()]);
assert_eq!(cg.callers_of("d"), vec!["c".to_string(), "e".to_string()]);
let p = cg.path_between("a", "d").unwrap();
assert_eq!(p.len(), 3, "expected shortest 3-hop path, got {p:?}");
assert_eq!(p.first().map(|s| s.as_str()), Some("a"));
assert_eq!(p.last().map(|s| s.as_str()), Some("d"));
assert!(cg.path_between("d", "a").is_none(), "graph is directed");
}
#[test]
fn extract_from_own_binary_runs() {
let bin = Path::new(env!("CARGO_MANIFEST_DIR")).join("target/debug/nornir");
if !bin.exists() {
eprintln!("skipping: {} not built", bin.display());
return;
}
let root = Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let edges = extract_callgraph(&bin, &root).expect("extract");
eprintln!("got {} inline edges from {}", edges.len(), bin.display());
let cg = Callgraph::from_edges(&edges);
assert_eq!(cg.edge_count(), edges.len());
}
}