use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use super::callgraph_dwarf::{CallEdge, CallKind};
/// Builds the list of call edges found in the given textual IR files.
///
/// Every path is read as a UTF-8 string and handed to `parse_module`, which
/// appends the edges it finds. The accumulated edges are then filtered and
/// de-duplicated:
///
/// * when `workspace_crates` is `Some`, an edge is kept only if its caller or
///   its callee satisfies `in_crates` for that list;
/// * edges whose callee satisfies `is_skippable` are always dropped.
///
/// # Errors
///
/// Returns an error (with the context `read <path>`) as soon as one of the
/// files cannot be read into a string.
pub fn extract_from_files(paths: &[PathBuf], workspace_crates: Option<&[&str]>) -> Result<Vec<CallEdge>> {
    let mut edges = Vec::new();
    for path in paths {
        let ir = std::fs::read_to_string(path)
            .with_context(|| format!("read {}", path.display()))?;
        parse_module(&ir, &mut edges);
    }

    // Both filters are pure predicates over a single edge, so they can be
    // evaluated together in one pass.
    edges.retain(|edge| {
        let touches_workspace = match workspace_crates {
            Some(crates) => in_crates(&edge.caller, crates) || in_crates(&edge.callee, crates),
            None => true,
        };
        touches_workspace && !is_skippable(&edge.callee)
    });

    dedupe(&mut edges);
    Ok(edges)
}
/// Collects every `.ll` file below `dir` (via `collect_ll`) and extracts the
/// call edges from them with `extract_from_files`.
///
/// `workspace_crates` is forwarded unchanged to `extract_from_files`.
///
/// # Errors
///
/// Propagates any error from walking the directory or from reading the
/// discovered files.
pub fn extract_from_dir(dir: &Path, workspace_crates: Option<&[&str]>) -> Result<Vec<CallEdge>> {
    let ll_files = {
        let mut found = Vec::new();
        collect_ll(dir, &mut found)?;
        found
    };
    extract_from_files(&ll_files, workspace_crates)
}
fn collect_ll(dir: &Path, out: &mut Vec<PathBuf>) -> Result<()> {
for entry in std::fs::read_dir(dir).with_context(|| format!("read_dir {}", dir.display()))? {
let e = entry?;
let p = e.path();
if p.is_dir() {
collect_ll(&p, out)?;
} else if p.extension().and_then(|s| s.to_str()) == Some("ll") {
out.push(p);
}
}
Ok(())
}
/// Scans one module of textual IR line by line and appends a `CallEdge` to
/// `out` for every direct call found inside a `define`d function body.
///
/// The scanner is a small state machine:
///
/// * Outside a body, only lines starting with `define ` are considered. If a
///   global name can be read from such a line, its demangled form becomes the
///   current caller.
/// * Inside a body, `{` / `}` are counted to find the end of the function.
///   The line that closes the body is not inspected for calls.
/// * Remaining lines that look like a `call` / `invoke` instruction are passed
///   to `scan_call_target`; a resolved callee is demangled and recorded unless
///   it equals the caller (self-recursion is not reported).
///
/// All emitted edges have kind `CallKind::Direct`.
fn parse_module(text: &str, out: &mut Vec<CallEdge>) {
    // `Some(name)` exactly while we are inside the body of function `name`.
    let mut caller: Option<String> = None;
    let mut depth: i32 = 0;

    for line in text.lines().map(str::trim_start) {
        if caller.is_none() {
            let header = match line.strip_prefix("define ") {
                Some(header) => header,
                None => continue,
            };
            if let Some(symbol) = scan_global_name(header) {
                // Depth starts at 1 when the opening brace is already on the
                // `define` line, otherwise at 0 until a later line opens it.
                depth = i32::from(header.contains('{'));
                caller = Some(demangle(&symbol));
            }
            continue;
        }

        // Track nesting; the first `}` that brings the depth to zero (or
        // below) ends the current function and stops scanning this line.
        for ch in line.chars() {
            if ch == '{' {
                depth += 1;
            } else if ch == '}' {
                depth -= 1;
                if depth <= 0 {
                    caller = None;
                    depth = 0;
                    break;
                }
            }
        }

        let owner = match caller.as_deref() {
            Some(name) => name,
            None => continue,
        };

        let starts_as_call = ["call ", "invoke "].iter().any(|&kw| line.starts_with(kw));
        let mentions_call = [" call ", " invoke ", "tail call ", "musttail call ", "notail call "]
            .iter()
            .any(|&kw| line.contains(kw));
        if !(starts_as_call || mentions_call) {
            continue;
        }

        if let Some(symbol) = scan_call_target(line) {
            let callee = demangle(&symbol);
            if callee.as_str() != owner {
                out.push(CallEdge { caller: owner.to_string(), callee, kind: CallKind::Direct });
            }
        }
    }
}
/// Reads the identifier that follows the first `@` in `s`.
///
/// Returns `None` when `s` contains no `@` or when `read_ident` cannot read
/// an identifier right after it.
fn scan_global_name(s: &str) -> Option<String> {
    s.split_once('@')
        .and_then(|(_, after_sigil)| read_ident(after_sigil))
}
/// Extracts the symbol name of the directly-called global from one
/// `call` / `invoke` line.
///
/// The callee is taken to be the first `@name` that is outside every
/// parenthesised group and outside every quoted string. Skipping
/// parenthesised groups matters because text before the callee may itself
/// contain parentheses — a return attribute such as `dereferenceable(24)` or
/// an explicit function type such as `i32 (ptr, ...)` — and because globals
/// passed as arguments (`ptr @fmt`) must not be mistaken for the callee.
///
/// Returns `None` when no such global exists on the line, e.g. for an
/// indirect call through a local value (`call void %fn(ptr @g)`) or for
/// inline assembly.
fn scan_call_target(s: &str) -> Option<String> {
    let bytes = s.as_bytes();
    let mut paren_depth: usize = 0;
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            // `@` is ASCII, so `i + 1` is always a valid char boundary.
            b'@' if paren_depth == 0 => return read_ident(&s[i + 1..]),
            b'"' => {
                // Skip a quoted string (quoted local name, asm template, ...)
                // so that parentheses or `@` inside it are not interpreted.
                i += 1;
                while i < bytes.len() && bytes[i] != b'"' {
                    i += if bytes[i] == b'\\' { 2 } else { 1 };
                }
            }
            b'(' => paren_depth += 1,
            // Tolerate stray closing parentheses instead of underflowing.
            b')' => paren_depth = paren_depth.saturating_sub(1),
            _ => {}
        }
        i += 1;
    }
    None
}

/// Reads an identifier from the start of `s` (the text right after a sigil).
///
/// Two forms are understood:
///
/// * quoted — `"..."`; the text between the quotes is returned verbatim
///   (escape sequences are skipped over but not decoded). An unterminated
///   quote yields `None`.
/// * bare — the longest prefix made of ASCII letters, digits, `_`, `.`, `$`
///   and `-`; an empty prefix yields `None`.
fn read_ident(s: &str) -> Option<String> {
    if let Some(quoted) = s.strip_prefix('"') {
        let bytes = quoted.as_bytes();
        let mut i = 0;
        while i < bytes.len() {
            match bytes[i] {
                b'"' => return Some(quoted[..i].to_string()),
                // A backslash escapes the following byte, so an escaped
                // quote does not terminate the name.
                b'\\' => i += 2,
                _ => i += 1,
            }
        }
        None
    } else {
        let end = s
            .bytes()
            .position(|b| !(b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.' | b'$' | b'-')))
            .unwrap_or(s.len());
        if end == 0 {
            None
        } else {
            Some(s[..end].to_string())
        }
    }
}
/// Returns the demangled form of `s`, or `s` itself when
/// `rustc_demangle::try_demangle` rejects it.
///
/// The demangled name is rendered with the alternate (`{:#}`) flag, which
/// per the rustc-demangle documentation omits the trailing hash.
fn demangle(s: &str) -> String {
    rustc_demangle::try_demangle(s)
        .map(|pretty| format!("{:#}", pretty))
        .unwrap_or_else(|_| s.to_string())
}
/// Reports whether `callee` names a compiler/runtime support routine that
/// should not appear in the call graph.
///
/// A callee is skipped when it starts with one of the listed prefixes
/// (`llvm.` intrinsics, `__rust_` / `__llvm_` / `__cxa_` runtime hooks, and
/// the `__memcpy` / `__memset` / `__memmove` families) or when it is exactly
/// `memcpy`, `memset` or `memmove`.
fn is_skippable(callee: &str) -> bool {
    // `__memmove` is listed alongside `__memcpy` / `__memset` so that all
    // three memory routines are treated the same in both spellings.
    const SKIPPED_PREFIXES: [&str; 7] = [
        "llvm.",
        "__rust_",
        "__llvm_",
        "__cxa_",
        "__memcpy",
        "__memset",
        "__memmove",
    ];
    const SKIPPED_NAMES: [&str; 3] = ["memcpy", "memset", "memmove"];

    SKIPPED_NAMES.contains(&callee)
        || SKIPPED_PREFIXES.iter().any(|&prefix| callee.starts_with(prefix))
}
/// Reports whether the demangled symbol `name` belongs to one of `crates`.
///
/// A name belongs to crate `c` when it is exactly `c` or starts with `c::`.
/// Leading `<` characters are ignored first, so qualified paths such as
/// `<c::Type as Trait>::method` and `<c::Type>::method` are attributed to the
/// crate of their self type rather than being rejected outright.
///
/// Only the leading path is inspected: a crate that appears solely as the
/// trait in `<T as c::Trait>` or as a generic argument is not matched.
fn in_crates(name: &str, crates: &[&str]) -> bool {
    let path = name.trim_start_matches('<');
    crates.iter().any(|&krate| {
        // `strip_prefix` avoids building a temporary "krate::" string for
        // every comparison.
        matches!(
            path.strip_prefix(krate),
            Some(rest) if rest.is_empty() || rest.starts_with("::")
        )
    })
}
/// Removes repeated edges in place, keeping the first occurrence of each
/// `(caller, callee)` pair and preserving the relative order of the rest.
///
/// The edge `kind` is not part of the key.
fn dedupe(edges: &mut Vec<CallEdge>) {
    use std::collections::HashSet;
    let mut seen_pairs = HashSet::<(String, String)>::new();
    edges.retain(|edge| {
        let pair = (edge.caller.clone(), edge.callee.clone());
        // `insert` returns `false` for a pair that was already recorded.
        seen_pairs.insert(pair)
    });
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `parse_module` over `ir` and returns the raw, unfiltered edges.
    fn edges_of(ir: &str) -> Vec<CallEdge> {
        let mut found = Vec::new();
        parse_module(ir, &mut found);
        found
    }

    /// A single direct call between two defined functions yields one edge.
    #[test]
    fn parses_simple_direct_call() {
        let mut edges = edges_of(r#"
; ModuleID = 'test'
define void @"_ZN4demo3foo17h0123456789abcdefE"() {
entry:
call void @"_ZN4demo3bar17hdeadbeefdeadbeefE"()
ret void
}
define void @"_ZN4demo3bar17hdeadbeefdeadbeefE"() {
entry:
ret void
}
"#);
        dedupe(&mut edges);
        assert_eq!(edges.len(), 1);
        let edge = &edges[0];
        assert!(edge.caller.contains("demo::foo"), "caller={}", edge.caller);
        assert!(edge.callee.contains("demo::bar"), "callee={}", edge.callee);
        assert_eq!(edge.kind, CallKind::Direct);
    }

    /// Intrinsic calls are removed by `is_skippable`; calls through a local
    /// value produce no edge at all.
    #[test]
    fn skips_intrinsics_and_indirect() {
        let edges = edges_of(r#"
define void @"_ZN4demo3foo17h0a0a0a0a0a0a0a0aE"() {
entry:
call void @llvm.memcpy.p0.p0.i64(ptr %0, ptr %1, i64 16, i1 false)
%fn = load ptr, ptr %vt
call void %fn()
call void @"_ZN4demo3bar17h0b0b0b0b0b0b0b0bE"()
ret void
}
"#);
        assert!(edges.iter().any(|e| e.callee.contains("demo::bar")));
        let kept = edges.iter().filter(|e| !is_skippable(&e.callee)).count();
        assert_eq!(kept, 1);
    }

    /// `tail call` and `invoke` are both recognised as call sites.
    #[test]
    fn tail_and_invoke_recognized() {
        let mut edges = edges_of(r#"
define void @"_ZN4demo3foo17h0a0a0a0a0a0a0a0aE"() {
entry:
tail call void @"_ZN4demo1a17h0b0b0b0b0b0b0b0bE"()
invoke void @"_ZN4demo1b17h0c0c0c0c0c0c0c0cE"() to label %ok unwind label %bad
ok:
ret void
bad:
ret void
}
"#);
        dedupe(&mut edges);
        assert_eq!(edges.len(), 2);
    }

    /// The workspace filter keeps an edge when either endpoint is in the set.
    #[test]
    fn workspace_filter_keeps_endpoints_in_set() {
        let kept: Vec<CallEdge> = vec![
            CallEdge { caller: "nornir::a".into(), callee: "std::vec::Vec::new".into(), kind: CallKind::Direct },
            CallEdge { caller: "std::clone".into(), callee: "core::mem::drop".into(), kind: CallKind::Direct },
        ]
        .into_iter()
        .filter(|e| in_crates(&e.caller, &["nornir"]) || in_crates(&e.callee, &["nornir"]))
        .collect();
        assert_eq!(kept.len(), 1);
        assert!(kept[0].caller.starts_with("nornir::"));
    }
}