nornir 0.4.21

Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
Documentation
//! Direct-call extraction from textual LLVM IR (`.ll`).
//!
//! Complements `callgraph_dwarf` (inline edges only):
//!   - DWARF gives us the inlined subroutine tree.
//!   - LLVM IR gives us every direct `call @callee` site, inlined or not.
//!
//! Workflow:
//!
//! ```text
//! cargo +nightly rustc --release -- --emit=llvm-ir
//! nornir introspect callgraph-llvm target/release/deps/*.ll
//! ```
//!
//! Indirect calls (function pointers, trait objects) appear as `call %vreg`
//! and are intentionally skipped — same honest scope as DWARF inline edges.

use std::path::{Path, PathBuf};

use anyhow::{Context, Result};

use super::callgraph_dwarf::{CallEdge, CallKind};

/// Parse every `.ll` file in `paths` and return the direct-call edges found.
///
/// When `workspace_crates` is `Some`, an edge is kept only if its caller or
/// its callee belongs to one of the listed crates (e.g.
/// `Some(&["nornir", "holger"])`); with `None` no crate filtering is applied.
/// In both cases edges whose callee is an intrinsic/runtime helper (see
/// `is_skippable`) are dropped, and duplicate caller/callee pairs are
/// collapsed to their first occurrence.
///
/// # Errors
///
/// Fails if any file cannot be read as UTF-8 text; the error context names
/// the offending path.
pub fn extract_from_files(paths: &[PathBuf], workspace_crates: Option<&[&str]>) -> Result<Vec<CallEdge>> {
    let mut edges: Vec<CallEdge> = Vec::new();

    for path in paths {
        let ir = std::fs::read_to_string(path)
            .with_context(|| format!("read {}", path.display()))?;
        parse_module(&ir, &mut edges);
    }

    // One pass: workspace scoping first, then the intrinsic filter.
    edges.retain(|edge| {
        let in_scope = match workspace_crates {
            Some(set) => in_crates(&edge.caller, set) || in_crates(&edge.callee, set),
            None => true,
        };
        in_scope && !is_skippable(&edge.callee)
    });

    dedupe(&mut edges);
    Ok(edges)
}

/// Convenience: glob `<dir>/**/*.ll` and parse all.
pub fn extract_from_dir(dir: &Path, workspace_crates: Option<&[&str]>) -> Result<Vec<CallEdge>> {
    let mut files = Vec::new();
    collect_ll(dir, &mut files)?;
    extract_from_files(&files, workspace_crates)
}

fn collect_ll(dir: &Path, out: &mut Vec<PathBuf>) -> Result<()> {
    for entry in std::fs::read_dir(dir).with_context(|| format!("read_dir {}", dir.display()))? {
        let e = entry?;
        let p = e.path();
        if p.is_dir() {
            collect_ll(&p, out)?;
        } else if p.extension().and_then(|s| s.to_str()) == Some("ll") {
            out.push(p);
        }
    }
    Ok(())
}

/// Scan one textual LLVM IR module and append a `CallKind::Direct` edge to
/// `out` for every direct call/invoke found inside a `define`d function.
///
/// The scan is line based:
///   - outside a function, only lines starting with `define ` matter; the
///     first `@name` on such a line becomes the (demangled) caller;
///   - inside a function, `{`/`}` are counted to find the end of the body,
///     and lines that look like a call or invoke are handed to
///     `scan_call_target`.
///
/// Calls whose demangled callee equals the caller are not recorded.
fn parse_module(text: &str, out: &mut Vec<CallEdge>) {
    // Substrings that mark a call-like instruction anywhere in a line.
    const CALL_MARKERS: [&str; 5] = [
        " call ",
        " invoke ",
        "tail call ",
        "musttail call ",
        "notail call ",
    ];

    // `Some(caller)` while inside a function body, `None` between functions.
    let mut enclosing: Option<String> = None;
    let mut depth: i32 = 0;

    for line in text.lines().map(str::trim_start) {
        if enclosing.is_none() {
            let header = line
                .strip_prefix("define ")
                .and_then(|tail| scan_global_name(tail).map(|symbol| (tail, symbol)));
            if let Some((tail, symbol)) = header {
                enclosing = Some(demangle(&symbol));
                // The opening brace normally sits on the `define` line; if it
                // does not, start at zero and let a later line open the body.
                depth = i32::from(tail.contains('{'));
            }
            continue;
        }

        // Track brace nesting; the body ends once the depth drops to zero.
        let mut closed = false;
        for b in line.bytes() {
            if b == b'{' {
                depth += 1;
            } else if b == b'}' {
                depth -= 1;
                if depth <= 0 {
                    closed = true;
                    break;
                }
            }
        }
        if closed {
            enclosing = None;
            depth = 0;
            continue;
        }

        let looks_like_call = line.starts_with("call ")
            || line.starts_with("invoke ")
            || CALL_MARKERS.iter().any(|marker| line.contains(marker));
        if !looks_like_call {
            continue;
        }

        // `scan_call_target` yields `None` for indirect calls, which are skipped.
        if let (Some(target), Some(from)) = (scan_call_target(line), enclosing.as_ref()) {
            let to = demangle(&target);
            if *from != to {
                out.push(CallEdge { caller: from.clone(), callee: to, kind: CallKind::Direct });
            }
        }
    }
}

/// Return the global symbol named by the first `@` in `s` (the remainder of
/// a `define` line), accepting both `@NAME` and `@"NAME"` spellings.
///
/// Yields `None` when `s` contains no `@` or no identifier follows it.
fn scan_global_name(s: &str) -> Option<String> {
    s.split_once('@').and_then(|(_, after_sigil)| read_ident(after_sigil))
}

/// From a `call`/`invoke` line, find the call target. Returns `None` for
/// indirect calls (target is a `%vreg`).
///
/// The callee of a direct call is the first `@global` that sits outside any
/// parentheses and outside any quoted name. Looking only at the text before
/// the first `(` is not enough, because parenthesised groups can precede the
/// callee:
///
/// ```text
/// call noundef align 8 dereferenceable(24) ptr @make(ptr %x)   ; return attribute
/// call i32 (ptr, ...) @printf(ptr @fmt, i32 1)                 ; explicit fn type
/// ```
///
/// Globals that appear as *arguments* (`call void %fn(ptr @g)`) are at paren
/// depth 1 and are therefore never mistaken for the callee.
fn scan_call_target(s: &str) -> Option<String> {
    let mut depth: usize = 0;
    let mut in_quotes = false;
    for (i, b) in s.bytes().enumerate() {
        if in_quotes {
            // LLVM escapes a literal quote as `\22`, so a raw `"` always
            // terminates the quoted name.
            if b == b'"' {
                in_quotes = false;
            }
            continue;
        }
        match b {
            b'"' => in_quotes = true,
            b'(' => depth += 1,
            b')' => depth = depth.saturating_sub(1),
            // `@` is ASCII, so `i + 1` is always a valid char boundary.
            b'@' if depth == 0 => return read_ident(&s[i + 1..]),
            _ => {}
        }
    }
    None
}

/// Read an LLVM identifier from the start of `s` (the text right after the
/// `@` sigil).
///
/// - Quoted form `"NAME"`: returns the raw text between the quotes (escape
///   sequences are not decoded); `None` if the closing quote is missing.
/// - Bare form: the longest prefix matching `[-A-Za-z0-9_.$]+`, the
///   character set LLVM permits in unquoted names; `None` if that prefix is
///   empty.
fn read_ident(s: &str) -> Option<String> {
    let bytes = s.as_bytes();
    if bytes.first() == Some(&b'"') {
        // Quoted name: advance to the closing quote, stepping over the
        // character that follows a backslash.
        let mut i = 1;
        while i < bytes.len() && bytes[i] != b'"' {
            i += if bytes[i] == b'\\' && i + 1 < bytes.len() { 2 } else { 1 };
        }
        if i >= bytes.len() {
            return None;
        }
        // `i` indexes an ASCII `"`, so the slice ends on a char boundary.
        Some(s[1..i].to_string())
    } else {
        let is_ident_byte =
            |b: u8| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.' | b'$' | b'-');
        let end = bytes
            .iter()
            .position(|&b| !is_ident_byte(b))
            .unwrap_or(bytes.len());
        (end > 0).then(|| s[..end].to_string())
    }
}

/// Demangle a Rust symbol name, rendered with the alternate (`{:#}`) format.
///
/// Names that `rustc_demangle` does not recognise as mangled Rust symbols
/// are returned unchanged.
fn demangle(s: &str) -> String {
    rustc_demangle::try_demangle(s)
        .map(|pretty| format!("{:#}", pretty))
        .unwrap_or_else(|_| s.to_string())
}

/// Decide whether `callee` is an intrinsic or runtime helper that should not
/// appear in the call graph.
///
/// A name is skipped when it starts with one of the listed prefixes or is
/// exactly one of the listed memory routines.
fn is_skippable(callee: &str) -> bool {
    const SKIPPED_PREFIXES: [&str; 6] = [
        "llvm.",
        "__rust_",
        "__llvm_",
        "__cxa_",
        "__memcpy",
        "__memset",
    ];
    const SKIPPED_NAMES: [&str; 3] = ["memcpy", "memset", "memmove"];

    SKIPPED_NAMES.iter().any(|name| *name == callee)
        || SKIPPED_PREFIXES.iter().any(|prefix| callee.starts_with(prefix))
}

/// Report whether the path `name` belongs to one of `crates`: it must either
/// equal a crate name or start with `<crate>::`.
///
/// NOTE(review): the check is a plain prefix test, so demangled trait-impl
/// paths such as `<krate::T as Trait>::f` (which start with `<`) do not
/// match — confirm that is the intended scope.
fn in_crates(name: &str, crates: &[&str]) -> bool {
    for krate in crates {
        if let Some(rest) = name.strip_prefix(*krate) {
            if rest.is_empty() || rest.starts_with("::") {
                return true;
            }
        }
    }
    false
}

/// Remove repeated edges in place, keeping the first occurrence of each
/// `(caller, callee)` pair and preserving the original order. The edge
/// `kind` is not part of the comparison key.
fn dedupe(edges: &mut Vec<CallEdge>) {
    use std::collections::HashSet;

    let mut already_seen: HashSet<(String, String)> = HashSet::new();
    edges.retain(|edge| {
        let key = (edge.caller.clone(), edge.callee.clone());
        // `insert` returns false when the pair was already present.
        already_seen.insert(key)
    });
}

// Unit tests for the IR scanner. Each fixture is a small hand-written LLVM IR
// module embedded as a raw string; symbol names use legacy Rust mangling so
// that `demangle` produces `demo::...` paths.
#[cfg(test)]
mod tests {
    use super::*;

    // One function calling another yields exactly one demangled Direct edge.
    #[test]
    fn parses_simple_direct_call() {
        let ir = r#"
; ModuleID = 'test'

define void @"_ZN4demo3foo17h0123456789abcdefE"() {
entry:
  call void @"_ZN4demo3bar17hdeadbeefdeadbeefE"()
  ret void
}

define void @"_ZN4demo3bar17hdeadbeefdeadbeefE"() {
entry:
  ret void
}
"#;
        let mut edges = Vec::new();
        parse_module(ir, &mut edges);
        dedupe(&mut edges);
        assert_eq!(edges.len(), 1);
        assert!(edges[0].caller.contains("demo::foo"), "caller={}", edges[0].caller);
        assert!(edges[0].callee.contains("demo::bar"), "callee={}", edges[0].callee);
        assert_eq!(edges[0].kind, CallKind::Direct);
    }

    // The indirect `call void %fn()` produces no edge, and the `llvm.memcpy`
    // edge is removed by the `is_skippable` filter, leaving only `demo::bar`.
    #[test]
    fn skips_intrinsics_and_indirect() {
        let ir = r#"
define void @"_ZN4demo3foo17h0a0a0a0a0a0a0a0aE"() {
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr %0, ptr %1, i64 16, i1 false)
  %fn = load ptr, ptr %vt
  call void %fn()
  call void @"_ZN4demo3bar17h0b0b0b0b0b0b0b0bE"()
  ret void
}
"#;
        let mut edges = Vec::new();
        parse_module(ir, &mut edges);
        assert!(edges.iter().any(|e| e.callee.contains("demo::bar")));
        // Apply the same intrinsic filter that `extract_from_files` uses.
        edges.retain(|e| !is_skippable(&e.callee));
        assert_eq!(edges.len(), 1);
    }

    // `tail call` and `invoke` are both recognised as direct call sites.
    #[test]
    fn tail_and_invoke_recognized() {
        let ir = r#"
define void @"_ZN4demo3foo17h0a0a0a0a0a0a0a0aE"() {
entry:
  tail call void @"_ZN4demo1a17h0b0b0b0b0b0b0b0bE"()
  invoke void @"_ZN4demo1b17h0c0c0c0c0c0c0c0cE"() to label %ok unwind label %bad
ok:
  ret void
bad:
  ret void
}
"#;
        let mut edges = Vec::new();
        parse_module(ir, &mut edges);
        dedupe(&mut edges);
        assert_eq!(edges.len(), 2);
    }

    // Mirrors the workspace filter in `extract_from_files`: an edge survives
    // only if its caller or callee is inside one of the listed crates.
    #[test]
    fn workspace_filter_keeps_endpoints_in_set() {
        let edges = vec![
            CallEdge { caller: "nornir::a".into(), callee: "std::vec::Vec::new".into(), kind: CallKind::Direct },
            CallEdge { caller: "std::clone".into(), callee: "core::mem::drop".into(), kind: CallKind::Direct },
        ];
        let mut filtered = edges.clone();
        filtered.retain(|e| in_crates(&e.caller, &["nornir"]) || in_crates(&e.callee, &["nornir"]));
        assert_eq!(filtered.len(), 1);
        assert!(filtered[0].caller.starts_with("nornir::"));
    }
}