Skip to main content

ud_analysis/
lib.rs

1//! Analysis passes over loaded binaries.
2//!
3//! Function discovery layers signals from highest to lowest confidence:
4//!
5//! 1. The full symbol table (`.symtab`) when present.
6//! 2. The dynamic symbol table (`.dynsym`).
7//! 3. Byte-pattern signatures (CRT helpers, libc primitives, …).
8//! 4. `.eh_frame` (DWARF CFI) — names are addresses, but sizes are
9//!    authoritative; survives stripping.
10//!
11//! Each [`Function`] in the produced [`FunctionMap`] records every
12//! source that contributed to it. Names from higher-confidence sources
13//! win over names from lower-confidence sources; sizes are merged
14//! preserving any non-zero value.
15//!
16//! After all sources are merged, a final pass fills in sizes for
17//! functions that no source supplied a size for (typically signature
18//! matches), using the distance to the next discovered function in
19//! the same address window.
20
21#![allow(clippy::cast_possible_truncation)]
22
23pub mod bpf_relocs;
24pub mod call_sites;
25mod eh_frame;
26mod entry;
27mod function_map;
28mod plt;
29mod signatures;
30mod symbols;
31
32pub use call_sites::{discover_from_bpf_call_sites, CallSiteError};
33pub use eh_frame::{discover_from_eh_frame, EhFrameError};
34pub use function_map::{Function, FunctionMap, FunctionSource};
35pub use plt::{discover_plt_thunks, PltError};
36pub use signatures::discover_from_signatures;
37pub use symbols::{discover_from_symbol_tables, SymbolError};
38
39use ud_core::VAddr;
40use ud_format::elf::Elf64File;
41
42/// Crate-level error type.
43#[derive(Debug, thiserror::Error)]
44pub enum Error {
45    #[error(transparent)]
46    Symbol(#[from] SymbolError),
47    #[error(transparent)]
48    EhFrame(#[from] EhFrameError),
49    #[error(transparent)]
50    Plt(#[from] PltError),
51    #[error(transparent)]
52    BpfReloc(#[from] bpf_relocs::BpfRelocError),
53    #[error(transparent)]
54    CallSite(#[from] CallSiteError),
55}
56
57pub type Result<T, E = Error> = std::result::Result<T, E>;
58
59/// Run every available discovery source and merge into a single
60/// [`FunctionMap`].
61///
62/// Sources are run in order of ascending confidence so that, when the
63/// merge resolves a name conflict, the higher-confidence record wins.
64/// Provenance (`Function::sources`) accumulates from every source that
65/// found the address.
66///
67/// After merging, a size-filling pass closes gaps for functions that
68/// were only found via signatures or other size-less sources.
69pub fn discover_functions(elf: &Elf64File) -> Result<FunctionMap> {
70    let mut map = FunctionMap::new();
71
72    // ELF `e_entry` — the loader-invoked entry. For Solana BPF
73    // programs nothing in `.text` calls it, so without this
74    // source the address would get buried inside an earlier
75    // function whose body spans the entry's address. Naming
76    // is `entry_point` for BPF / SBF, `_start` otherwise.
77    for f in entry::discover_entry_point(elf) {
78        map.insert(f);
79    }
80    for f in discover_from_eh_frame(elf)? {
81        map.insert(f);
82    }
83    // BPF / SBF: harvest function entries from local `call`
84    // targets. Stripped Solana programs only expose
85    // entrypoint + custom_panic in `.dynsym`; every other
86    // function lives behind a `call` somewhere. We need the
87    // syscall relocation map to exclude syscall call sites
88    // (whose `imm` is a helper id or Murmur3 hash, not a code
89    // offset).
90    let bpf_syscalls = bpf_relocs::build_call_site_names(elf)?;
91    for f in discover_from_bpf_call_sites(elf, &bpf_syscalls)? {
92        map.insert(f);
93    }
94    for f in discover_from_signatures(elf) {
95        map.insert(f);
96    }
97    for f in discover_plt_thunks(elf)? {
98        map.insert(f);
99    }
100    for f in discover_from_symbol_tables(elf)? {
101        map.insert(f);
102    }
103
104    fill_in_sizes_from_neighbors(&mut map, elf);
105
106    Ok(map)
107}
108
109/// For each function with `size == 0`, set its size to the distance to
110/// the next function in the same executable section (or to the end of
111/// the section, if it's the last). Functions without a containing
112/// executable section keep `size = 0`.
113fn fill_in_sizes_from_neighbors(map: &mut FunctionMap, elf: &Elf64File) {
114    use ud_format::elf::SHF_EXECINSTR;
115
116    let func_addrs: Vec<u64> = map.iter().map(|f| f.addr.0).collect();
117
118    let exec_sections: Vec<(u64, u64)> = elf
119        .sections()
120        .filter(|(_, sh, _)| sh.sh_flags & SHF_EXECINSTR != 0 && sh.sh_size > 0)
121        .map(|(_, sh, _)| (sh.sh_addr, sh.sh_addr.saturating_add(sh.sh_size)))
122        .collect();
123
124    let zero_sized: Vec<u64> = map
125        .iter()
126        .filter(|f| f.size == 0)
127        .map(|f| f.addr.0)
128        .collect();
129
130    for addr in zero_sized {
131        let Some(&(_, sec_end)) = exec_sections.iter().find(|(s, e)| addr >= *s && addr < *e)
132        else {
133            continue;
134        };
135        let next_in_section = func_addrs
136            .iter()
137            .copied()
138            .filter(|&a| a > addr && a < sec_end)
139            .min()
140            .unwrap_or(sec_end);
141        let new_size = next_in_section.saturating_sub(addr);
142        map.insert(Function {
143            addr: VAddr(addr),
144            size: new_size,
145            name: String::new(), // ignored on merge: name from existing wins (higher source)
146            sources: Vec::new(),
147        });
148    }
149}