ud_analysis/lib.rs
1//! Analysis passes over loaded binaries.
2//!
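//! Function discovery layers signals from highest to lowest confidence
//! (`discover_functions` runs the passes in ascending confidence, so the
//! entries further up this list are merged later):
//!
//! 1. The full symbol table (`.symtab`) when present.
//! 2. The dynamic symbol table (`.dynsym`).
//! 3. PLT thunks.
//! 4. Byte-pattern signatures (CRT helpers, libc primitives, …).
//! 5. Local `call` targets in BPF / SBF programs.
//! 6. `.eh_frame` (DWARF CFI) — names are just addresses, but sizes are
//!    authoritative; survives stripping.
//! 7. The ELF entry point (`e_entry`).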
10//!
11//! Each [`Function`] in the produced [`FunctionMap`] records every
12//! source that contributed to it. Names from higher-confidence sources
13//! win over names from lower-confidence sources; sizes are merged
14//! preserving any non-zero value.
15//!
16//! After all sources are merged, a final pass fills in sizes for
17//! functions that no source supplied a size for (typically signature
18//! matches), using the distance to the next discovered function in
19//! the same address window.
20
21#![allow(clippy::cast_possible_truncation)]
22
23pub mod bpf_relocs;
24pub mod call_sites;
25mod eh_frame;
26mod entry;
27mod function_map;
28mod plt;
29mod signatures;
30mod symbols;
31
32pub use call_sites::{discover_from_bpf_call_sites, CallSiteError};
33pub use eh_frame::{discover_from_eh_frame, EhFrameError};
34pub use function_map::{Function, FunctionMap, FunctionSource};
35pub use plt::{discover_plt_thunks, PltError};
36pub use signatures::discover_from_signatures;
37pub use symbols::{discover_from_symbol_tables, SymbolError};
38
39use ud_core::VAddr;
40use ud_format::elf::Elf64File;
41
42/// Crate-level error type.
43#[derive(Debug, thiserror::Error)]
44pub enum Error {
45 #[error(transparent)]
46 Symbol(#[from] SymbolError),
47 #[error(transparent)]
48 EhFrame(#[from] EhFrameError),
49 #[error(transparent)]
50 Plt(#[from] PltError),
51 #[error(transparent)]
52 BpfReloc(#[from] bpf_relocs::BpfRelocError),
53 #[error(transparent)]
54 CallSite(#[from] CallSiteError),
55}
56
57pub type Result<T, E = Error> = std::result::Result<T, E>;
58
59/// Run every available discovery source and merge into a single
60/// [`FunctionMap`].
61///
62/// Sources are run in order of ascending confidence so that, when the
63/// merge resolves a name conflict, the higher-confidence record wins.
64/// Provenance (`Function::sources`) accumulates from every source that
65/// found the address.
66///
67/// After merging, a size-filling pass closes gaps for functions that
68/// were only found via signatures or other size-less sources.
69pub fn discover_functions(elf: &Elf64File) -> Result<FunctionMap> {
70 let mut map = FunctionMap::new();
71
72 // ELF `e_entry` — the loader-invoked entry. For Solana BPF
73 // programs nothing in `.text` calls it, so without this
74 // source the address would get buried inside an earlier
75 // function whose body spans the entry's address. Naming
76 // is `entry_point` for BPF / SBF, `_start` otherwise.
77 for f in entry::discover_entry_point(elf) {
78 map.insert(f);
79 }
80 for f in discover_from_eh_frame(elf)? {
81 map.insert(f);
82 }
83 // BPF / SBF: harvest function entries from local `call`
84 // targets. Stripped Solana programs only expose
85 // entrypoint + custom_panic in `.dynsym`; every other
86 // function lives behind a `call` somewhere. We need the
87 // syscall relocation map to exclude syscall call sites
88 // (whose `imm` is a helper id or Murmur3 hash, not a code
89 // offset).
90 let bpf_syscalls = bpf_relocs::build_call_site_names(elf)?;
91 for f in discover_from_bpf_call_sites(elf, &bpf_syscalls)? {
92 map.insert(f);
93 }
94 for f in discover_from_signatures(elf) {
95 map.insert(f);
96 }
97 for f in discover_plt_thunks(elf)? {
98 map.insert(f);
99 }
100 for f in discover_from_symbol_tables(elf)? {
101 map.insert(f);
102 }
103
104 fill_in_sizes_from_neighbors(&mut map, elf);
105
106 Ok(map)
107}
108
109/// For each function with `size == 0`, set its size to the distance to
110/// the next function in the same executable section (or to the end of
111/// the section, if it's the last). Functions without a containing
112/// executable section keep `size = 0`.
113fn fill_in_sizes_from_neighbors(map: &mut FunctionMap, elf: &Elf64File) {
114 use ud_format::elf::SHF_EXECINSTR;
115
116 let func_addrs: Vec<u64> = map.iter().map(|f| f.addr.0).collect();
117
118 let exec_sections: Vec<(u64, u64)> = elf
119 .sections()
120 .filter(|(_, sh, _)| sh.sh_flags & SHF_EXECINSTR != 0 && sh.sh_size > 0)
121 .map(|(_, sh, _)| (sh.sh_addr, sh.sh_addr.saturating_add(sh.sh_size)))
122 .collect();
123
124 let zero_sized: Vec<u64> = map
125 .iter()
126 .filter(|f| f.size == 0)
127 .map(|f| f.addr.0)
128 .collect();
129
130 for addr in zero_sized {
131 let Some(&(_, sec_end)) = exec_sections.iter().find(|(s, e)| addr >= *s && addr < *e)
132 else {
133 continue;
134 };
135 let next_in_section = func_addrs
136 .iter()
137 .copied()
138 .filter(|&a| a > addr && a < sec_end)
139 .min()
140 .unwrap_or(sec_end);
141 let new_size = next_in_section.saturating_sub(addr);
142 map.insert(Function {
143 addr: VAddr(addr),
144 size: new_size,
145 name: String::new(), // ignored on merge: name from existing wins (higher source)
146 sources: Vec::new(),
147 });
148 }
149}