1use std::collections::HashMap;
2use std::fs::File;
3use std::path::Path;
4
5use anyhow::Error;
6use goblin::Object;
7use memmap2::Mmap;
8
9use crate::utils::is_subrange;
10
/// Symbol and section information that `parse_binary` extracts from one binary file.
pub struct BinaryInfo {
    /// Symbol name mapped to its address, with the load offset already added.
    pub symbols: HashMap<String, u64>,
    /// Start address of the BSS region (for PE files, of the `.data` section), with the load
    /// offset added; stays 0 when it could not be resolved.
    pub bss_addr: u64,
    /// Size in bytes of the region starting at `bss_addr`.
    pub bss_size: u64,
    /// Start address of the `PyRuntime` section with the load offset added; 0 when the binary
    /// has no such section.
    pub pyruntime_addr: u64,
    /// Size in bytes of the `PyRuntime` section; 0 when the binary has no such section.
    pub pyruntime_size: u64,
    /// The `addr` argument that was passed to `parse_binary`.
    // Read by `contains`, which only exists when the `unwind` feature is enabled.
    #[allow(dead_code)]
    pub addr: u64,
    /// The `size` argument that was passed to `parse_binary`.
    // Read by `contains`, which only exists when the `unwind` feature is enabled.
    #[allow(dead_code)]
    pub size: u64,
}
22
23impl BinaryInfo {
24 #[cfg(feature = "unwind")]
25 pub fn contains(&self, addr: u64) -> bool {
26 addr >= self.addr && addr < (self.addr + self.size)
27 }
28}
29
/// Determines which Mach-O CPU type to select from a FAT archive on macOS.
///
/// Queries the `hw.optional.arm64` sysctl and returns `CPU_TYPE_ARM64` when it reports `1`,
/// otherwise `CPU_TYPE_X86_64`. If the sysctl call fails, a warning is logged and
/// `CPU_TYPE_X86_64` is assumed.
#[cfg(target_os = "macos")]
fn get_mach_cpu_type() -> goblin::mach::cputype::CpuType {
    // Both values are written by the kernel, so they must be genuinely mutable: writing
    // through a pointer derived from a shared reference to an immutable local is undefined
    // behaviour and would let the optimiser assume `is_arm` is still 0.
    let mut is_arm: i32 = 0;
    let mut size: usize = std::mem::size_of_val(&is_arm);
    let name = std::ffi::CString::new("hw.optional.arm64").expect("CString::new failed");

    // SAFETY: `name` is a valid NUL-terminated string that outlives the call, `is_arm` is a
    // writable buffer of exactly `size` bytes, `size` is a writable length, and no new value
    // is being set (null pointer with length 0).
    let ret = unsafe {
        libc::sysctlbyname(
            name.as_ptr(),
            (&mut is_arm as *mut i32).cast(),
            &mut size,
            std::ptr::null_mut(),
            0,
        )
    };

    if ret != 0 {
        warn!(
            "failed to call 'libc::sysctlbyname(\"hw.optional.arm64\",...' - assume running on x86_64 "
        );
        return goblin::mach::cputype::CPU_TYPE_X86_64;
    }

    if is_arm == 1 {
        goblin::mach::cputype::CPU_TYPE_ARM64
    } else {
        goblin::mach::cputype::CPU_TYPE_X86_64
    }
}
56
57#[cfg(not(target_os = "macos"))]
58fn get_mach_cpu_type() -> goblin::mach::cputype::CpuType {
59 goblin::mach::cputype::CPU_TYPE_ANY
60}
61
62pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo, Error> {
64 let offset = addr;
65
66 let mut symbols = HashMap::new();
67
68 let file = File::open(filename)?;
70 let buffer = unsafe { Mmap::map(&file)? };
71
72 let mach_cputype = get_mach_cpu_type();
75
76 match Object::parse(&buffer)? {
78 Object::Mach(mach) => {
79 let mach = match mach {
81 goblin::mach::Mach::Binary(mach) => mach,
82 goblin::mach::Mach::Fat(fat) => {
83 let arch = fat
84 .iter_arches()
85 .find(|arch| match arch {
86 Ok(arch) => arch.is_64() && arch.cputype() == mach_cputype,
87 Err(_) => false,
88 })
89 .ok_or_else(|| {
90 format_err!(
91 "Failed to find 64 bit arch in FAT archive in {}",
92 filename.display()
93 )
94 })??;
95 if !is_subrange(0, buffer.len(), arch.offset as usize, arch.size as usize) {
96 return Err(format_err!(
97 "Invalid offset/size in FAT archive in {}",
98 filename.display()
99 ));
100 }
101 let bytes = &buffer[arch.offset as usize..][..arch.size as usize];
102 goblin::mach::MachO::parse(bytes, 0)?
103 }
104 };
105
106 let mut pyruntime_addr = 0;
107 let mut pyruntime_size = 0;
108 let mut bss_addr = 0;
109 let mut bss_size = 0;
110 for segment in mach.segments.iter() {
111 for (section, _) in &segment.sections()? {
112 let name = section.name()?;
113 if name == "PyRuntime" {
114 if let Some(addr) = section.addr.checked_add(offset) {
115 if addr.checked_add(section.size).is_some() {
116 pyruntime_addr = addr;
117 pyruntime_size = section.size;
118 }
119 }
120 }
121
122 if name == "__bss" {
123 if let Some(addr) = section.addr.checked_add(offset) {
124 if addr.checked_add(section.size).is_some() {
125 bss_addr = addr;
126 bss_size = section.size;
127 }
128 }
129 }
130 }
131 }
132
133 if let Some(syms) = mach.symbols {
134 for symbol in syms.iter() {
135 let (name, value) = symbol?;
136 if let Some(stripped_name) = name.strip_prefix('_') {
139 symbols.insert(stripped_name.to_string(), value.n_value + offset);
140 }
141 }
142 }
143 Ok(BinaryInfo {
144 symbols,
145 bss_addr,
146 bss_size,
147 pyruntime_addr,
148 pyruntime_size,
149 addr,
150 size,
151 })
152 }
153
154 Object::Elf(elf) => {
155 let strtab = elf.shdr_strtab;
156 let bss_header = elf
157 .section_headers
158 .iter()
159 .filter(|header| header.sh_type == goblin::elf::section_header::SHT_NOBITS)
161 .filter(|header| {
162 strtab
163 .get_at(header.sh_name)
164 .is_none_or(|name| name == ".bss")
165 })
166 .max_by_key(|header| header.sh_size)
168 .ok_or_else(|| {
169 format_err!(
170 "Failed to find BSS section header in {}",
171 filename.display()
172 )
173 })?;
174
175 let program_header = elf
176 .program_headers
177 .iter()
178 .find(|header| {
179 header.p_type == goblin::elf::program_header::PT_LOAD
180 && header.p_flags & goblin::elf::program_header::PF_X != 0
181 })
182 .ok_or_else(|| {
183 format_err!(
184 "Failed to find executable PT_LOAD program header in {}",
185 filename.display()
186 )
187 })?;
188
189 let aligned_vaddr =
192 program_header.p_vaddr - (program_header.p_vaddr % page_size::get() as u64);
193 let offset = offset.saturating_sub(aligned_vaddr);
194
195 let mut bss_addr = 0;
196 let mut bss_size = 0;
197 let mut bss_end = 0;
198 if let Some(addr) = bss_header.sh_addr.checked_add(offset) {
199 if bss_header.sh_size.checked_add(addr).is_none() {
200 return Err(format_err!(
201 "Invalid bss address/size in {}",
202 filename.display()
203 ));
204 }
205 bss_addr = addr;
206 bss_size = bss_header.sh_size;
207 bss_end = bss_header.sh_addr + bss_header.sh_size;
208 }
209
210 let pyruntime_header = elf
211 .section_headers
212 .iter()
213 .find(|header| strtab.get_at(header.sh_name) == Some(".PyRuntime"));
214
215 let mut pyruntime_addr = 0;
216 let mut pyruntime_size = 0;
217 if let Some(header) = pyruntime_header {
218 if let Some(addr) = header.sh_addr.checked_add(offset) {
219 pyruntime_addr = addr;
220 pyruntime_size = header.sh_size;
221 }
222 }
223
224 for sym in elf.syms.iter() {
225 if sym.st_shndx == goblin::elf::section_header::SHN_UNDEF as usize {
227 continue;
228 }
229 if sym.is_import()
231 || (bss_end != 0
232 && sym.st_size != 0
233 && !is_subrange(0u64, bss_end, sym.st_value, sym.st_size))
234 {
235 continue;
236 }
237 if let Some(pos) = sym.st_value.checked_add(offset) {
238 if sym.is_function() && !is_subrange(addr, size, pos, sym.st_size) {
239 continue;
240 }
241 if let Some(name) = elf.strtab.get_unsafe(sym.st_name) {
242 symbols.insert(name.to_string(), pos);
243 }
244 }
245 }
246 for dynsym in elf.dynsyms.iter() {
247 if dynsym.st_shndx == goblin::elf::section_header::SHN_UNDEF as usize {
249 continue;
250 }
251 if dynsym.is_import()
253 || (bss_end != 0
254 && dynsym.st_size != 0
255 && !is_subrange(0u64, bss_end, dynsym.st_value, dynsym.st_size))
256 {
257 continue;
258 }
259 if let Some(pos) = dynsym.st_value.checked_add(offset) {
260 if dynsym.is_function() && !is_subrange(addr, size, pos, dynsym.st_size) {
261 continue;
262 }
263 if let Some(name) = elf.dynstrtab.get_unsafe(dynsym.st_name) {
264 symbols.insert(name.to_string(), pos);
265 }
266 }
267 }
268
269 Ok(BinaryInfo {
270 symbols,
271 bss_addr,
272 bss_size,
273 pyruntime_addr,
274 pyruntime_size,
275 addr,
276 size,
277 })
278 }
279 Object::PE(pe) => {
280 for export in pe.exports {
281 if let Some(name) = export.name {
282 if let Some(addr) = offset.checked_add(export.rva as u64) {
283 symbols.insert(name.to_string(), addr);
284 }
285 }
286 }
287
288 let mut bss_addr = 0;
289 let mut bss_size = 0;
290 let mut pyruntime_addr = 0;
291 let mut pyruntime_size = 0;
292 let mut found_data = false;
293 for section in pe.sections.iter() {
294 if section.name.starts_with(b".data") {
295 found_data = true;
296 if let Some(addr) = offset.checked_add(section.virtual_address as u64) {
297 if addr.checked_add(section.virtual_size as u64).is_some() {
298 bss_addr = addr;
299 bss_size = u64::from(section.virtual_size);
300 }
301 }
302 } else if section.name.starts_with(b"PyRuntim") {
303 if let Some(addr) = offset.checked_add(section.virtual_address as u64) {
306 if addr.checked_add(section.virtual_size as u64).is_some() {
307 pyruntime_addr = addr;
308 pyruntime_size = u64::from(section.virtual_size);
309 }
310 }
311 }
312 }
313
314 if !found_data {
315 return Err(format_err!(
316 "Failed to find .data section in PE binary of {}",
317 filename.display()
318 ));
319 }
320
321 Ok(BinaryInfo {
322 symbols,
323 bss_addr,
324 bss_size,
325 pyruntime_size,
326 pyruntime_addr,
327 addr,
328 size,
329 })
330 }
331 _ => Err(format_err!("Unhandled binary type")),
332 }
333}