py_spy_for_datakit/
binary_parser.rs

1
2use std::collections::HashMap;
3use std::fs::File;
4use std::path::Path;
5
6use anyhow::Error;
7use goblin;
8use goblin::Object;
9use memmap::Mmap;
10
/// Summary of a parsed binary: its symbol table plus the location of the
/// section that `parse_binary` reports as "bss".
#[derive(Debug, Clone)]
pub struct BinaryInfo {
    /// Path the binary was actually read from.
    pub filename: std::path::PathBuf,
    /// Symbol name -> symbol value with `offset` already added.
    pub symbols: HashMap<String, u64>,
    /// Address of the bss-like section with `offset` already added.
    pub bss_addr: u64,
    /// Size in bytes of the bss-like section.
    pub bss_size: u64,
    /// Value that was added to every raw symbol/section address.
    pub offset: u64,
    /// Start of the address range checked by `contains`
    /// (presumably where the binary is mapped in the target process — confirm with caller).
    pub addr: u64,
    /// Length of the address range checked by `contains`.
    pub size: u64,
}
20
21impl BinaryInfo {
22    #[cfg(unwind)]
23    pub fn contains(&self, addr: u64) -> bool {
24        addr >= self.addr && addr < (self.addr + self.size)
25    }
26}
27
28/// Uses goblin to parse a binary file, returns information on symbols/bss/adjusted offset etc
29pub fn parse_binary(_pid: remoteprocess::Pid, filename: &Path, addr: u64, size: u64, _is_bin: bool) -> Result<BinaryInfo, Error> {
30    // on linux the process could be running in docker, access the filename through procfs
31    // if filename is the binary executable (not libpython) - take it from /proc/pid/exe, which works
32    // across namespaces just like /proc/pid/root, and also if the file was deleted.
33    #[cfg(target_os="linux")]
34    let filename = &std::path::PathBuf::from(&if _is_bin {
35        format!("/proc/{}/exe", _pid)
36    } else {
37        format!("/proc/{}/root{}", _pid, filename.display())
38    });
39
40    let offset = addr;
41
42    let mut symbols = HashMap::new();
43
44    // Read in the filename
45    let file = File::open(filename)?;
46    let buffer = unsafe { Mmap::map(&file)? };
47
48    // Use goblin to parse the binary
49    match Object::parse(&buffer)? {
50        Object::Mach(mach) => {
51            // Get the mach binary from the archive
52            let mach = match mach {
53                goblin::mach::Mach::Binary(mach) => mach,
54                goblin::mach::Mach::Fat(fat) => {
55                    let arch = fat.iter_arches().find(|arch|
56                        match arch {
57                            Ok(arch) => arch.is_64(),
58                            Err(_) => false
59                        }
60                    ).ok_or_else(|| format_err!("Failed to find 64 bit arch in FAT archive in {}", filename.display()))??;
61                    let bytes = &buffer[arch.offset as usize..][..arch.size as usize];
62                    goblin::mach::MachO::parse(bytes, 0)?
63                }
64            };
65
66            let mut bss_addr = 0;
67            let mut bss_size = 0;
68            for segment in mach.segments.iter() {
69                for (section, _) in &segment.sections()? {
70                    if section.name()? == "__bss" {
71                        bss_addr = section.addr + offset;
72                        bss_size = section.size;
73                    }
74                }
75            }
76
77            if let Some(syms) = mach.symbols {
78                for symbol in syms.iter() {
79                    let (name, value) = symbol?;
80                    // almost every symbol we care about starts with an extra _, remove to normalize
81                    // with the entries seen on linux/windows
82                    if name.starts_with('_') {
83                        symbols.insert(name[1..].to_string(), value.n_value + offset);
84                    }
85
86                }
87            }
88            Ok(BinaryInfo{filename: filename.to_owned(), symbols, bss_addr, bss_size, offset, addr, size})
89        }
90
91        Object::Elf(elf) => {
92            let bss_header = elf.section_headers
93                .iter()
94                .find(|ref header| header.sh_type == goblin::elf::section_header::SHT_NOBITS)
95                .ok_or_else(|| format_err!("Failed to find BSS section header in {}", filename.display()))?;
96
97            let program_header = elf.program_headers
98                .iter()
99                .find(|ref header|
100                    header.p_type == goblin::elf::program_header::PT_LOAD &&
101                    header.p_flags & goblin::elf::program_header::PF_X != 0)
102                .ok_or_else(|| format_err!("Failed to find executable PT_LOAD program header in {}", filename.display()))?;
103
104            // p_vaddr may be larger than the map address in case when the header has an offset and
105            // the map address is relatively small. In this case we can default to 0.
106            let offset = offset.checked_sub(program_header.p_vaddr).unwrap_or(0);
107
108            for sym in elf.syms.iter() {
109                let name = elf.strtab[sym.st_name].to_string();
110                symbols.insert(name, sym.st_value + offset);
111            }
112            for dynsym in elf.dynsyms.iter() {
113                let name = elf.dynstrtab[dynsym.st_name].to_string();
114                symbols.insert(name, dynsym.st_value + offset);
115            }
116            Ok(BinaryInfo{filename: filename.to_owned(),
117                          symbols,
118                          bss_addr: bss_header.sh_addr + offset,
119                          bss_size: bss_header.sh_size,
120                          offset,
121                          addr,
122                          size})
123        },
124        Object::PE(pe) => {
125            for export in pe.exports {
126                if let Some(name) = export.name {
127                    if let Some(export_offset) = export.offset {
128                        symbols.insert(name.to_string(), export_offset as u64 + offset as u64);
129                    }
130                }
131            }
132
133            pe.sections
134                .iter()
135                .find(|ref section| section.name.starts_with(b".data"))
136                .ok_or_else(|| format_err!("Failed to find .data section in PE binary of {}", filename.display()))
137                .map(|data_section| {
138                    let bss_addr = u64::from(data_section.virtual_address) + offset;
139                    let bss_size = u64::from(data_section.virtual_size);
140
141                    BinaryInfo{filename: filename.to_owned(), symbols, bss_addr, bss_size, offset, addr, size}
142                })
143        },
144        _ => {
145            Err(format_err!("Unhandled binary type"))
146        }
147    }
148}