1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
use std::collections::HashMap;
use std::fs::File;
use std::path::Path;
use anyhow::{format_err, Error};
use goblin;
use goblin::Object;
use memmap2::Mmap;
/// Metadata for a parsed binary: its symbols, BSS section, and location in memory.
///
/// Values are produced by `parse_binary`; symbol and BSS addresses already have
/// `offset` applied.
#[derive(Debug, Clone)]
pub struct BinaryInfo {
    /// The filesystem path the binary was read from
    pub filename: std::path::PathBuf,
    /// A map from symbol name to symbol address (file value plus `offset`)
    pub symbols: HashMap<String, u64>,
    /// The address of the BSS section (file value plus `offset`)
    pub bss_addr: u64,
    /// The size in bytes of the BSS section
    pub bss_size: u64,
    /// The value added to addresses read from the file (symbols, BSS) to relocate them.
    ///
    /// As populated by `parse_binary`: for Mach-O and PE binaries this equals `addr`;
    /// for ELF binaries it is `addr` minus the `p_vaddr` of the first executable
    /// `PT_LOAD` program header, clamped to 0 when `p_vaddr` is larger than `addr`.
    pub offset: u64,
    /// The address of the binary in memory
    pub addr: u64,
    /// The size in bytes of the binary
    pub size: u64,
}
impl BinaryInfo {
    /// Returns `true` if `addr` lies in the half-open range
    /// `[self.addr, self.addr + self.size)` covered by this binary.
    pub fn contains(&self, addr: u64) -> bool {
        // Compare the distance from the base instead of computing `self.addr + self.size`:
        // that sum overflows (panicking in debug builds, wrapping in release builds) when
        // the range ends at the top of the address space.
        addr >= self.addr && addr - self.addr < self.size
    }
}
/// Parses the binary at `filename` and collects its symbols and BSS section location.
///
/// Mach-O (including FAT archives, from which the first 64-bit arch is used), ELF and PE
/// files are handled. Addresses read from the file are relocated before being stored; see
/// the per-format code below for how the relocation offset is derived from `addr`.
///
/// * `_pid` - process id; only used on Linux, to build a `/proc/<pid>/...` path.
/// * `filename` - path of the binary. On Linux this is replaced by `/proc/<pid>/exe`
///   (when `_is_bin` is true) or prefixed with `/proc/<pid>/root`.
/// * `addr` - address at which the binary is loaded; stored in the result and used as the
///   base for relocating symbol and BSS addresses.
/// * `size` - size in bytes of the binary; stored in the result unchanged.
/// * `_is_bin` - whether `filename` is the main executable rather than a DSO; only used
///   on Linux.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or memory-mapped, if goblin fails to
/// parse it, if a FAT archive has no 64-bit arch or its arch slice lies outside the file,
/// if an ELF file has no `SHT_NOBITS` section or no executable `PT_LOAD` program header,
/// if a PE file has no section whose name starts with `.data`, or if the object type is
/// not one of the handled formats.
pub fn parse_binary(
    _pid: remoteprocess::Pid,
    filename: &Path,
    addr: u64,
    size: u64,
    _is_bin: bool,
) -> Result<BinaryInfo, Error> {
    // on linux the process could be running in docker, access the filename through procfs
    // if filename is the binary executable (not a DSO) - take it from /proc/pid/exe, which works
    // across namespaces just like /proc/pid/root, and also if the file was deleted.
    #[cfg(target_os = "linux")]
    let filename = &std::path::PathBuf::from(&if _is_bin {
        format!("/proc/{}/exe", _pid)
    } else {
        format!("/proc/{}/root{}", _pid, filename.display())
    });

    let offset = addr;
    let mut symbols = HashMap::new();

    // Map the file into memory rather than reading it into a buffer.
    let file = File::open(filename)?;
    // SAFETY: the mapping is only read from and is dropped before this function returns
    // (everything stored in the result is copied out of it).
    // NOTE(review): this assumes nobody truncates or rewrites the file while it is
    // mapped, which cannot be verified from here.
    let buffer = unsafe { Mmap::map(&file)? };

    // Use goblin to parse the binary.
    // Address arithmetic below uses `wrapping_add`: the operands come from the file, and a
    // plain `+` would panic in debug builds on malformed input (release builds already wrap).
    match Object::parse(&buffer)? {
        Object::Mach(mach) => {
            // Get the mach binary from the archive
            let mach = match mach {
                goblin::mach::Mach::Binary(mach) => mach,
                goblin::mach::Mach::Fat(fat) => {
                    let arch = fat
                        .iter_arches()
                        .find(|arch| match arch {
                            Ok(arch) => arch.is_64(),
                            Err(_) => false,
                        })
                        .ok_or_else(|| {
                            format_err!(
                                "Failed to find 64 bit arch in FAT archive in {}",
                                filename.display()
                            )
                        })??;
                    // Checked slicing: a malformed archive header must yield an error,
                    // not an out-of-bounds panic.
                    let start = arch.offset as usize;
                    let len = arch.size as usize;
                    let bytes = buffer
                        .get(start..)
                        .and_then(|rest| rest.get(..len))
                        .ok_or_else(|| {
                            format_err!(
                                "64 bit arch in FAT archive lies outside of the file in {}",
                                filename.display()
                            )
                        })?;
                    goblin::mach::MachO::parse(bytes, 0)?
                }
            };

            // If several sections are named __bss, the last one seen wins.
            let mut bss_addr = 0;
            let mut bss_size = 0;
            for segment in mach.segments.iter() {
                for (section, _) in &segment.sections()? {
                    if section.name()? == "__bss" {
                        bss_addr = section.addr.wrapping_add(offset);
                        bss_size = section.size;
                    }
                }
            }

            if let Some(syms) = mach.symbols {
                for symbol in syms.iter() {
                    let (name, value) = symbol?;
                    // almost every symbol we care about starts with an extra _, remove to
                    // normalize with the entries seen on linux/windows
                    if let Some(stripped) = name.strip_prefix('_') {
                        if !value.is_undefined() {
                            symbols.insert(stripped.to_string(), value.n_value.wrapping_add(offset));
                        }
                    }
                }
            }

            Ok(BinaryInfo {
                filename: filename.to_owned(),
                symbols,
                bss_addr,
                bss_size,
                offset,
                addr,
                size,
            })
        }

        Object::Elf(elf) => {
            // NOTE(review): this takes the first SHT_NOBITS section, whatever its name;
            // confirm that it is always the intended .bss section.
            let bss_header = elf
                .section_headers
                .iter()
                .find(|header| header.sh_type == goblin::elf::section_header::SHT_NOBITS)
                .ok_or_else(|| {
                    format_err!(
                        "Failed to find BSS section header in {}",
                        filename.display()
                    )
                })?;

            let program_header = elf
                .program_headers
                .iter()
                .find(|header| {
                    header.p_type == goblin::elf::program_header::PT_LOAD
                        && header.p_flags & goblin::elf::program_header::PF_X != 0
                })
                .ok_or_else(|| {
                    format_err!(
                        "Failed to find executable PT_LOAD program header in {}",
                        filename.display()
                    )
                })?;

            // p_vaddr may be larger than the map address in case when the header has an offset and
            // the map address is relatively small. In this case we can default to 0.
            let offset = offset.saturating_sub(program_header.p_vaddr);

            // NOTE(review): indexing a goblin string table may panic on an invalid offset
            // or non-UTF-8 name - confirm against the goblin version in use.
            for sym in elf.syms.iter() {
                let name = elf.strtab[sym.st_name].to_string();
                symbols.insert(name, sym.st_value.wrapping_add(offset));
            }
            // Dynamic symbols are inserted second, so they overwrite same-named entries.
            for dynsym in elf.dynsyms.iter() {
                let name = elf.dynstrtab[dynsym.st_name].to_string();
                symbols.insert(name, dynsym.st_value.wrapping_add(offset));
            }

            Ok(BinaryInfo {
                filename: filename.to_owned(),
                symbols,
                bss_addr: bss_header.sh_addr.wrapping_add(offset),
                bss_size: bss_header.sh_size,
                offset,
                addr,
                size,
            })
        }

        Object::PE(pe) => {
            // Only exports that carry both a name and an offset are recorded.
            for export in pe.exports {
                if let (Some(name), Some(export_offset)) = (export.name, export.offset) {
                    symbols.insert(name.to_string(), (export_offset as u64).wrapping_add(offset));
                }
            }

            // The first section whose name starts with ".data" is reported through the
            // bss_addr / bss_size fields.
            let data_section = pe
                .sections
                .iter()
                .find(|section| section.name.starts_with(b".data"))
                .ok_or_else(|| {
                    format_err!(
                        "Failed to find .data section in PE binary of {}",
                        filename.display()
                    )
                })?;

            Ok(BinaryInfo {
                filename: filename.to_owned(),
                symbols,
                bss_addr: u64::from(data_section.virtual_address).wrapping_add(offset),
                bss_size: u64::from(data_section.virtual_size),
                offset,
                addr,
                size,
            })
        }

        _ => Err(format_err!("Unhandled binary type")),
    }
}