// samply_symbols/binary_image.rs

1use debugid::DebugId;
2use linux_perf_data::jitdump::JitDumpHeader;
3use linux_perf_data::linux_perf_event_reader::RawData;
4use object::read::pe::{ImageNtHeaders, ImageOptionalHeader, PeFile, PeFile32, PeFile64};
5use object::{FileKind, Object, ReadRef};
6
7use crate::debugid_util::{code_id_for_object, debug_id_for_object};
8use crate::error::Error;
9use crate::jitdump::{debug_id_and_code_id_for_jitdump, JitDumpIndex};
10use crate::macho::{DyldCacheFileData, MachOData, MachOFatArchiveMemberData};
11use crate::shared::{
12    relative_address_base, CodeId, ElfBuildId, FileAndPathHelperError, FileContents,
13    FileContentsWrapper, LibraryInfo, PeCodeId, RangeReadRef,
14};
15
16#[derive(thiserror::Error, Debug)]
17pub enum CodeByteReadingError {
18    #[error("The requested address was not found in any section in the binary.")]
19    AddressNotFound,
20
21    #[error("object parse error: {0}")]
22    ObjectParseError(#[from] object::Error),
23
24    #[error("Could not read the requested address range from the section (might be out of bounds or the section might not have any bytes in the file)")]
25    ByteRangeNotInSection,
26
27    #[error("Could not read the requested address range from the file: {0}")]
28    FileIO(#[from] FileAndPathHelperError),
29}
30
31pub struct BinaryImage<F: FileContents + 'static> {
32    inner: BinaryImageInner<F>,
33    info: LibraryInfo,
34}
35
36impl<F: FileContents + 'static> BinaryImage<F> {
37    pub(crate) fn new(
38        inner: BinaryImageInner<F>,
39        name: Option<String>,
40        path: Option<String>,
41    ) -> Result<Self, Error> {
42        let info = inner.make_library_info(name, path)?;
43        Ok(Self { inner, info })
44    }
45
46    pub fn library_info(&self) -> LibraryInfo {
47        self.info.clone()
48    }
49
50    pub fn debug_name(&self) -> Option<&str> {
51        self.info.debug_name.as_deref()
52    }
53
54    pub fn debug_id(&self) -> Option<DebugId> {
55        self.info.debug_id
56    }
57
58    pub fn debug_path(&self) -> Option<&str> {
59        self.info.debug_path.as_deref()
60    }
61
62    pub fn name(&self) -> Option<&str> {
63        self.info.name.as_deref()
64    }
65
66    pub fn code_id(&self) -> Option<CodeId> {
67        self.info.code_id.clone()
68    }
69
70    pub fn path(&self) -> Option<&str> {
71        self.info.path.as_deref()
72    }
73
74    pub fn arch(&self) -> Option<&str> {
75        self.info.arch.as_deref()
76    }
77
78    pub fn make_object(
79        &self,
80    ) -> Option<object::File<'_, RangeReadRef<'_, &'_ FileContentsWrapper<F>>>> {
81        self.inner
82            .make_object()
83            .expect("We already parsed this before, why is it not parsing now?")
84    }
85
86    pub fn read_bytes_at_relative_address(
87        &self,
88        start_address: u32,
89        size: u32,
90    ) -> Result<&[u8], CodeByteReadingError> {
91        self.inner
92            .read_bytes_at_relative_address(start_address, size)
93    }
94}
95
96pub enum BinaryImageInner<F: FileContents + 'static> {
97    Normal(FileContentsWrapper<F>, FileKind),
98    MemberOfFatArchive(MachOFatArchiveMemberData<F>, FileKind),
99    MemberOfDyldSharedCache(DyldCacheFileData<F>),
100    JitDump(FileContentsWrapper<F>, JitDumpIndex),
101}
102
103impl<F: FileContents> BinaryImageInner<F> {
104    fn make_library_info(
105        &self,
106        name: Option<String>,
107        path: Option<String>,
108    ) -> Result<LibraryInfo, Error> {
109        let (debug_id, code_id, debug_path, debug_name, arch) = match self {
110            BinaryImageInner::Normal(file, file_kind) => {
111                let data = file.full_range();
112                let object = object::File::parse(data)
113                    .map_err(|e| Error::ObjectParseError(*file_kind, e))?;
114                let debug_id = debug_id_for_object(&object);
115                match file_kind {
116                    FileKind::Pe32 | FileKind::Pe64 => {
117                        let (code_id, debug_path, debug_name) =
118                            if let Ok(pe) = PeFile64::parse(file) {
119                                pe_info(&pe).into_tuple()
120                            } else if let Ok(pe) = PeFile32::parse(file) {
121                                pe_info(&pe).into_tuple()
122                            } else {
123                                (None, None, None)
124                            };
125                        let arch =
126                            object_arch_to_string(object.architecture()).map(ToOwned::to_owned);
127                        (debug_id, code_id, debug_path, debug_name, arch)
128                    }
129                    FileKind::MachO32 | FileKind::MachO64 => {
130                        let macho_data = MachOData::new(file, 0, *file_kind == FileKind::MachO64);
131                        let code_id = code_id_for_object(&object);
132                        let arch = macho_data.get_arch().map(ToOwned::to_owned);
133                        let (debug_path, debug_name) = (path.clone(), name.clone());
134                        (debug_id, code_id, debug_path, debug_name, arch)
135                    }
136                    _ => {
137                        let code_id = code_id_for_object(&object);
138                        let (debug_path, debug_name) = (path.clone(), name.clone());
139                        let arch =
140                            object_arch_to_string(object.architecture()).map(ToOwned::to_owned);
141                        (debug_id, code_id, debug_path, debug_name, arch)
142                    }
143                }
144            }
145            BinaryImageInner::MemberOfFatArchive(member, file_kind) => {
146                let data = member.data();
147                let object = object::File::parse(data)
148                    .map_err(|e| Error::ObjectParseError(*file_kind, e))?;
149                let debug_id = debug_id_for_object(&object);
150                let code_id = code_id_for_object(&object);
151                let (debug_path, debug_name) = (path.clone(), name.clone());
152                let arch = member.arch();
153                (debug_id, code_id, debug_path, debug_name, arch)
154            }
155            BinaryImageInner::MemberOfDyldSharedCache(dyld_cache_file_data) => {
156                let (obj, macho_data) = dyld_cache_file_data.make_object()?.into_parts();
157                let debug_id = debug_id_for_object(&obj);
158                let code_id = code_id_for_object(&obj);
159                let (debug_path, debug_name) = (path.clone(), name.clone());
160                let arch = macho_data.get_arch().map(ToOwned::to_owned);
161                (debug_id, code_id, debug_path, debug_name, arch)
162            }
163            BinaryImageInner::JitDump(file, _index) => {
164                let header_bytes =
165                    file.read_bytes_at(0, JitDumpHeader::SIZE as u64)
166                        .map_err(|e| {
167                            Error::HelperErrorDuringFileReading(path.clone().unwrap_or_default(), e)
168                        })?;
169                let header = JitDumpHeader::parse(RawData::Single(header_bytes))
170                    .map_err(Error::JitDumpParsing)?;
171                let (debug_id, code_id_bytes) = debug_id_and_code_id_for_jitdump(
172                    header.pid,
173                    header.timestamp,
174                    header.elf_machine_arch,
175                );
176                let code_id = CodeId::ElfBuildId(ElfBuildId::from_bytes(&code_id_bytes));
177                let (debug_path, debug_name) = (path.clone(), name.clone());
178                let arch =
179                    elf_machine_arch_to_string(header.elf_machine_arch).map(ToOwned::to_owned);
180                (Some(debug_id), Some(code_id), debug_path, debug_name, arch)
181            }
182        };
183        let info = LibraryInfo {
184            debug_id,
185            debug_name,
186            debug_path,
187            name,
188            code_id,
189            path,
190            arch,
191        };
192        Ok(info)
193    }
194
195    fn make_object(
196        &self,
197    ) -> Result<Option<object::File<'_, RangeReadRef<'_, &'_ FileContentsWrapper<F>>>>, Error> {
198        match self {
199            BinaryImageInner::Normal(file, file_kind) => {
200                let obj = object::File::parse(file.full_range())
201                    .map_err(|e| Error::ObjectParseError(*file_kind, e))?;
202                Ok(Some(obj))
203            }
204            BinaryImageInner::MemberOfFatArchive(member, file_kind) => {
205                let obj = object::File::parse(member.data())
206                    .map_err(|e| Error::ObjectParseError(*file_kind, e))?;
207                Ok(Some(obj))
208            }
209            BinaryImageInner::MemberOfDyldSharedCache(dyld_cache_file_data) => {
210                let (obj, _) = dyld_cache_file_data.make_object()?.into_parts();
211                Ok(Some(obj))
212            }
213            BinaryImageInner::JitDump(_file, _index) => Ok(None),
214        }
215    }
216
217    /// Shortens the size as needed to fit in the section.
218    pub fn read_bytes_at_relative_address(
219        &self,
220        start_address: u32,
221        size: u32,
222    ) -> Result<&[u8], CodeByteReadingError> {
223        let object = match self.make_object().expect("We've succeeded before") {
224            Some(obj) => obj,
225            None => {
226                // No object. This must be JITDUMP.
227                if let BinaryImageInner::JitDump(data, index) = self {
228                    let (entry_index, _symbol_address, offset_from_symbol) = index
229                        .lookup_relative_address(start_address)
230                        .ok_or(CodeByteReadingError::AddressNotFound)?;
231                    let entry = &index.entries[entry_index];
232                    let symbol_code_bytes_len = entry.code_bytes_len;
233                    let remaining_bytes_after_start_address =
234                        symbol_code_bytes_len - offset_from_symbol;
235                    let size = (size as u64).min(remaining_bytes_after_start_address);
236                    let start_offset = entry.code_bytes_offset + offset_from_symbol;
237                    return Ok(data.read_bytes_at(start_offset, size)?);
238                } else {
239                    panic!()
240                }
241            }
242        };
243
244        // Translate start_address from a "relative address" into an
245        // SVMA ("stated virtual memory address").
246        let image_base = relative_address_base(&object);
247        let start_svma = image_base + u64::from(start_address);
248
249        // Find the section and segment which contains our start_svma.
250        use object::{ObjectSection, ObjectSegment};
251        let (section, section_end_svma) = object
252            .sections()
253            .find_map(|section| {
254                let section_start_svma = section.address();
255                let section_end_svma = section_start_svma.checked_add(section.size())?;
256                if !(section_start_svma..section_end_svma).contains(&start_svma) {
257                    return None;
258                }
259
260                Some((section, section_end_svma))
261            })
262            .ok_or(CodeByteReadingError::AddressNotFound)?;
263
264        let segment = object.segments().find(|segment| {
265            let segment_start_svma = segment.address();
266            if let Some(segment_end_svma) = segment_start_svma.checked_add(segment.size()) {
267                (segment_start_svma..segment_end_svma).contains(&start_svma)
268            } else {
269                false
270            }
271        });
272
273        let max_read_len = section_end_svma - start_svma;
274        let read_len = u64::from(size).min(max_read_len);
275
276        // Now read the instruction bytes from the file.
277        let bytes = if let Some(segment) = segment {
278            segment
279                .data_range(start_svma, read_len)?
280                .ok_or(CodeByteReadingError::ByteRangeNotInSection)?
281        } else {
282            // We don't have a segment, try reading via the section.
283            // We hit this path with synthetic .so files created by `perf inject --jit`;
284            // those only have sections, no segments (i.e. no ELF LOAD commands).
285            // For regular files, we prefer to read the data via the segment, because
286            // the segment is more likely to have correct file offset information.
287            // Specifically, incorrect section file offset information was observed in
288            // the arm64e dyld cache on macOS 13.0.1, FB11929250.
289            section
290                .data_range(start_svma, read_len)?
291                .ok_or(CodeByteReadingError::ByteRangeNotInSection)?
292        };
293        Ok(bytes)
294    }
295}
296
297struct PeInfo {
298    code_id: CodeId,
299    pdb_path: Option<String>,
300    pdb_name: Option<String>,
301}
302
303impl PeInfo {
304    pub fn into_tuple(self) -> (Option<CodeId>, Option<String>, Option<String>) {
305        (Some(self.code_id), self.pdb_path, self.pdb_name)
306    }
307}
308
309fn pe_info<'a, Pe: ImageNtHeaders, R: ReadRef<'a>>(pe: &PeFile<'a, Pe, R>) -> PeInfo {
310    // The code identifier consists of the `time_date_stamp` field id the COFF header, followed by
311    // the `size_of_image` field in the optional header. If the optional PE header is not present,
312    // this identifier is `None`.
313    let header = pe.nt_headers();
314    let timestamp = header
315        .file_header()
316        .time_date_stamp
317        .get(object::LittleEndian);
318    let image_size = header.optional_header().size_of_image();
319    let code_id = CodeId::PeCodeId(PeCodeId {
320        timestamp,
321        image_size,
322    });
323
324    let pdb_path: Option<String> = pe.pdb_info().ok().and_then(|pdb_info| {
325        let pdb_path = std::str::from_utf8(pdb_info?.path()).ok()?;
326        Some(pdb_path.to_string())
327    });
328
329    let pdb_name = pdb_path
330        .as_deref()
331        .map(|pdb_path| match pdb_path.rsplit_once(['/', '\\']) {
332            Some((_base, file_name)) => file_name.to_string(),
333            None => pdb_path.to_string(),
334        });
335
336    PeInfo {
337        code_id,
338        pdb_path,
339        pdb_name,
340    }
341}
342
343fn object_arch_to_string(arch: object::Architecture) -> Option<&'static str> {
344    let s = match arch {
345        object::Architecture::Arm => "arm",
346        object::Architecture::Aarch64 => "arm64",
347        object::Architecture::I386 => "x86",
348        object::Architecture::X86_64 => "x86_64",
349        _ => return None,
350    };
351    Some(s)
352}
353
354fn elf_machine_arch_to_string(elf_machine_arch: u32) -> Option<&'static str> {
355    let s = match elf_machine_arch as u16 {
356        object::elf::EM_ARM => "arm",
357        object::elf::EM_AARCH64 => "arm64",
358        object::elf::EM_386 => "x86",
359        object::elf::EM_X86_64 => "x86_64",
360        _ => return None,
361    };
362    Some(s)
363}