samply_symbols/
debugid_util.rs

1use debugid::DebugId;
2use object::{Object, ObjectSection};
3use uuid::Uuid;
4
5use crate::shared::{CodeId, ElfBuildId};
6
/// Extra constructors for debug-ID-like types, building an ID from raw
/// identifier bytes or from the start of the program code.
pub trait DebugIdExt {
    /// Builds an ID from an arbitrary identifier, such as an ELF build ID or
    /// a hash computed over the text section.
    ///
    /// `little_endian` tells the implementation whether the object file the
    /// identifier came from targets a little endian architecture.
    fn from_identifier(identifier: &[u8], little_endian: bool) -> Self;

    /// Builds an ID by hashing the first 4096 bytes of the `.text` section.
    ///
    /// `little_endian` tells the implementation whether the object file the
    /// bytes came from targets a little endian architecture.
    fn from_text_first_page(text_first_page: &[u8], little_endian: bool) -> Self;
}
19
20impl DebugIdExt for DebugId {
21    fn from_identifier(identifier: &[u8], little_endian: bool) -> Self {
22        // Make sure that we have exactly 16 bytes available, either truncate or fill
23        // the remainder with zeros.
24        // ELF build IDs are usually 20 bytes, so if the identifier is an ELF build ID
25        // then we're performing a lossy truncation.
26        let mut d = [0u8; 16];
27        let shared_len = identifier.len().min(d.len());
28        d[0..shared_len].copy_from_slice(&identifier[0..shared_len]);
29
30        // Pretend that the build ID was stored as a UUID with u32 u16 u16 fields inside
31        // the file. Parse those fields in the endianness of the file. Then use
32        // Uuid::from_fields to serialize them as big endian.
33        // For ELF build IDs this is a bit silly, because ELF build IDs aren't actually
34        // field-based UUIDs, but this is what the tools in the breakpad and
35        // sentry/symbolic universe do, so we do the same for compatibility with those
36        // tools.
37        let (d1, d2, d3) = if little_endian {
38            (
39                u32::from_le_bytes([d[0], d[1], d[2], d[3]]),
40                u16::from_le_bytes([d[4], d[5]]),
41                u16::from_le_bytes([d[6], d[7]]),
42            )
43        } else {
44            (
45                u32::from_be_bytes([d[0], d[1], d[2], d[3]]),
46                u16::from_be_bytes([d[4], d[5]]),
47                u16::from_be_bytes([d[6], d[7]]),
48            )
49        };
50        let uuid = Uuid::from_fields(d1, d2, d3, d[8..16].try_into().unwrap());
51        DebugId::from_uuid(uuid)
52    }
53
54    // This algorithm XORs 16-byte chunks directly into a 16-byte buffer.
55    fn from_text_first_page(text_first_page: &[u8], little_endian: bool) -> Self {
56        const UUID_SIZE: usize = 16;
57        const PAGE_SIZE: usize = 4096;
58        let mut hash = [0; UUID_SIZE];
59        for (i, byte) in text_first_page.iter().cloned().take(PAGE_SIZE).enumerate() {
60            hash[i % UUID_SIZE] ^= byte;
61        }
62        DebugId::from_identifier(&hash, little_endian)
63    }
64}
65
66/// Tries to obtain a DebugId for an object. This uses the build ID, if available,
67/// and falls back to hashing the first page of the text section otherwise.
68/// Returns None on failure.
69pub fn debug_id_for_object<'data>(obj: &impl Object<'data>) -> Option<DebugId> {
70    // Windows
71    if let Ok(Some(pdb_info)) = obj.pdb_info() {
72        return Some(DebugId::from_guid_age(&pdb_info.guid(), pdb_info.age()).unwrap());
73    }
74
75    // ELF
76    if let Ok(Some(build_id)) = obj.build_id() {
77        return Some(DebugId::from_identifier(build_id, obj.is_little_endian()));
78    }
79
80    // mach-O
81    if let Ok(Some(uuid)) = obj.mach_uuid() {
82        return Some(DebugId::from_uuid(Uuid::from_bytes(uuid)));
83    }
84
85    // We were not able to locate a build ID, so fall back to creating a synthetic
86    // identifier from a hash of the first page of the ".text" (program code) section.
87    if let Some(section) = obj.section_by_name(".text") {
88        let data_len = section.size().min(4096);
89        if let Ok(Some(first_page_data)) = section.data_range(section.address(), data_len) {
90            return Some(DebugId::from_text_first_page(
91                first_page_data,
92                obj.is_little_endian(),
93            ));
94        }
95    }
96
97    None
98}
99
100/// Tries to obtain a CodeId for an object.
101///
102/// This currently only handles mach-O and ELF.
103pub fn code_id_for_object<'data>(obj: &impl Object<'data>) -> Option<CodeId> {
104    // ELF
105    if let Ok(Some(build_id)) = obj.build_id() {
106        return Some(CodeId::ElfBuildId(ElfBuildId::from_bytes(build_id)));
107    }
108
109    // mach-O
110    if let Ok(Some(uuid)) = obj.mach_uuid() {
111        return Some(CodeId::MachoUuid(Uuid::from_bytes(uuid)));
112    }
113
114    None
115}