1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
use debugid::DebugId;
use object::{Object, ObjectSection, SectionKind};
use std::convert::TryInto;
use uuid::Uuid;

/// Extension trait that adds extra constructors to [`DebugId`] for inputs that
/// are not already a UUID: raw identifier bytes (such as an ELF build ID) and
/// the leading bytes of a text section.
pub trait DebugIdExt {
    /// Creates a DebugId from some identifier. The identifier could be
    /// an ELF build ID, or a hash derived from the text section.
    /// The `little_endian` argument specifies whether the object file
    /// is targeting a little endian architecture.
    ///
    /// The implementation for `DebugId` in this file only uses the first
    /// 16 bytes of `identifier`; shorter identifiers are padded with zeros.
    fn from_identifier(identifier: &[u8], little_endian: bool) -> Self;

    /// Creates a DebugId from a hash of the first 4096 bytes of the .text section.
    /// The `little_endian` argument specifies whether the object file
    /// is targeting a little endian architecture.
    ///
    /// The implementation for `DebugId` in this file ignores any bytes of
    /// `text_first_page` beyond the first 4096, and accepts shorter inputs.
    fn from_text_first_page(text_first_page: &[u8], little_endian: bool) -> Self;
}

impl DebugIdExt for DebugId {
    /// Builds a `DebugId` from up to 16 bytes of `identifier`.
    ///
    /// Identifiers longer than 16 bytes are truncated (this is lossy for ELF
    /// build IDs, which are usually 20 bytes); shorter ones are zero-padded.
    fn from_identifier(identifier: &[u8], little_endian: bool) -> Self {
        // Normalize the input to exactly 16 bytes. `zip` stops at the shorter
        // of the two sequences, so any bytes that are not overwritten stay zero
        // and any excess input bytes are ignored.
        let mut raw = [0u8; 16];
        for (dst, src) in raw.iter_mut().zip(identifier) {
            *dst = *src;
        }

        // The first 8 bytes are interpreted as the u32/u16/u16 fields of a UUID,
        // the last 8 bytes are carried over verbatim.
        let (head, tail) = raw.split_at(8);
        let field1_bytes = [head[0], head[1], head[2], head[3]];
        let field2_bytes = [head[4], head[5]];
        let field3_bytes = [head[6], head[7]];

        // Read the three leading fields in the endianness of the object file;
        // `Uuid::from_fields` then stores them in big endian order.
        // ELF build IDs are not really field-based UUIDs, but the breakpad and
        // sentry/symbolic tooling treats them this way, and we want to produce
        // the same IDs as those tools.
        let field1 = if little_endian {
            u32::from_le_bytes(field1_bytes)
        } else {
            u32::from_be_bytes(field1_bytes)
        };
        let field2 = if little_endian {
            u16::from_le_bytes(field2_bytes)
        } else {
            u16::from_be_bytes(field2_bytes)
        };
        let field3 = if little_endian {
            u16::from_le_bytes(field3_bytes)
        } else {
            u16::from_be_bytes(field3_bytes)
        };

        // `tail` is always exactly 8 bytes long, so the conversion cannot fail.
        DebugId::from_uuid(Uuid::from_fields(
            field1,
            field2,
            field3,
            tail.try_into().unwrap(),
        ))
    }

    /// Builds a `DebugId` by XOR-folding at most the first 4096 bytes of
    /// `text_first_page` into 16 bytes and passing the result to
    /// `from_identifier`.
    fn from_text_first_page(text_first_page: &[u8], little_endian: bool) -> Self {
        const UUID_SIZE: usize = 16;
        const PAGE_SIZE: usize = 4096;

        // Only the first page takes part in the hash.
        let page = &text_first_page[..text_first_page.len().min(PAGE_SIZE)];

        // XOR every 16-byte chunk into the accumulator, byte by byte. A short
        // trailing chunk only affects the leading bytes of the accumulator.
        let mut digest = [0u8; UUID_SIZE];
        for chunk in page.chunks(UUID_SIZE) {
            for (acc, byte) in digest.iter_mut().zip(chunk) {
                *acc ^= *byte;
            }
        }

        Self::from_identifier(&digest, little_endian)
    }
}

/// Computes a `DebugId` for the given object file, or returns `None` if no
/// identifier could be determined.
///
/// The following sources are tried in order, and the first one that is
/// present wins:
///
/// 1. PDB info (GUID and age),
/// 2. the build ID,
/// 3. the Mach-O UUID,
/// 4. a hash of up to the first 4096 bytes of the first section whose kind
///    is `SectionKind::Text`.
///
/// # Panics
///
/// Panics if `DebugId::from_guid_age` returns an error for the GUID and age
/// reported by the PDB info.
pub fn debug_id_for_object<'data: 'file, 'file>(
    obj: &'file impl Object<'data, 'file>,
) -> Option<DebugId> {
    // Windows: the PDB info carries a GUID and an age.
    if let Ok(Some(codeview)) = obj.pdb_info() {
        let id = DebugId::from_guid_age(&codeview.guid(), codeview.age()).unwrap();
        return Some(id);
    }

    // ELF: derive the ID from the build ID bytes.
    if let Ok(Some(build_id_bytes)) = obj.build_id() {
        let id = DebugId::from_identifier(build_id_bytes, obj.is_little_endian());
        return Some(id);
    }

    // mach-O: the UUID can be used as-is.
    if let Ok(Some(uuid_bytes)) = obj.mach_uuid() {
        let id = DebugId::from_uuid(Uuid::from_bytes(uuid_bytes));
        return Some(id);
    }

    // None of the above produced an ID. Fall back to a synthetic identifier
    // which is computed from the first page of the "text" (program code)
    // section. Without a text section there is nothing left to try.
    let text_section = obj
        .sections()
        .find(|candidate| candidate.kind() == SectionKind::Text)?;
    let first_page_len = text_section.size().min(4096);
    match text_section.data_range(text_section.address(), first_page_len) {
        Ok(Some(first_page)) => Some(DebugId::from_text_first_page(
            first_page,
            obj.is_little_endian(),
        )),
        _ => None,
    }
}