1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
//! The Apple [`BcSymbolMap`] file format.

use std::error::Error;
use std::fmt;
use std::io::Cursor;
use std::iter::FusedIterator;
use std::path::Path;

use elementtree::Element;
use symbolic_common::{AsSelf, DebugId, ParseDebugIdError};
use thiserror::Error;

use super::SWIFT_HIDDEN_PREFIX;

/// Header line identifying a supported bitcode symbol map.
///
/// [`BcSymbolMap::test`] looks for it as a byte prefix of the buffer, while
/// [`BcSymbolMap::parse`] requires the first line to be exactly this string.
const BC_SYMBOL_MAP_HEADER: &str = "BCSymbolMap Version: 2.0";

/// The error type for handling a [`BcSymbolMap`].
///
/// The `Display` output is that of the error kind alone; the underlying cause, when there
/// is one, is exposed as the error source.
#[derive(Debug, Error)]
#[error("{kind}")]
pub struct BcSymbolMapError {
    /// Category of the failure, this is what gets rendered by `Display`.
    kind: BcSymbolMapErrorKind,
    /// Underlying cause, if any (e.g. the UTF-8 decoding error raised while parsing).
    #[source]
    source: Option<Box<dyn Error + Send + Sync + 'static>>,
}

/// Error kind for [`BcSymbolMapError`].
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BcSymbolMapErrorKind {
    /// No supported BCSymbolMap header was found.
    ///
    /// The header may be missing altogether, announce an unknown version or be corrupted
    /// in some other way.
    InvalidHeader,
    /// The bitcode symbol map contained invalid UTF-8.
    InvalidUtf8,
}

impl fmt::Display for BcSymbolMapErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Every variant maps to a fixed message, so pick it first and write it once.
        let message = match self {
            Self::InvalidHeader => "no valid BCSymbolMap header was found",
            Self::InvalidUtf8 => "BCSymbolmap is not valid UTF-8",
        };
        f.write_str(message)
    }
}

/// An in-memory representation of the Apple bitcode symbol map.
///
/// This is an auxiliary file, not an object file.  It can be used to provide de-obfuscated
/// symbol names to a [`MachObject`] object using its
/// [`load_symbolmap`](crate::macho::MachObject::load_symbolmap) method.
///
/// It is common for bitcode builds to obfuscate the names in the object file's symbol table
/// so that even the DWARF files do not have the actual symbol names.  In this case the
/// build process will create a `.bcsymbolmap` file which maps the obfuscated symbol names
/// back to the original ones.  This structure can parse these files and allows providing
/// this information to the [`MachObject`] so that it has the original symbol names instead
/// of `__hidden#NNN_` ones.
///
/// The map does not own its names: they are slices of the buffer handed to
/// [`BcSymbolMap::parse`], which is what the `'d` lifetime refers to.
///
/// See [`MachObject::load_symbolmap`](crate::macho::MachObject::load_symbolmap) for an
/// example of how to use this.
///
/// [`MachObject`]: crate::macho::MachObject
#[derive(Clone, Debug)]
pub struct BcSymbolMap<'d> {
    /// The lines following the header, in file order.  The position of a name in this
    /// vector is the index used for lookups.
    names: Vec<&'d str>,
}

/// Turns a bare error kind into a [`BcSymbolMapError`] which has no underlying cause.
impl From<BcSymbolMapErrorKind> for BcSymbolMapError {
    fn from(kind: BcSymbolMapErrorKind) -> Self {
        Self { kind, source: None }
    }
}

/// Lets a borrowed [`BcSymbolMap`] be viewed as one whose data lifetime is that of the
/// borrow itself.
impl<'a> AsSelf<'a> for BcSymbolMap<'_> {
    type Ref = BcSymbolMap<'a>;

    fn as_self(&'a self) -> &Self::Ref {
        self
    }
}

impl<'d> BcSymbolMap<'d> {
    /// Tests whether the buffer could contain a [`BcSymbolMap`].
    ///
    /// This only checks that `bytes` starts with the supported header, the remainder of the
    /// buffer is not inspected.
    pub fn test(bytes: &[u8]) -> bool {
        bytes.starts_with(BC_SYMBOL_MAP_HEADER.as_bytes())
    }

    /// Parses the BCSymbolMap.
    ///
    /// A symbol map does not contain the UUID of its symbols, instead this is normally
    /// encoded in the filename.
    ///
    /// The returned map borrows all of its names from `data`.
    ///
    /// # Errors
    ///
    /// Fails if `data` is not valid UTF-8, or if its first line is missing or not exactly
    /// the supported header.
    pub fn parse(data: &'d [u8]) -> Result<Self, BcSymbolMapError> {
        let content = std::str::from_utf8(data).map_err(|err| BcSymbolMapError {
            kind: BcSymbolMapErrorKind::InvalidUtf8,
            source: Some(Box::new(err)),
        })?;

        let mut lines_iter = content.lines();

        // An empty buffer yields no first line at all, which is reported the same way as a
        // first line that is not the expected header.
        if lines_iter.next() != Some(BC_SYMBOL_MAP_HEADER) {
            return Err(BcSymbolMapErrorKind::InvalidHeader.into());
        }

        // Every remaining line is a name, its position is the index used for lookups.
        let names = lines_iter.collect();

        Ok(Self { names })
    }

    /// Returns the name of a symbol if it exists in this mapping.
    ///
    /// The `index` is zero-based and counts the lines following the header.
    ///
    /// # Examples
    ///
    /// ```
    /// use symbolic_debuginfo::macho::BcSymbolMap;
    ///
    /// // let data = std::fs::read("c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap").unwrap();
    /// # let data =
    /// #     std::fs::read("tests/fixtures/c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap")
    /// #         .unwrap();
    /// let map = BcSymbolMap::parse(&data).unwrap();
    ///
    /// assert_eq!(map.get(43), Some("Sources/Sentry/Public/SentryMessage.h"));
    /// assert_eq!(map.get(usize::MAX), None);  // We do not have this many entries
    /// ```
    pub fn get(&self, index: usize) -> Option<&'d str> {
        self.names.get(index).copied()
    }

    /// Resolves a name using this mapping.
    ///
    /// If the name matches the `__hidden#NNN_` pattern that indicates a [`BcSymbolMap`]
    /// lookup, it will be looked up and the resolved name will be returned.  Otherwise, or
    /// if the mapping has no entry for the index, the name is returned unchanged.
    ///
    /// # Examples
    ///
    /// ```
    /// use symbolic_debuginfo::macho::BcSymbolMap;
    ///
    /// // let data = std::fs::read("c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap").unwrap();
    /// # let data =
    /// #     std::fs::read("tests/fixtures/c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap")
    /// #         .unwrap();
    /// let map = BcSymbolMap::parse(&data).unwrap();
    ///
    /// assert_eq!(map.resolve("__hidden#43_"), "Sources/Sentry/Public/SentryMessage.h");
    /// assert_eq!(map.resolve("_addJSONData"), "_addJSONData");  // #64
    /// ```
    pub fn resolve(&self, name: &'d str) -> &'d str {
        // Each step yields `None` as soon as the name stops looking like a hidden symbol
        // with a known index, in which case the input is handed back untouched.
        name.strip_prefix(SWIFT_HIDDEN_PREFIX)
            .and_then(|tail| tail.strip_suffix('_'))
            .and_then(|index_as_string| index_as_string.parse::<usize>().ok())
            .and_then(|index| self.get(index))
            .unwrap_or(name)
    }

    /// Resolves a name given in raw bytes using this mapping.
    ///
    /// Contrary to [`BcSymbolMap::resolve`], this works with any byte-like type, and returns an
    /// [`Option`].  `None` is returned if the bytes are not valid UTF-8, do not match the
    /// `__hidden#NNN_` pattern, or if the mapping has no entry for the index.
    ///
    /// The resolved name borrows from the data this map was parsed from, not from the map
    /// itself nor from `name`, which matches [`BcSymbolMap::get`].
    pub(crate) fn resolve_opt(&self, name: impl AsRef<[u8]>) -> Option<&'d str> {
        let name = std::str::from_utf8(name.as_ref()).ok()?;
        let tail = name.strip_prefix(SWIFT_HIDDEN_PREFIX)?;
        let index_as_string = tail.strip_suffix('_')?;
        let index = index_as_string.parse::<usize>().ok()?;
        self.get(index)
    }

    /// Returns an iterator over all the names in this bitcode symbol map.
    ///
    /// Names are yielded in the order of their index.
    pub fn iter(&self) -> BcSymbolMapIterator<'_, 'd> {
        BcSymbolMapIterator {
            iter: self.names.iter(),
        }
    }
}

/// Iterator over the names in a [`BcSymbolMap`].
///
/// This struct is created by [`BcSymbolMap::iter`].
///
/// The iterator knows its exact remaining length and can be consumed from both ends, just
/// like the slice iterator it wraps.
#[derive(Clone, Debug)]
pub struct BcSymbolMapIterator<'a, 'd> {
    /// Iterator over the map's name slice; `'a` is the borrow of the map, `'d` the
    /// lifetime of the names themselves.
    iter: std::slice::Iter<'a, &'d str>,
}

impl<'a, 'd> Iterator for BcSymbolMapIterator<'a, 'd> {
    type Item = &'d str;

    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next().copied()
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl DoubleEndedIterator for BcSymbolMapIterator<'_, '_> {
    fn next_back(&mut self) -> Option<Self::Item> {
        self.iter.next_back().copied()
    }
}

// The wrapped slice iterator reports an exact `size_hint`, which is all the default
// `len` implementation relies on.
impl ExactSizeIterator for BcSymbolMapIterator<'_, '_> {}

impl FusedIterator for BcSymbolMapIterator<'_, '_> {}

/// Error for handling or creating a [`UuidMapping`].
///
/// The `Display` output is that of the error kind alone; the underlying cause, when there
/// is one, is exposed as the error source.
#[derive(Debug, Error)]
#[error("{kind}")]
pub struct UuidMappingError {
    /// Category of the failure, this is what gets rendered by `Display`.
    kind: UuidMappingErrorKind,
    /// Underlying cause, if any (e.g. the XML parser's or the UUID parser's error).
    #[source]
    source: Option<Box<dyn Error + Send + Sync + 'static>>,
}

impl From<elementtree::Error> for UuidMappingError {
    fn from(source: elementtree::Error) -> Self {
        Self {
            kind: UuidMappingErrorKind::PListParse,
            source: Some(Box::new(source)),
        }
    }
}

/// Turns a bare error kind into a [`UuidMappingError`] which has no underlying cause.
impl From<UuidMappingErrorKind> for UuidMappingError {
    fn from(kind: UuidMappingErrorKind) -> Self {
        Self { kind, source: None }
    }
}

impl From<ParseDebugIdError> for UuidMappingError {
    fn from(source: ParseDebugIdError) -> Self {
        Self {
            kind: UuidMappingErrorKind::PListParseValue,
            source: Some(Box::new(source)),
        }
    }
}

/// Error kind for [`UuidMappingError`].
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum UuidMappingErrorKind {
    /// The XML could be read but did not have the schema expected of the plist.
    PListSchema,
    /// The plist could not be parsed as XML.
    PListParse,
    /// A required value of the plist could not be parsed.
    PListParseValue,
    /// The UUID could not be parsed from the filename.
    ParseFilename,
}

impl fmt::Display for UuidMappingErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Every variant maps to a fixed message, so pick it first and write it once.
        let message = match self {
            Self::PListSchema => "XML structure did not match expected schema",
            Self::PListParse => "Invalid XML",
            Self::PListParseValue => "Failed to parse a value into the right type",
            Self::ParseFilename => "Failed to parse UUID from filename",
        };
        f.write_str(message)
    }
}

/// A mapping from the `dSYM` UUID to an original UUID.
///
/// This is used e.g. when dealing with bitcode builds, when Apple compiles objects from
/// bitcode these objects will have new UUIDs as debug identifiers.  This mapping can be
/// found in the `dSYMs/<object-id>/Contents/Resources/<object-id>.plist` file of downloaded
/// debugging symbols.
///
/// This struct allows you to keep track of such a mapping and provides support for parsing
/// it from the PropertyList file format.
#[derive(Clone, Copy, Debug)]
pub struct UuidMapping {
    /// UUID of the compiled binary and its associated `dSYM`.
    dsym_uuid: DebugId,
    /// UUID of the original object file.
    original_uuid: DebugId,
}

impl UuidMapping {
    /// Creates a new UUID mapping from two [`DebugId`]s.
    pub fn new(dsym_uuid: DebugId, original_uuid: DebugId) -> Self {
        Self {
            dsym_uuid,
            original_uuid,
        }
    }

    /// Parses a PropertyList containing a `DBGOriginalUUID` mapping.
    ///
    /// The `filename` may contain multiple path segments, the stem of the filename segment
    /// should contain the UUID of the `dSYM`.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::path::Path;
    /// use symbolic_common::DebugId;
    /// use symbolic_debuginfo::macho::UuidMapping;
    ///
    /// let filename = Path::new("2d10c42f-591d-3265-b147-78ba0868073f.plist");
    /// # let filename = Path::new("tests/fixtures/2d10c42f-591d-3265-b147-78ba0868073f.plist");
    /// let dsym_uuid: DebugId = filename
    ///     .file_stem().unwrap()
    ///     .to_str().unwrap()
    ///     .parse().unwrap();
    /// let data = std::fs::read(filename).unwrap();
    ///
    /// let uuid_map = UuidMapping::parse_plist(dsym_uuid, &data).unwrap();
    ///
    /// assert_eq!(uuid_map.dsym_uuid(), dsym_uuid);
    /// assert_eq!(
    ///     uuid_map.original_uuid(),
    ///     "c8374b6d-6e96-34d8-ae38-efaa5fec424f".parse().unwrap(),
    /// )
    /// ```
    pub fn parse_plist(dsym_uuid: DebugId, data: &[u8]) -> Result<Self, UuidMappingError> {
        Ok(Self {
            dsym_uuid,
            original_uuid: uuid_from_plist(data)?,
        })
    }

    /// Parses a PropertyList containing a `DBGOriginalUUID` mapping.
    ///
    /// This is a convenience version of [`UuidMapping::parse_plist`] which extracts
    /// the UUID from the `filename`.
    ///
    /// The `filename` may contain multiple path segments, the stem of the filename segment
    /// should contain the UUID of the `dSYM`.  This is the format the PList is normally
    /// found in a `dSYM` directory structure.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::path::Path;
    /// use symbolic_common::DebugId;
    /// use symbolic_debuginfo::macho::UuidMapping;
    ///
    /// let filename = Path::new("Contents/Resources/2D10C42F-591D-3265-B147-78BA0868073F.plist");
    /// # let filename = Path::new("tests/fixtures/2d10c42f-591d-3265-b147-78ba0868073f.plist");
    /// let data = std::fs::read(filename).unwrap();
    ///
    /// let uuid_map = UuidMapping::parse_plist_with_filename(filename, &data).unwrap();
    ///
    /// assert_eq!(
    ///     uuid_map.dsym_uuid(),
    ///     "2d10c42f-591d-3265-b147-78ba0868073f".parse().unwrap(),
    /// );
    /// assert_eq!(
    ///     uuid_map.original_uuid(),
    ///     "c8374b6d-6e96-34d8-ae38-efaa5fec424f".parse().unwrap(),
    /// )
    /// ```
    pub fn parse_plist_with_filename(
        filename: &Path,
        data: &[u8],
    ) -> Result<Self, UuidMappingError> {
        let dsym_uuid = filename
            .file_stem()
            .ok_or_else(|| UuidMappingError::from(UuidMappingErrorKind::ParseFilename))?
            .to_str()
            .ok_or_else(|| UuidMappingError::from(UuidMappingErrorKind::ParseFilename))?
            .parse()?;
        Self::parse_plist(dsym_uuid, data)
    }

    /// Returns the UUID of the original object file.
    pub fn original_uuid(&self) -> DebugId {
        self.original_uuid
    }

    /// Returns the UUID of the compiled binary and associated `dSYM`.
    pub fn dsym_uuid(&self) -> DebugId {
        self.dsym_uuid
    }
}

/// Extracts the original UUID from the raw bytes of an XML PropertyList.
///
/// Fails if the bytes are not valid XML, if no `DBGOriginalUUID` value can be located in
/// the document, or if that value is not a valid [`DebugId`].
fn uuid_from_plist(data: &[u8]) -> Result<DebugId, UuidMappingError> {
    let root = Element::from_reader(Cursor::new(data))?;

    match uuid_from_xml_plist(root) {
        Some(raw_uuid) => Ok(raw_uuid.parse::<DebugId>()?),
        None => Err(UuidMappingErrorKind::PListSchema.into()),
    }
}

fn uuid_from_xml_plist(plist: Element) -> Option<String> {
    let version = plist.get_attr("version")?;
    if version != "1.0" {
        return None;
    }
    let dict = plist.find("dict")?;

    let mut found_key = false;
    let mut raw_original = None;
    for element in dict.children() {
        if element.tag().name() == "key" && element.text() == "DBGOriginalUUID" {
            found_key = true;
        } else if found_key {
            raw_original = Some(element.text().to_string());
            break;
        }
    }

    raw_original
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Path of the bitcode symbol map fixture shared by most tests.
    const SYMBOLMAP_FIXTURE: &str =
        "tests/fixtures/c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap";

    /// Path of the PropertyList fixture holding the `DBGOriginalUUID` mapping.
    const PLIST_FIXTURE: &str = "tests/fixtures/2d10c42f-591d-3265-b147-78ba0868073f.plist";

    /// Loads the symbol map fixture; the caller owns the buffer the parsed map borrows from.
    fn read_symbolmap_fixture() -> String {
        std::fs::read_to_string(SYMBOLMAP_FIXTURE).unwrap()
    }

    #[test]
    fn test_bcsymbolmap_test() {
        assert!(BcSymbolMap::test(b"BCSymbolMap Version: 2.0"));
        assert!(!BcSymbolMap::test(b"BCSymbolMap Vers"));
        assert!(!BcSymbolMap::test(b"oops"));
    }

    #[test]
    fn test_basic() {
        let data = read_symbolmap_fixture();
        let bytes = data.as_bytes();

        assert!(BcSymbolMap::test(bytes));

        let map = BcSymbolMap::parse(bytes).unwrap();
        assert_eq!(map.get(2), Some("-[SentryMessage serialize]"))
    }

    #[test]
    fn test_iter() {
        let data = read_symbolmap_fixture();
        let map = BcSymbolMap::parse(data.as_bytes()).unwrap();

        let mut names = map.iter();

        let (lower_bound, upper_bound) = names.size_hint();
        assert!(lower_bound > 0);
        assert!(upper_bound.is_some());

        assert_eq!(names.next(), Some("-[SentryMessage initWithFormatted:]"));
        assert_eq!(names.next(), Some("-[SentryMessage setMessage:]"));
    }

    #[test]
    fn test_data_lifetime() {
        let data = read_symbolmap_fixture();

        // The name must stay usable after the map itself has been dropped.
        let name = {
            let map = BcSymbolMap::parse(data.as_bytes()).unwrap();
            map.get(0).unwrap()
        };

        assert_eq!(name, "-[SentryMessage initWithFormatted:]");
    }

    #[test]
    fn test_resolve() {
        let data = read_symbolmap_fixture();
        let map = BcSymbolMap::parse(data.as_bytes()).unwrap();

        assert_eq!(map.resolve("normal_name"), "normal_name");
        assert_eq!(map.resolve("__hidden#2_"), "-[SentryMessage serialize]");
    }

    #[test]
    fn test_plist() {
        let uuid: DebugId = "2d10c42f-591d-3265-b147-78ba0868073f".parse().unwrap();
        let data = std::fs::read(PLIST_FIXTURE).unwrap();
        let map = UuidMapping::parse_plist(uuid, &data).unwrap();

        assert_eq!(map.dsym_uuid(), uuid);
        assert_eq!(
            map.original_uuid(),
            "c8374b6d-6e96-34d8-ae38-efaa5fec424f".parse().unwrap()
        );
    }
}