Skip to main content

symbolic_debuginfo/macho/
bcsymbolmap.rs

1//! The Apple [`BcSymbolMap`] file format.
2
3use std::error::Error;
4use std::fmt;
5use std::io::Cursor;
6use std::iter::FusedIterator;
7use std::path::Path;
8
9use elementtree::Element;
10use symbolic_common::{AsSelf, DebugId, ParseDebugIdError};
11use thiserror::Error;
12
13use super::SWIFT_HIDDEN_PREFIX;
14
/// Header line of a version 2.0 BCSymbolMap file.
///
/// Input is checked against this text both when sniffing a buffer and when parsing it.
const BC_SYMBOL_MAP_HEADER: &str = "BCSymbolMap Version: 2.0";
16
17/// The error type for handling a [`BcSymbolMap`].
18#[derive(Debug, Error)]
19#[error("{kind}")]
20pub struct BcSymbolMapError {
21    kind: BcSymbolMapErrorKind,
22    #[source]
23    source: Option<Box<dyn Error + Send + Sync + 'static>>,
24}
25
/// The different ways in which handling a [`BcSymbolMap`] can fail.
///
/// This is the kind stored inside a [`BcSymbolMapError`].
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BcSymbolMapErrorKind {
    /// The first line is not the header of a supported BCSymbolMap version.
    ///
    /// The header may be missing altogether, name an unknown version, or be corrupted.
    InvalidHeader,
    /// The bitcode symbol map contained bytes which are not valid UTF-8.
    InvalidUtf8,
}

impl fmt::Display for BcSymbolMapErrorKind {
    /// Writes the human-readable message belonging to this kind.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match self {
            Self::InvalidHeader => "no valid BCSymbolMap header was found",
            Self::InvalidUtf8 => "BCSymbolmap is not valid UTF-8",
        };
        f.write_str(message)
    }
}
46
/// A parsed Apple bitcode symbol map, borrowing from the buffer it was parsed from.
///
/// A `.bcsymbolmap` is an auxiliary file rather than an object file.  Bitcode builds
/// commonly obfuscate the names in the symbol table of the object file, so that not even
/// the DWARF files carry the real symbol names.  The build process then writes a
/// `.bcsymbolmap` file which maps the obfuscated names back to the original ones.
///
/// This type parses such a file.  Handing it to a [`MachObject`] through
/// [`load_symbolmap`](crate::macho::MachObject::load_symbolmap) lets the object report the
/// original symbol names in place of the `__hidden#NNN_` ones.
///
/// See [`MachObject::load_symbolmap`](crate::macho::MachObject::load_symbolmap) for an
/// example of how to use this.
///
/// [`MachObject`]: crate::macho::MachObject
#[derive(Clone, Debug)]
pub struct BcSymbolMap<'d> {
    /// The lines following the header, in file order.
    names: Vec<&'d str>,
}
68
69impl From<BcSymbolMapErrorKind> for BcSymbolMapError {
70    fn from(source: BcSymbolMapErrorKind) -> Self {
71        Self {
72            kind: source,
73            source: None,
74        }
75    }
76}
77
78impl<'slf> AsSelf<'slf> for BcSymbolMap<'_> {
79    type Ref = BcSymbolMap<'slf>;
80
81    fn as_self(&'slf self) -> &'slf Self::Ref {
82        self
83    }
84}
85
86impl<'d> BcSymbolMap<'d> {
87    /// Tests whether the buffer could contain a [`BcSymbolMap`].
88    pub fn test(bytes: &[u8]) -> bool {
89        let pattern = BC_SYMBOL_MAP_HEADER.as_bytes();
90        bytes.starts_with(pattern)
91    }
92
93    /// Parses the BCSymbolMap.
94    ///
95    /// A symbol map does not contain the UUID of its symbols, instead this is normally
96    /// encoded in the filename.
97    pub fn parse(data: &'d [u8]) -> Result<Self, BcSymbolMapError> {
98        let content = std::str::from_utf8(data).map_err(|err| BcSymbolMapError {
99            kind: BcSymbolMapErrorKind::InvalidUtf8,
100            source: Some(Box::new(err)),
101        })?;
102
103        let mut lines_iter = content.lines();
104
105        let header = lines_iter
106            .next()
107            .ok_or(BcSymbolMapErrorKind::InvalidHeader)?;
108        if header != BC_SYMBOL_MAP_HEADER {
109            return Err(BcSymbolMapErrorKind::InvalidHeader.into());
110        }
111
112        let names = lines_iter.collect();
113
114        Ok(Self { names })
115    }
116
117    /// Returns the name of a symbol if it exists in this mapping.
118    ///
119    /// # Examples
120    ///
121    /// ```
122    /// use symbolic_debuginfo::macho::BcSymbolMap;
123    ///
124    /// // let data = std::fs::read("c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap").unwrap();
125    /// # let data =
126    /// #     std::fs::read("tests/fixtures/c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap")
127    /// #         .unwrap();
128    /// let map = BcSymbolMap::parse(&data).unwrap();
129    ///
130    /// assert_eq!(map.get(43), Some("Sources/Sentry/Public/SentryMessage.h"));
131    /// assert_eq!(map.get(usize::MAX), None);  // We do not have this many entries
132    /// ```
133    pub fn get(&self, index: usize) -> Option<&'d str> {
134        self.names.get(index).copied()
135    }
136
137    /// Resolves a name using this mapping.
138    ///
139    /// If the name matches the `__hidden#NNN_` pattern that indicates a [`BcSymbolMap`]
140    /// lookup it will be looked up the resolved name will be returned.  Otherwise the name
141    /// is returned unchanged.
142    ///
143    /// # Examples
144    ///
145    /// ```
146    /// use symbolic_debuginfo::macho::BcSymbolMap;
147    ///
148    /// // let data = std::fs::read("c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap").unwrap();
149    /// # let data =
150    /// #     std::fs::read("tests/fixtures/c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap")
151    /// #         .unwrap();
152    /// let map = BcSymbolMap::parse(&data).unwrap();
153    ///
154    /// assert_eq!(map.resolve("__hidden#43_"), "Sources/Sentry/Public/SentryMessage.h");
155    /// assert_eq!(map.resolve("_addJSONData"), "_addJSONData");  // #64
156    /// ```
157    pub fn resolve(&self, mut name: &'d str) -> &'d str {
158        if let Some(tail) = name.strip_prefix(SWIFT_HIDDEN_PREFIX) {
159            if let Some(index_as_string) = tail.strip_suffix('_') {
160                name = index_as_string
161                    .parse::<usize>()
162                    .ok()
163                    .and_then(|index| self.get(index))
164                    .unwrap_or(name);
165            }
166        }
167        name
168    }
169
170    /// Resolves a name given in raw bytes using this mapping.
171    ///
172    /// Contrary to [`BcSymbolMap::resolve`], this works with any byte-like type, and returns an
173    /// [`Option`].
174    pub(crate) fn resolve_opt(&self, name: impl AsRef<[u8]>) -> Option<&str> {
175        let name = std::str::from_utf8(name.as_ref()).ok()?;
176        let tail = name.strip_prefix(SWIFT_HIDDEN_PREFIX)?;
177        let index_as_string = tail.strip_suffix('_')?;
178        let index = index_as_string.parse::<usize>().ok()?;
179        self.get(index)
180    }
181
182    /// Returns an iterator over all the names in this bitcode symbol map.
183    pub fn iter(&self) -> BcSymbolMapIterator<'_, 'd> {
184        BcSymbolMapIterator {
185            iter: self.names.iter(),
186        }
187    }
188}
189
/// Iterator over the names in a [`BcSymbolMap`].
///
/// This struct is created by [`BcSymbolMap::iter`].  It yields the names in the order they
/// are stored in the map, knows exactly how many names are left and can also be consumed
/// from the back.
#[derive(Clone, Debug)]
pub struct BcSymbolMapIterator<'a, 'd> {
    /// Iterator over the stored names; every capability is forwarded from it.
    iter: std::slice::Iter<'a, &'d str>,
}

impl<'d> Iterator for BcSymbolMapIterator<'_, 'd> {
    type Item = &'d str;

    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next().copied()
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl DoubleEndedIterator for BcSymbolMapIterator<'_, '_> {
    fn next_back(&mut self) -> Option<Self::Item> {
        self.iter.next_back().copied()
    }
}

// The underlying slice iterator reports an exact `size_hint`, which `len()` relies on.
impl ExactSizeIterator for BcSymbolMapIterator<'_, '_> {}

impl FusedIterator for BcSymbolMapIterator<'_, '_> {}
210
211/// Error for handling or creating a [`UuidMapping`].
212#[derive(Debug, Error)]
213#[error("{kind}")]
214pub struct UuidMappingError {
215    kind: UuidMappingErrorKind,
216    #[source]
217    source: Option<Box<dyn Error + Send + Sync + 'static>>,
218}
219
220impl From<elementtree::Error> for UuidMappingError {
221    fn from(source: elementtree::Error) -> Self {
222        Self {
223            kind: UuidMappingErrorKind::PListParse,
224            source: Some(Box::new(source)),
225        }
226    }
227}
228
229impl From<UuidMappingErrorKind> for UuidMappingError {
230    fn from(kind: UuidMappingErrorKind) -> Self {
231        Self { kind, source: None }
232    }
233}
234
235impl From<ParseDebugIdError> for UuidMappingError {
236    fn from(source: ParseDebugIdError) -> Self {
237        Self {
238            kind: UuidMappingErrorKind::PListParseValue,
239            source: Some(Box::new(source)),
240        }
241    }
242}
243
/// The different ways in which creating a [`UuidMapping`] can fail.
///
/// This is the kind stored inside a [`UuidMappingError`].
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum UuidMappingErrorKind {
    /// The plist was parsed but did not follow the expected (XML) schema.
    PListSchema,
    /// The plist could not be parsed as XML.
    PListParse,
    /// A required value in the plist could not be parsed.
    PListParseValue,
    /// No UUID could be parsed from the filename.
    ParseFilename,
}

impl fmt::Display for UuidMappingErrorKind {
    /// Writes the human-readable message belonging to this kind.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match self {
            Self::PListSchema => "XML structure did not match expected schema",
            Self::PListParse => "Invalid XML",
            Self::PListParseValue => "Failed to parse a value into the right type",
            Self::ParseFilename => "Failed to parse UUID from filename",
        };
        f.write_str(message)
    }
}
268
269/// A mapping from the `dSYM` UUID to an original UUID.
270///
271/// This is used e.g. when dealing with bitcode builds, when Apple compiles objects from
272/// bitcode these objects will have new UUIDs as debug identifiers.  This mapping can be
273/// found in the `dSYMs/<object-id>/Contents/Resources/<object-id>.plist` file of downloaded
274/// debugging symbols.
275///
276/// This struct allows you to keep track of such a mapping and provides support for parsing
277/// it from the ProperyList file format.
278#[derive(Clone, Copy, Debug)]
279pub struct UuidMapping {
280    dsym_uuid: DebugId,
281    original_uuid: DebugId,
282}
283
284impl UuidMapping {
285    /// Creates a new UUID mapping from two [`DebugId`]s.
286    pub fn new(dsym_uuid: DebugId, original_uuid: DebugId) -> Self {
287        Self {
288            dsym_uuid,
289            original_uuid,
290        }
291    }
292
293    /// Parses a PropertyList containing a `DBGOriginalUUID` mapping.
294    ///
295    /// The `filename` may contain multiple path segments, the stem of the filename segment
296    /// should contain the UUID of the `dSYM`.
297    ///
298    /// # Examples
299    ///
300    /// ```
301    /// use std::path::Path;
302    /// use symbolic_common::DebugId;
303    /// use symbolic_debuginfo::macho::UuidMapping;
304    ///
305    /// let filename = Path::new("2d10c42f-591d-3265-b147-78ba0868073f.plist");
306    /// # let filename = Path::new("tests/fixtures/2d10c42f-591d-3265-b147-78ba0868073f.plist");
307    /// let dsym_uuid: DebugId = filename
308    ///     .file_stem().unwrap()
309    ///     .to_str().unwrap()
310    ///     .parse().unwrap();
311    /// let data = std::fs::read(filename).unwrap();
312    ///
313    /// let uuid_map = UuidMapping::parse_plist(dsym_uuid, &data).unwrap();
314    ///
315    /// assert_eq!(uuid_map.dsym_uuid(), dsym_uuid);
316    /// assert_eq!(
317    ///     uuid_map.original_uuid(),
318    ///     "c8374b6d-6e96-34d8-ae38-efaa5fec424f".parse().unwrap(),
319    /// )
320    /// ```
321    pub fn parse_plist(dsym_uuid: DebugId, data: &[u8]) -> Result<Self, UuidMappingError> {
322        Ok(Self {
323            dsym_uuid,
324            original_uuid: uuid_from_plist(data)?,
325        })
326    }
327
328    /// Parses a PropertyList containing a `DBGOriginalUUID` mapping.
329    ///
330    /// This is a convenience version of [`UuidMapping::parse_plist`] which extracts
331    /// the UUID from the `filename`.
332    ///
333    /// The `filename` may contain multiple path segments, the stem of the filename segment
334    /// should contain the UUID of the `dSYM`.  This is the format the PList is normally
335    /// found in a `dSYM` directory structure.
336    ///
337    /// # Examples
338    ///
339    /// ```
340    /// use std::path::Path;
341    /// use symbolic_common::DebugId;
342    /// use symbolic_debuginfo::macho::UuidMapping;
343    ///
344    /// let filename = Path::new("Contents/Resources/2D10C42F-591D-3265-B147-78BA0868073F.plist");
345    /// # let filename = Path::new("tests/fixtures/2d10c42f-591d-3265-b147-78ba0868073f.plist");
346    /// let data = std::fs::read(filename).unwrap();
347    ///
348    /// let uuid_map = UuidMapping::parse_plist_with_filename(filename, &data).unwrap();
349    ///
350    /// assert_eq!(
351    ///     uuid_map.dsym_uuid(),
352    ///     "2d10c42f-591d-3265-b147-78ba0868073f".parse().unwrap(),
353    /// );
354    /// assert_eq!(
355    ///     uuid_map.original_uuid(),
356    ///     "c8374b6d-6e96-34d8-ae38-efaa5fec424f".parse().unwrap(),
357    /// )
358    /// ```
359    pub fn parse_plist_with_filename(
360        filename: &Path,
361        data: &[u8],
362    ) -> Result<Self, UuidMappingError> {
363        let dsym_uuid = filename
364            .file_stem()
365            .ok_or_else(|| UuidMappingError::from(UuidMappingErrorKind::ParseFilename))?
366            .to_str()
367            .ok_or_else(|| UuidMappingError::from(UuidMappingErrorKind::ParseFilename))?
368            .parse()?;
369        Self::parse_plist(dsym_uuid, data)
370    }
371
372    /// Returns the UUID of the original object file.
373    pub fn original_uuid(&self) -> DebugId {
374        self.original_uuid
375    }
376
377    /// Returns the UUID of the compiled binary and associated `dSYM`.
378    pub fn dsym_uuid(&self) -> DebugId {
379        self.dsym_uuid
380    }
381}
382
383fn uuid_from_plist(data: &[u8]) -> Result<DebugId, UuidMappingError> {
384    let plist = Element::from_reader(Cursor::new(data))?;
385
386    let raw = uuid_from_xml_plist(plist)
387        .ok_or_else(|| UuidMappingError::from(UuidMappingErrorKind::PListSchema))?;
388
389    raw.parse().map_err(Into::into)
390}
391
392fn uuid_from_xml_plist(plist: Element) -> Option<String> {
393    let version = plist.get_attr("version")?;
394    if version != "1.0" {
395        return None;
396    }
397    let dict = plist.find("dict")?;
398
399    let mut found_key = false;
400    let mut raw_original = None;
401    for element in dict.children() {
402        if element.tag().name() == "key" && element.text() == "DBGOriginalUUID" {
403            found_key = true;
404        } else if found_key {
405            raw_original = Some(element.text().to_string());
406            break;
407        }
408    }
409
410    raw_original
411}
412
#[cfg(test)]
mod tests {
    use super::*;

    /// Location of the bitcode symbol map fixture used by most tests.
    const SYMBOLMAP_FIXTURE: &str =
        "tests/fixtures/c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap";

    /// Location of the PropertyList fixture.
    const PLIST_FIXTURE: &str = "tests/fixtures/2d10c42f-591d-3265-b147-78ba0868073f.plist";

    /// Reads the symbol map fixture into a string, panicking if it cannot be read.
    fn load_symbolmap() -> String {
        std::fs::read_to_string(SYMBOLMAP_FIXTURE).unwrap()
    }

    #[test]
    fn test_bcsymbolmap_test() {
        assert!(BcSymbolMap::test(b"BCSymbolMap Version: 2.0"));
        assert!(!BcSymbolMap::test(b"BCSymbolMap Vers"));
        assert!(!BcSymbolMap::test(b"oops"));
    }

    #[test]
    fn test_basic() {
        let data = load_symbolmap();
        let bytes = data.as_bytes();

        assert!(BcSymbolMap::test(bytes));

        let map = BcSymbolMap::parse(bytes).unwrap();
        assert_eq!(map.get(2), Some("-[SentryMessage serialize]"));
    }

    #[test]
    fn test_iter() {
        let data = load_symbolmap();
        let map = BcSymbolMap::parse(data.as_bytes()).unwrap();

        let mut names = map.iter();

        let (lower_bound, upper_bound) = names.size_hint();
        assert!(lower_bound > 0);
        assert!(upper_bound.is_some());

        assert_eq!(names.next().unwrap(), "-[SentryMessage initWithFormatted:]");
        assert_eq!(names.next().unwrap(), "-[SentryMessage setMessage:]");
    }

    #[test]
    fn test_data_lifetime() {
        let data = load_symbolmap();

        // The name has to stay usable after the map itself has been dropped.
        let name = {
            let map = BcSymbolMap::parse(data.as_bytes()).unwrap();
            map.get(0).unwrap()
        };

        assert_eq!(name, "-[SentryMessage initWithFormatted:]");
    }

    #[test]
    fn test_resolve() {
        let data = load_symbolmap();
        let map = BcSymbolMap::parse(data.as_bytes()).unwrap();

        assert_eq!(map.resolve("normal_name"), "normal_name");
        assert_eq!(map.resolve("__hidden#2_"), "-[SentryMessage serialize]");
    }

    #[test]
    fn test_plist() {
        let uuid: DebugId = "2d10c42f-591d-3265-b147-78ba0868073f".parse().unwrap();
        let data = std::fs::read(PLIST_FIXTURE).unwrap();
        let map = UuidMapping::parse_plist(uuid, &data).unwrap();

        assert_eq!(map.dsym_uuid(), uuid);
        assert_eq!(
            map.original_uuid(),
            "c8374b6d-6e96-34d8-ae38-efaa5fec424f".parse().unwrap()
        );
    }
}