symbolic_debuginfo/macho/
mod.rs

1//! Support for Mach Objects, used on macOS and iOS.
2
3use std::borrow::Cow;
4use std::error::Error;
5use std::fmt;
6use std::sync::Arc;
7
8use goblin::mach;
9use smallvec::SmallVec;
10use thiserror::Error;
11
12use symbolic_common::{Arch, AsSelf, CodeId, DebugId, Uuid};
13
14use crate::base::*;
15use crate::dwarf::{Dwarf, DwarfDebugSession, DwarfError, DwarfSection, Endian};
16pub(crate) use mono_archive::{MonoArchive, MonoArchiveObjects};
17
18mod bcsymbolmap;
19pub mod compact;
20mod mono_archive;
21
22pub use bcsymbolmap::*;
23pub use compact::*;
24
25/// Prefix for hidden symbols from Apple BCSymbolMap builds.
26const SWIFT_HIDDEN_PREFIX: &str = "__hidden#";
27
/// An error when dealing with [`MachObject`](struct.MachObject.html).
///
/// The error always displays as `invalid MachO file`. The underlying cause, if one was
/// recorded, is reported as the error's source.
#[derive(Debug, Error)]
#[error("invalid MachO file")]
pub struct MachError {
    /// The wrapped lower-level error, if any.
    #[source]
    source: Option<Box<dyn Error + Send + Sync + 'static>>,
}
35
36impl MachError {
37    /// Creates a new MachO error from an arbitrary error payload.
38    fn new<E>(source: E) -> Self
39    where
40        E: Into<Box<dyn Error + Send + Sync>>,
41    {
42        let source = Some(source.into());
43        Self { source }
44    }
45}
46
47impl From<goblin::error::Error> for MachError {
48    fn from(e: goblin::error::Error) -> Self {
49        Self::new(e)
50    }
51}
52
53impl From<scroll::Error> for MachError {
54    fn from(e: scroll::Error) -> Self {
55        Self::new(e)
56    }
57}
58
/// Mach Object containers, used for executables and debug companions on macOS and iOS.
pub struct MachObject<'d> {
    /// The parsed MachO structure produced by goblin.
    macho: mach::MachO<'d>,
    /// The raw bytes this object was parsed from.
    data: &'d [u8],
    /// Optional symbol map used to resolve hidden symbol names, set via `load_symbolmap`.
    bcsymbolmap: Option<Arc<BcSymbolMap<'d>>>,
}
65
66impl<'d> MachObject<'d> {
67    /// Tests whether the buffer could contain a MachO object.
68    pub fn test(data: &[u8]) -> bool {
69        matches!(MachArchive::is_fat(data), Some(false))
70    }
71
72    /// Tries to parse a MachO from the given slice.
73    pub fn parse(data: &'d [u8]) -> Result<Self, MachError> {
74        mach::MachO::parse(data, 0)
75            .map(|macho| MachObject {
76                macho,
77                data,
78                bcsymbolmap: None,
79            })
80            .map_err(MachError::new)
81    }
82
83    /// Parses and loads the [`BcSymbolMap`] into the object.
84    ///
85    /// The bitcode symbol map must match the object, there is nothing in the symbol map
86    /// which allows this call to verify this.
87    ///
88    /// Once the symbolmap is loaded this object will transparently resolve any hidden
89    /// symbols using the provided symbolmap.
90    ///
91    /// # Examples
92    ///
93    /// ```
94    /// use symbolic_debuginfo::macho::{BcSymbolMap, MachObject};
95    ///
96    /// // let object_data = std::fs::read("dSYMs/.../Resources/DWARF/object").unwrap();
97    /// # let object_data =
98    /// #     std::fs::read("tests/fixtures/2d10c42f-591d-3265-b147-78ba0868073f.dwarf-hidden")
99    /// #         .unwrap();
100    /// let mut object = MachObject::parse(&object_data).unwrap();
101    ///
102    /// let map = object.symbol_map();
103    /// let symbol = map.lookup(0x5a74).unwrap();
104    /// assert_eq!(symbol.name.as_ref().map(|n| n.to_owned()).unwrap(), "__hidden#0_");
105    ///
106    /// // let bc_symbol_map_data =
107    /// //     std::fs::read("BCSymbolMaps/c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap")
108    /// //     .unwrap();
109    /// # let bc_symbol_map_data =
110    /// #     std::fs::read("tests/fixtures/c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap")
111    /// #         .unwrap();
112    /// let bc_symbol_map = BcSymbolMap::parse(&bc_symbol_map_data).unwrap();
113    /// object.load_symbolmap(bc_symbol_map);
114    ///
115    ///
116    /// let map = object.symbol_map();
117    /// let symbol = map.lookup(0x5a74).unwrap();
118    /// assert_eq!(
119    ///     symbol.name.as_ref().map(|n| n.to_owned()).unwrap(),
120    ///     "-[SentryMessage initWithFormatted:]",
121    /// );
122    /// ```
123    // TODO: re-enable this deprecation once we have a convenient way of creating an owned SymCache Transformer.
124    // #[deprecated = "use the symbolic-symcache `Transformer` functionality instead"]
125    pub fn load_symbolmap(&mut self, symbolmap: BcSymbolMap<'d>) {
126        self.bcsymbolmap = Some(Arc::new(symbolmap));
127    }
128
129    /// Gets the Compact Unwind Info of this object, if any exists.
130    pub fn compact_unwind_info(&self) -> Result<Option<CompactUnwindInfoIter<'d>>, MachError> {
131        if let Some(section) = self.section("unwind_info") {
132            if let Cow::Borrowed(section) = section.data {
133                let arch = self.arch();
134                let is_little_endian = self.endianity() == Endian::Little;
135                return Ok(Some(CompactUnwindInfoIter::new(
136                    section,
137                    is_little_endian,
138                    arch,
139                )?));
140            }
141        }
142        Ok(None)
143    }
144
    /// The container file format, which is always `FileFormat::MachO`.
    ///
    /// The value is constant and does not depend on the contents of the object.
    pub fn file_format(&self) -> FileFormat {
        FileFormat::MachO
    }
149
150    fn find_uuid(&self) -> Option<Uuid> {
151        for cmd in &self.macho.load_commands {
152            if let mach::load_command::CommandVariant::Uuid(ref uuid_cmd) = cmd.command {
153                return Uuid::from_slice(&uuid_cmd.uuid).ok();
154            }
155        }
156
157        None
158    }
159
    /// The name of the dylib if any.
    ///
    /// This is the `name` recorded by the parser for this object and is `None` if absent.
    pub fn name(&self) -> Option<&'d str> {
        self.macho.name
    }
164
165    /// The code identifier of this object.
166    ///
167    /// Mach objects use a UUID which is specified in the load commands that are part of the Mach
168    /// header. This UUID is generated at compile / link time and is usually unique per compilation.
169    pub fn code_id(&self) -> Option<CodeId> {
170        let uuid = self.find_uuid()?;
171        Some(CodeId::from_binary(&uuid.as_bytes()[..]))
172    }
173
174    /// The debug information identifier of a MachO file.
175    ///
176    /// This uses the same UUID as `code_id`.
177    pub fn debug_id(&self) -> DebugId {
178        self.find_uuid().map(DebugId::from_uuid).unwrap_or_default()
179    }
180
    /// The CPU architecture of this object, as specified in the Mach header.
    ///
    /// The architecture is derived from the `(cputype, cpusubtype)` pair. For the x86, x86-64,
    /// ARM, ARM64 and ARM64_32 families, an unrecognized subtype maps to the family's `*Unknown`
    /// variant. Every other unrecognized combination yields `Arch::Unknown`.
    pub fn arch(&self) -> Arch {
        use goblin::mach::constants::cputype;

        match (self.macho.header.cputype(), self.macho.header.cpusubtype()) {
            (cputype::CPU_TYPE_I386, cputype::CPU_SUBTYPE_I386_ALL) => Arch::X86,
            (cputype::CPU_TYPE_I386, _) => Arch::X86Unknown,
            (cputype::CPU_TYPE_X86_64, cputype::CPU_SUBTYPE_X86_64_ALL) => Arch::Amd64,
            (cputype::CPU_TYPE_X86_64, cputype::CPU_SUBTYPE_X86_64_H) => Arch::Amd64h,
            (cputype::CPU_TYPE_X86_64, _) => Arch::Amd64Unknown,
            (cputype::CPU_TYPE_ARM64, cputype::CPU_SUBTYPE_ARM64_ALL) => Arch::Arm64,
            (cputype::CPU_TYPE_ARM64, cputype::CPU_SUBTYPE_ARM64_V8) => Arch::Arm64V8,
            (cputype::CPU_TYPE_ARM64, cputype::CPU_SUBTYPE_ARM64_E) => Arch::Arm64e,
            (cputype::CPU_TYPE_ARM64, _) => Arch::Arm64Unknown,
            (cputype::CPU_TYPE_ARM64_32, cputype::CPU_SUBTYPE_ARM64_32_ALL) => Arch::Arm64_32,
            (cputype::CPU_TYPE_ARM64_32, cputype::CPU_SUBTYPE_ARM64_32_V8) => Arch::Arm64_32V8,
            (cputype::CPU_TYPE_ARM64_32, _) => Arch::Arm64_32Unknown,
            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_ALL) => Arch::Arm,
            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V5TEJ) => Arch::ArmV5,
            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V6) => Arch::ArmV6,
            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V6M) => Arch::ArmV6m,
            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V7) => Arch::ArmV7,
            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V7F) => Arch::ArmV7f,
            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V7S) => Arch::ArmV7s,
            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V7K) => Arch::ArmV7k,
            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V7M) => Arch::ArmV7m,
            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V7EM) => Arch::ArmV7em,
            (cputype::CPU_TYPE_ARM, _) => Arch::ArmUnknown,
            // PowerPC has no `*Unknown` fallback here; other subtypes fall through to the
            // catch-all arm below.
            (cputype::CPU_TYPE_POWERPC, cputype::CPU_SUBTYPE_POWERPC_ALL) => Arch::Ppc,
            (cputype::CPU_TYPE_POWERPC64, cputype::CPU_SUBTYPE_POWERPC_ALL) => Arch::Ppc64,
            (_, _) => Arch::Unknown,
        }
    }
214
215    /// The kind of this object, as specified in the Mach header.
216    pub fn kind(&self) -> ObjectKind {
217        match self.macho.header.filetype {
218            goblin::mach::header::MH_OBJECT => ObjectKind::Relocatable,
219            goblin::mach::header::MH_EXECUTE => ObjectKind::Executable,
220            goblin::mach::header::MH_FVMLIB => ObjectKind::Library,
221            goblin::mach::header::MH_CORE => ObjectKind::Dump,
222            goblin::mach::header::MH_PRELOAD => ObjectKind::Executable,
223            goblin::mach::header::MH_DYLIB => ObjectKind::Library,
224            goblin::mach::header::MH_DYLINKER => ObjectKind::Executable,
225            goblin::mach::header::MH_BUNDLE => ObjectKind::Library,
226            goblin::mach::header::MH_DSYM => ObjectKind::Debug,
227            goblin::mach::header::MH_KEXT_BUNDLE => ObjectKind::Library,
228            _ => ObjectKind::Other,
229        }
230    }
231
232    /// The address at which the image prefers to be loaded into memory.
233    ///
234    /// MachO files store all internal addresses as if it was loaded at that address. When the image
235    /// is actually loaded, that spot might already be taken by other images and so it must be
236    /// relocated to a new address. At runtime, a relocation table manages the arithmetics behind
237    /// this.
238    ///
239    /// Addresses used in `symbols` or `debug_session` have already been rebased relative to that
240    /// load address, so that the caller only has to deal with addresses relative to the actual
241    /// start of the image.
242    pub fn load_address(&self) -> u64 {
243        for seg in &self.macho.segments {
244            if seg.name().map(|name| name == "__TEXT").unwrap_or(false) {
245                return seg.vmaddr;
246            }
247        }
248
249        0
250    }
251
    /// Determines whether this object exposes a public symbol table.
    ///
    /// This only checks that a symbol table was parsed, not that it contains any entries.
    pub fn has_symbols(&self) -> bool {
        self.macho.symbols.is_some()
    }
256
257    /// Returns an iterator over symbols in the public symbol table.
258    pub fn symbols(&self) -> MachOSymbolIterator<'d> {
259        // Cache indices of code sections. These are either "__text" or "__stubs", always located in
260        // the "__TEXT" segment. It looks like each of those sections only occurs once, but to be
261        // safe they are collected into a vector.
262        let mut sections = SmallVec::new();
263        let mut section_index = 0;
264
265        'outer: for segment in &self.macho.segments {
266            if segment.name().ok() != Some("__TEXT") {
267                section_index += segment.nsects as usize;
268                continue;
269            }
270
271            for result in segment {
272                // Do not continue to iterate potentially broken section headers. This could lead to
273                // invalid section indices.
274                let section = match result {
275                    Ok((section, _data)) => section,
276                    Err(_) => break 'outer,
277                };
278
279                match section.name() {
280                    Ok("__text") | Ok("__stubs") => sections.push(section_index),
281                    _ => (),
282                }
283
284                section_index += 1;
285            }
286        }
287
288        MachOSymbolIterator {
289            symbols: self.macho.symbols(),
290            sections,
291            vmaddr: self.load_address(),
292            symbolmap: self.bcsymbolmap.clone(),
293        }
294    }
295
    /// Returns an ordered map of symbols in the symbol table.
    ///
    /// The map is built by collecting everything yielded by [`symbols`](Self::symbols).
    pub fn symbol_map(&self) -> SymbolMap<'d> {
        self.symbols().collect()
    }
300
    /// Determines whether this object contains debug information.
    ///
    /// This is the case if the object has a `debug_info` section.
    pub fn has_debug_info(&self) -> bool {
        self.has_section("debug_info")
    }
305
306    /// Constructs a debugging session.
307    ///
308    /// A debugging session loads certain information from the object file and creates caches for
309    /// efficient access to various records in the debug information. Since this can be quite a
310    /// costly process, try to reuse the debugging session as long as possible.
311    ///
312    /// MachO files generally use DWARF debugging information, which is also used by ELF containers
313    /// on Linux.
314    ///
315    /// Constructing this session will also work if the object does not contain debugging
316    /// information, in which case the session will be a no-op. This can be checked via
317    /// [`has_debug_info`](struct.MachObject.html#method.has_debug_info).
318    pub fn debug_session(&self) -> Result<DwarfDebugSession<'d>, DwarfError> {
319        let symbols = self.symbol_map();
320        let mut session =
321            DwarfDebugSession::parse(self, symbols, self.load_address() as i64, self.kind())?;
322        session.load_symbolmap(self.bcsymbolmap.clone());
323        Ok(session)
324    }
325
326    /// Determines whether this object contains stack unwinding information.
327    pub fn has_unwind_info(&self) -> bool {
328        self.has_section("eh_frame")
329            || self.has_section("debug_frame")
330            || self.has_section("unwind_info")
331    }
332
    /// Determines whether this object contains embedded source.
    ///
    /// This implementation always returns `false`.
    pub fn has_sources(&self) -> bool {
        false
    }
337
    /// Determines whether this object is malformed and was only partially parsed.
    ///
    /// This implementation always returns `false`.
    pub fn is_malformed(&self) -> bool {
        false
    }
342
    /// Returns the raw data of the MachO file.
    ///
    /// This is the slice that was passed to [`parse`](Self::parse).
    pub fn data(&self) -> &'d [u8] {
        self.data
    }
347
348    /// Checks whether this mach object contains hidden symbols.
349    ///
350    /// This is an indication that BCSymbolMaps are needed to symbolicate crash reports correctly.
351    pub fn requires_symbolmap(&self) -> bool {
352        self.symbols()
353            .any(|s| s.name().is_some_and(|n| n.starts_with(SWIFT_HIDDEN_PREFIX)))
354    }
355}
356
357impl fmt::Debug for MachObject<'_> {
358    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
359        f.debug_struct("MachObject")
360            .field("code_id", &self.code_id())
361            .field("debug_id", &self.debug_id())
362            .field("arch", &self.arch())
363            .field("kind", &self.kind())
364            .field("load_address", &format_args!("{:#x}", self.load_address()))
365            .field("has_symbols", &self.has_symbols())
366            .field("has_debug_info", &self.has_debug_info())
367            .field("has_unwind_info", &self.has_unwind_info())
368            .field("is_malformed", &self.is_malformed())
369            .finish()
370    }
371}
372
// Lets a `MachObject<'d>` be viewed as a `MachObject<'slf>` with the shorter lifetime `'slf`.
impl<'slf, 'd: 'slf> AsSelf<'slf> for MachObject<'d> {
    type Ref = MachObject<'slf>;

    fn as_self(&'slf self) -> &'slf Self::Ref {
        self
    }
}
380
// Both trait methods forward to the inherent `MachObject` functions of the same name.
impl<'d> Parse<'d> for MachObject<'d> {
    type Error = MachError;

    fn test(data: &[u8]) -> bool {
        Self::test(data)
    }

    fn parse(data: &'d [u8]) -> Result<Self, MachError> {
        Self::parse(data)
    }
}
392
// Every trait method forwards to the inherent `MachObject` method of the same name. Inherent
// methods take precedence over trait methods in method resolution, so these calls do not recurse.
impl<'data: 'object, 'object> ObjectLike<'data, 'object> for MachObject<'data> {
    type Error = DwarfError;
    type Session = DwarfDebugSession<'data>;
    type SymbolIterator = MachOSymbolIterator<'data>;

    fn file_format(&self) -> FileFormat {
        self.file_format()
    }

    fn code_id(&self) -> Option<CodeId> {
        self.code_id()
    }

    fn debug_id(&self) -> DebugId {
        self.debug_id()
    }

    fn arch(&self) -> Arch {
        self.arch()
    }

    fn kind(&self) -> ObjectKind {
        self.kind()
    }

    fn load_address(&self) -> u64 {
        self.load_address()
    }

    fn has_symbols(&self) -> bool {
        self.has_symbols()
    }

    fn symbols(&self) -> Self::SymbolIterator {
        self.symbols()
    }

    fn symbol_map(&self) -> SymbolMap<'data> {
        self.symbol_map()
    }

    fn has_debug_info(&self) -> bool {
        self.has_debug_info()
    }

    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
        self.debug_session()
    }

    fn has_unwind_info(&self) -> bool {
        self.has_unwind_info()
    }

    fn has_sources(&self) -> bool {
        self.has_sources()
    }

    fn is_malformed(&self) -> bool {
        self.is_malformed()
    }
}
454
455impl<'data> Dwarf<'data> for MachObject<'data> {
456    fn endianity(&self) -> Endian {
457        if self.macho.little_endian {
458            Endian::Little
459        } else {
460            Endian::Big
461        }
462    }
463
464    fn raw_section(&self, section_name: &str) -> Option<DwarfSection<'data>> {
465        for segment in &self.macho.segments {
466            for section in segment.into_iter() {
467                let (header, data) = section.ok()?;
468                if let Ok(sec) = header.name() {
469                    if sec.starts_with("__") && map_section_name(&sec[2..]) == section_name {
470                        // In some cases, dsymutil leaves sections headers but removes their
471                        // data from the file. While the addr and size parameters are still
472                        // set, `header.offset` is 0 in that case. We skip them just like the
473                        // section was missing to avoid loading invalid data.
474                        if header.offset == 0 {
475                            return None;
476                        }
477
478                        return Some(DwarfSection {
479                            data: Cow::Borrowed(data),
480                            address: header.addr,
481                            offset: u64::from(header.offset),
482                            align: u64::from(header.align),
483                        });
484                    }
485                }
486            }
487        }
488
489        None
490    }
491}
492
/// Maps a MachO section name (without the leading `__`) to the section name used for lookup.
///
/// Only `debug_str_offs` is rewritten, to `debug_str_offsets`; every other name is returned
/// unchanged.
///
/// See <https://llvm.org/doxygen/MachOObjectFile_8cpp_source.html#l05341>.
fn map_section_name(name: &str) -> &str {
    if name == "debug_str_offs" {
        "debug_str_offsets"
    } else {
        name
    }
}
/// An iterator over symbols in the MachO file.
///
/// Returned by [`MachObject::symbols`](struct.MachObject.html#method.symbols).
pub struct MachOSymbolIterator<'data> {
    /// The underlying iterator over raw symbol table entries.
    symbols: mach::symbols::SymbolIterator<'data>,
    /// Zero-based indices of the code sections; symbols in other sections are skipped.
    sections: SmallVec<[usize; 2]>,
    /// The load address of the image, subtracted from every symbol address.
    vmaddr: u64,
    /// Optional symbol map used to resolve hidden symbol names.
    symbolmap: Option<Arc<BcSymbolMap<'data>>>,
}
509
510impl<'data> Iterator for MachOSymbolIterator<'data> {
511    type Item = Symbol<'data>;
512
513    fn next(&mut self) -> Option<Self::Item> {
514        for next in &mut self.symbols {
515            let (mut name, nlist) = next.ok()?;
516
517            // Sanity check of the symbol address. Since we only intend to iterate over function
518            // symbols, they need to be mapped after the image's vmaddr.
519            if nlist.n_value < self.vmaddr {
520                continue;
521            }
522
523            // We are only interested in symbols pointing to a code section (type `N_SECT`). The
524            // section index is incremented by one to leave room for `NO_SECT` (0). Section indexes
525            // of the code sections have been passed in via `self.sections`.
526            let in_valid_section = !nlist.is_stab()
527                && nlist.get_type() == mach::symbols::N_SECT
528                && nlist.n_sect != (mach::symbols::NO_SECT as usize)
529                && self.sections.contains(&(nlist.n_sect - 1));
530
531            if !in_valid_section {
532                continue;
533            }
534
535            if let Some(symbolmap) = self.symbolmap.as_ref() {
536                name = symbolmap.resolve(name);
537            }
538
539            // Trim leading underscores from mangled C++ names.
540            if let Some(tail) = name.strip_prefix('_') {
541                if !name.starts_with(SWIFT_HIDDEN_PREFIX) {
542                    name = tail;
543                }
544            }
545
546            return Some(Symbol {
547                name: Some(Cow::Borrowed(name)),
548                address: nlist.n_value - self.vmaddr,
549                size: 0, // Computed in `SymbolMap`
550            });
551        }
552
553        None
554    }
555}
556
/// An iterator over objects in a [`FatMachO`](struct.FatMachO.html).
///
/// Objects are parsed just-in-time while iterating, which may result in errors. The iterator is
/// still valid afterwards, however, and can be used to resolve the next object.
pub struct FatMachObjectIterator<'d, 'a> {
    /// The underlying iterator over architecture entries of the fat header.
    iter: mach::FatArchIterator<'a>,
    /// The number of entries that have not been yielded yet.
    remaining: usize,
    /// The raw bytes of the entire fat container.
    data: &'d [u8],
}
566
567impl<'d> Iterator for FatMachObjectIterator<'d, '_> {
568    type Item = Result<MachObject<'d>, MachError>;
569
570    fn next(&mut self) -> Option<Self::Item> {
571        if self.remaining == 0 {
572            return None;
573        }
574
575        self.remaining -= 1;
576        match self.iter.next() {
577            Some(Ok(arch)) => {
578                let start = (arch.offset as usize).min(self.data.len());
579                let end = (arch.offset as usize + arch.size as usize).min(self.data.len());
580                Some(MachObject::parse(&self.data[start..end]))
581            }
582            Some(Err(error)) => Some(Err(MachError::new(error))),
583            None => None,
584        }
585    }
586
587    fn size_hint(&self) -> (usize, Option<usize>) {
588        (self.remaining, Some(self.remaining))
589    }
590}
591
// `next` keeps returning `None` once `remaining` has reached zero.
impl std::iter::FusedIterator for FatMachObjectIterator<'_, '_> {}
// `size_hint` reports `remaining` as both the lower and the upper bound.
impl ExactSizeIterator for FatMachObjectIterator<'_, '_> {}
594
/// A fat MachO container that hosts one or more [`MachObject`]s.
///
/// [`MachObject`]: struct.MachObject.html
pub struct FatMachO<'d> {
    /// The parsed fat header describing the embedded architectures.
    fat: mach::MultiArch<'d>,
    /// The raw bytes of the entire container, from which the objects are sliced.
    data: &'d [u8],
}
602
603impl<'d> FatMachO<'d> {
    /// Tests whether the buffer could contain a fat MachO container.
    pub fn test(data: &[u8]) -> bool {
        matches!(MachArchive::is_fat(data), Some(true))
    }
608
609    /// Tries to parse a fat MachO container from the given slice.
610    pub fn parse(data: &'d [u8]) -> Result<Self, MachError> {
611        mach::MultiArch::new(data)
612            .map(|fat| FatMachO { fat, data })
613            .map_err(MachError::new)
614    }
615
    /// Returns an iterator over objects in this container.
    ///
    /// The iterator yields at most as many items as the fat header declares architectures.
    pub fn objects(&self) -> FatMachObjectIterator<'d, '_> {
        FatMachObjectIterator {
            iter: self.fat.iter_arches(),
            remaining: self.fat.narches,
            data: self.data,
        }
    }
624
    /// Returns the number of objects in this archive.
    ///
    /// This is the architecture count declared in the fat header.
    pub fn object_count(&self) -> usize {
        self.fat.narches
    }
629
630    /// Resolves the object at the given index.
631    ///
632    /// Returns `Ok(None)` if the index is out of bounds, or `Err` if the object exists but cannot
633    /// be parsed.
634    pub fn object_by_index(&self, index: usize) -> Result<Option<MachObject<'d>>, MachError> {
635        let arch = match self.fat.iter_arches().nth(index) {
636            Some(arch) => arch.map_err(MachError::new)?,
637            None => return Ok(None),
638        };
639
640        let start = (arch.offset as usize).min(self.data.len());
641        let end = (arch.offset as usize + arch.size as usize).min(self.data.len());
642        MachObject::parse(&self.data[start..end]).map(Some)
643    }
644}
645
646impl fmt::Debug for FatMachO<'_> {
647    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
648        f.debug_struct("FatMachO").field("fat", &self.fat).finish()
649    }
650}
651
// Lets a `FatMachO<'d>` be viewed as a `FatMachO<'slf>` with the shorter lifetime `'slf`.
impl<'slf, 'd: 'slf> AsSelf<'slf> for FatMachO<'d> {
    type Ref = FatMachO<'slf>;

    fn as_self(&'slf self) -> &'slf Self::Ref {
        self
    }
}
659
// The concrete iterator behind a `MachObjectIterator`.
// NOTE(review): the lint is presumably silenced because the two variants differ considerably in
// size — confirm before boxing either of them.
#[allow(clippy::large_enum_variant)]
enum MachObjectIteratorInner<'d, 'a> {
    /// Iterates over the objects of a `MonoArchive`.
    Single(MonoArchiveObjects<'d, MachObject<'d>>),
    /// Iterates over the objects of a fat MachO container.
    Archive(FatMachObjectIterator<'d, 'a>),
}
665
/// An iterator over objects in a [`MachArchive`](struct.MachArchive.html).
///
/// Each item is the result of parsing one contained [`MachObject`].
pub struct MachObjectIterator<'d, 'a>(MachObjectIteratorInner<'d, 'a>);
668
669impl<'d> Iterator for MachObjectIterator<'d, '_> {
670    type Item = Result<MachObject<'d>, MachError>;
671
672    fn next(&mut self) -> Option<Self::Item> {
673        match self.0 {
674            MachObjectIteratorInner::Single(ref mut iter) => iter.next(),
675            MachObjectIteratorInner::Archive(ref mut iter) => iter.next(),
676        }
677    }
678
679    fn size_hint(&self) -> (usize, Option<usize>) {
680        match self.0 {
681            MachObjectIteratorInner::Single(ref iter) => iter.size_hint(),
682            MachObjectIteratorInner::Archive(ref iter) => iter.size_hint(),
683        }
684    }
685}
686
// Both marker traits rely on the wrapped iterator, to which `next` and `size_hint` delegate.
impl std::iter::FusedIterator for MachObjectIterator<'_, '_> {}
impl ExactSizeIterator for MachObjectIterator<'_, '_> {}
689
// The two kinds of input a `MachArchive` can wrap.
#[derive(Debug)]
enum MachArchiveInner<'d> {
    /// A `MonoArchive` over the input data, used whenever the input is not a fat container.
    Single(MonoArchive<'d, MachObject<'d>>),
    /// A fat MachO container.
    Archive(FatMachO<'d>),
}
695
/// An archive that can consist of a single [`MachObject`] or a [`FatMachO`] container.
///
/// Executables and dSYM files on macOS can be a so-called _Fat Mach Object_: It contains multiple
/// objects for several architectures. When loading this object, the operating system determines the
/// object corresponding to the host's architecture. This allows distributing a single binary with
/// optimizations for specific CPUs, which is frequently done on iOS.
///
/// To abstract over the differences, `MachArchive` simulates the archive interface also for single
/// Mach objects. This allows uniform access to both file types.
///
/// [`MachObject`]: struct.MachObject.html
/// [`FatMachO`]: struct.FatMachO.html
#[derive(Debug)]
pub struct MachArchive<'d>(MachArchiveInner<'d>);
710
711impl<'d> MachArchive<'d> {
    /// Tests whether the buffer contains either a Mach Object or a Fat Mach Object.
    ///
    /// This only inspects the start of the buffer; it does not parse the object.
    pub fn test(data: &[u8]) -> bool {
        Self::is_fat(data).is_some()
    }
716
717    /// Determines if the binary content is a macho object, and whether or not it is fat
718    fn is_fat(data: &[u8]) -> Option<bool> {
719        let (magic, _maybe_ctx) = goblin::mach::parse_magic_and_ctx(data, 0).ok()?;
720        match magic {
721            goblin::mach::fat::FAT_MAGIC => {
722                use scroll::Pread;
723                // so this is kind of stupid but java class files share the same cutesy magic
724                // as a macho fat file (CAFEBABE).  This means that we often claim that a java
725                // class file is actually a macho binary but it's not.  The next 32 bits encode
726                // the number of embedded architectures in a fat mach.  In case of a JAR file
727                // we have 2 bytes for minor version and 2 bytes for major version of the class
728                // file format.
729                //
730                // The internet suggests the first public version of Java had the class version
731                // 45.  Thus the logic applied here is that if the number is >= 45 we're more
732                // likely to have a java class file than a macho file with 45 architectures
733                // which should be very rare.
734                //
735                // https://docs.oracle.com/javase/specs/jvms/se6/html/ClassFile.doc.html
736                let narches = data.pread_with::<u32>(4, scroll::BE).ok()?;
737
738                if narches < 45 {
739                    Some(true)
740                } else {
741                    None
742                }
743            }
744            goblin::mach::header::MH_CIGAM_64
745            | goblin::mach::header::MH_CIGAM
746            | goblin::mach::header::MH_MAGIC_64
747            | goblin::mach::header::MH_MAGIC => Some(false),
748            _ => None,
749        }
750    }
751
752    /// Tries to parse a Mach archive from the given slice.
753    pub fn parse(data: &'d [u8]) -> Result<Self, MachError> {
754        Ok(Self(match Self::is_fat(data) {
755            Some(true) => MachArchiveInner::Archive(FatMachO::parse(data)?),
756            // Fall back to mach parsing to receive a meaningful error message from goblin
757            _ => MachArchiveInner::Single(MonoArchive::new(data)),
758        }))
759    }
760
761    /// Returns an iterator over all objects contained in this archive.
762    pub fn objects(&self) -> MachObjectIterator<'d, '_> {
763        MachObjectIterator(match self.0 {
764            MachArchiveInner::Single(ref inner) => MachObjectIteratorInner::Single(inner.objects()),
765            MachArchiveInner::Archive(ref inner) => {
766                MachObjectIteratorInner::Archive(inner.objects())
767            }
768        })
769    }
770
771    /// Returns the number of objects in this archive.
772    pub fn object_count(&self) -> usize {
773        match self.0 {
774            MachArchiveInner::Single(ref inner) => inner.object_count(),
775            MachArchiveInner::Archive(ref inner) => inner.object_count(),
776        }
777    }
778
779    /// Resolves the object at the given index.
780    ///
781    /// Returns `Ok(None)` if the index is out of bounds, or `Err` if the object exists but cannot
782    /// be parsed.
783    pub fn object_by_index(&self, index: usize) -> Result<Option<MachObject<'d>>, MachError> {
784        match self.0 {
785            MachArchiveInner::Single(ref inner) => inner.object_by_index(index),
786            MachArchiveInner::Archive(ref inner) => inner.object_by_index(index),
787        }
788    }
789
790    /// Returns whether this is a multi-object archive.
791    ///
792    /// This may also return true if there is only a single object inside the archive.
793    pub fn is_multi(&self) -> bool {
794        match self.0 {
795            MachArchiveInner::Archive(_) => true,
796            MachArchiveInner::Single(_) => false,
797        }
798    }
799}
800
// Lets a `MachArchive<'d>` be viewed as a `MachArchive<'slf>` with the shorter lifetime `'slf`.
impl<'slf, 'd: 'slf> AsSelf<'slf> for MachArchive<'d> {
    type Ref = MachArchive<'slf>;

    fn as_self(&'slf self) -> &'slf Self::Ref {
        self
    }
}
808
#[cfg(test)]
mod tests {

    use super::*;

    /// Verifies that hidden symbol, file and function names are reported verbatim before a
    /// BCSymbolMap is loaded, and that the resolved names are reported afterwards.
    #[test]
    fn test_bcsymbolmap() {
        let dwarf_bytes =
            std::fs::read("tests/fixtures/2d10c42f-591d-3265-b147-78ba0868073f.dwarf-hidden")
                .unwrap();
        let mut object = MachObject::parse(&dwarf_bytes).unwrap();

        // Before the symbolmap is loaded, the `__hidden#N_` placeholders must come through as-is.
        let hidden_path = "__hidden#41_/__hidden#42_";

        let first_symbol = object.symbols().next().unwrap();
        assert_eq!(first_symbol.name.unwrap(), "__hidden#0_");

        let hidden_session = object.debug_session().unwrap();
        let first_file = hidden_session.files().next().unwrap().unwrap();
        let relative_path = first_file.path_str();
        assert_eq!(&relative_path, hidden_path);
        let absolute_path = first_file.abs_path_str();
        assert_eq!(
            &absolute_path,
            // XXX: the path joining logic usually detects absolute paths (see below), but that does
            // not work for these hidden paths.
            "__hidden#41_/__hidden#41_/__hidden#42_"
        );

        let mut hidden_functions = hidden_session.functions();
        let first_function = hidden_functions.next().unwrap().unwrap();
        assert_eq!(&first_function.name, "__hidden#0_");
        assert_eq!(&first_function.compilation_dir, b"__hidden#41_");
        let first_line_path = first_function.lines[0].file.path_str();
        assert_eq!(&first_line_path, hidden_path);

        // Skip ahead to the first successfully parsed function that has inlinees.
        let inlining_function = hidden_functions
            .filter_map(Result::ok)
            .find(|function| !function.inlinees.is_empty())
            .unwrap();
        let first_inlinee = inlining_function.inlinees.first().unwrap();
        assert_eq!(&first_inlinee.name, "__hidden#146_");

        // Parse the symbolmap and attach it to the object.
        let symbolmap_bytes =
            std::fs::read("tests/fixtures/c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap")
                .unwrap();
        let symbolmap = BcSymbolMap::parse(&symbolmap_bytes).unwrap();
        object.load_symbolmap(symbolmap);

        // From here on, the same queries must return the resolved symbols and filenames.
        let source_path =
            "/Users/philipphofmann/git-repos/sentry-cocoa/Sources/Sentry/SentryMessage.m";

        let mut resolved_symbols = object.symbols();
        let first_symbol = resolved_symbols.next().unwrap();
        assert_eq!(
            first_symbol.name.unwrap(),
            "-[SentryMessage initWithFormatted:]"
        );

        let second_symbol = resolved_symbols.next().unwrap();
        assert_eq!(second_symbol.name.unwrap(), "-[SentryMessage setMessage:]");

        let resolved_session = object.debug_session().unwrap();
        let first_file = resolved_session.files().next().unwrap().unwrap();
        let relative_path = first_file.path_str();
        assert_eq!(&relative_path, source_path);
        let absolute_path = first_file.abs_path_str();
        assert_eq!(&absolute_path, source_path);

        let mut resolved_functions = resolved_session.functions();
        let first_function = resolved_functions.next().unwrap().unwrap();
        assert_eq!(&first_function.name, "-[SentryMessage initWithFormatted:]");
        assert_eq!(
            &first_function.compilation_dir,
            b"/Users/philipphofmann/git-repos/sentry-cocoa"
        );
        let first_line_path = first_function.lines[0].file.path_str();
        assert_eq!(&first_line_path, source_path);

        let inlining_function = resolved_functions
            .filter_map(Result::ok)
            .find(|function| !function.inlinees.is_empty())
            .unwrap();
        let first_inlinee = inlining_function.inlinees.first().unwrap();
        assert_eq!(&first_inlinee.name, "prepareReportWriter");
    }

    /// Verifies that a fat header whose only arch entry carries large offset and size fields
    /// parses, but that resolving the contained object fails instead of succeeding.
    #[test]
    fn test_overflow_multiarch() {
        let fat_header = [
            0xbe, 0xba, 0xfe, 0xca, // magic
            0x00, 0x00, 0x00, 0x01, // num arches = 1
            0x00, 0x00, 0x00, 0x00, // cpu type
            0x00, 0x00, 0x00, 0x00, // cpu subtype
            0x00, 0xff, 0xff, 0xff, // offset
            0x00, 0x00, 0xff, 0xff, // size
            0x00, 0x00, 0x00, 0x00, // align
        ];

        let archive = FatMachO::parse(&fat_header).unwrap();

        // Direct lookup of the only entry must report an error.
        assert!(archive.object_by_index(0).is_err());

        // Iteration must yield that entry as an error as well.
        assert!(archive.objects().next().unwrap().is_err());
    }

    /// Verifies that querying debug info on this hand-written byte sequence parses and reports
    /// no debug information.
    #[test]
    fn test_section_access() {
        let raw_object = [
            0xfe, 0xed, 0xfa, 0xcf, 0x1, 0x0, 0x0, 0x0, 0x0, 0x2, 0xed, 0xfa, 0xce, 0x6f, 0x73,
            0x6f, 0x0, 0x0, 0x0, 0x7, 0x0, 0x0, 0x0, 0x4d, 0x4f, 0x44, 0x55, 0x4c, 0x40, 0x20, 0x0,
            0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x4d, 0xc2, 0xc2, 0xc2, 0xc2,
            0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xca, 0x7a, 0xfe, 0xba, 0xbe, 0x0, 0x0, 0x0, 0x20,
            0x43, 0x2f, 0x0, 0x32, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0x0, 0x0, 0x0, 0x4d, 0x4f,
            0x44, 0x55, 0x4c, 0x40, 0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
            0x0, 0x0, 0x2a, 0x78, 0x6e, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2,
            0xc2, 0xc2, 0xc2, 0xc2, 0xc6, 0xd5, 0xc2, 0xc2, 0x1f, 0x1f,
        ];

        let parsed = MachObject::parse(&raw_object).unwrap();
        let has_debug_info = parsed.has_debug_info();

        assert!(!has_debug_info);
    }

    /// Verifies that building the symbol map for the fuzzed fixture completes; the resulting map
    /// itself is not inspected.
    #[test]
    fn test_invalid_symbols() {
        let fuzzed_bytes = std::fs::read("tests/fixtures/invalid-symbols.fuzzed").unwrap();

        let parsed = MachObject::parse(&fuzzed_bytes).unwrap();

        // Only completion matters here, so the result is discarded right away.
        let _ = parsed.symbol_map();
    }
}