Skip to main content

symbolic_debuginfo/macho/
mod.rs

1//! Support for Mach Objects, used on macOS and iOS.
2
3use std::borrow::Cow;
4use std::error::Error;
5use std::fmt;
6use std::sync::Arc;
7
8use goblin::mach;
9use smallvec::SmallVec;
10use thiserror::Error;
11
12use symbolic_common::{Arch, AsSelf, CodeId, DebugId, Uuid};
13
14use crate::base::*;
15use crate::dwarf::{Dwarf, DwarfDebugSession, DwarfError, DwarfSection, Endian};
16use crate::ParseObjectOptions;
17pub(crate) use mono_archive::{MonoArchive, MonoArchiveObjects};
18
19mod bcsymbolmap;
20pub mod compact;
21mod mono_archive;
22
23pub use bcsymbolmap::*;
24pub use compact::*;
25
/// Name prefix that marks a symbol as hidden in Apple BCSymbolMap builds.
///
/// Symbols whose name starts with this prefix keep their leading underscore when iterated, and
/// their presence signals that a BCSymbolMap is required.
const SWIFT_HIDDEN_PREFIX: &str = "__hidden#";
28
29/// An error when dealing with [`MachObject`](struct.MachObject.html).
30#[derive(Debug, Error)]
31#[error("invalid MachO file")]
32pub struct MachError {
33    #[source]
34    source: Option<Box<dyn Error + Send + Sync + 'static>>,
35}
36
37impl MachError {
38    /// Creates a new MachO error from an arbitrary error payload.
39    fn new<E>(source: E) -> Self
40    where
41        E: Into<Box<dyn Error + Send + Sync>>,
42    {
43        let source = Some(source.into());
44        Self { source }
45    }
46}
47
48impl From<goblin::error::Error> for MachError {
49    fn from(e: goblin::error::Error) -> Self {
50        Self::new(e)
51    }
52}
53
54impl From<scroll::Error> for MachError {
55    fn from(e: scroll::Error) -> Self {
56        Self::new(e)
57    }
58}
59
60/// Mach Object containers, used for executables and debug companions on macOS and iOS.
61pub struct MachObject<'d> {
62    macho: mach::MachO<'d>,
63    data: &'d [u8],
64    bcsymbolmap: Option<Arc<BcSymbolMap<'d>>>,
65}
66
67impl<'d> MachObject<'d> {
68    /// Tests whether the buffer could contain a MachO object.
69    pub fn test(data: &[u8]) -> bool {
70        matches!(MachArchive::is_fat(data), Some(false))
71    }
72
73    /// Tries to parse a MachO from the given slice.
74    pub fn parse(data: &'d [u8]) -> Result<Self, MachError> {
75        mach::MachO::parse(data, 0)
76            .map(|macho| MachObject {
77                macho,
78                data,
79                bcsymbolmap: None,
80            })
81            .map_err(MachError::new)
82    }
83
84    /// Parses and loads the [`BcSymbolMap`] into the object.
85    ///
86    /// The bitcode symbol map must match the object, there is nothing in the symbol map
87    /// which allows this call to verify this.
88    ///
89    /// Once the symbolmap is loaded this object will transparently resolve any hidden
90    /// symbols using the provided symbolmap.
91    ///
92    /// # Examples
93    ///
94    /// ```
95    /// use symbolic_debuginfo::macho::{BcSymbolMap, MachObject};
96    ///
97    /// // let object_data = std::fs::read("dSYMs/.../Resources/DWARF/object").unwrap();
98    /// # let object_data =
99    /// #     std::fs::read("tests/fixtures/2d10c42f-591d-3265-b147-78ba0868073f.dwarf-hidden")
100    /// #         .unwrap();
101    /// let mut object = MachObject::parse(&object_data).unwrap();
102    ///
103    /// let map = object.symbol_map();
104    /// let symbol = map.lookup(0x5a74).unwrap();
105    /// assert_eq!(symbol.name.as_ref().map(|n| n.to_owned()).unwrap(), "__hidden#0_");
106    ///
107    /// // let bc_symbol_map_data =
108    /// //     std::fs::read("BCSymbolMaps/c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap")
109    /// //     .unwrap();
110    /// # let bc_symbol_map_data =
111    /// #     std::fs::read("tests/fixtures/c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap")
112    /// #         .unwrap();
113    /// let bc_symbol_map = BcSymbolMap::parse(&bc_symbol_map_data).unwrap();
114    /// object.load_symbolmap(bc_symbol_map);
115    ///
116    ///
117    /// let map = object.symbol_map();
118    /// let symbol = map.lookup(0x5a74).unwrap();
119    /// assert_eq!(
120    ///     symbol.name.as_ref().map(|n| n.to_owned()).unwrap(),
121    ///     "-[SentryMessage initWithFormatted:]",
122    /// );
123    /// ```
124    // TODO: re-enable this deprecation once we have a convenient way of creating an owned SymCache Transformer.
125    // #[deprecated = "use the symbolic-symcache `Transformer` functionality instead"]
126    pub fn load_symbolmap(&mut self, symbolmap: BcSymbolMap<'d>) {
127        self.bcsymbolmap = Some(Arc::new(symbolmap));
128    }
129
130    /// Gets the Compact Unwind Info of this object, if any exists.
131    pub fn compact_unwind_info(&self) -> Result<Option<CompactUnwindInfoIter<'d>>, MachError> {
132        if let Some(section) = self.section("unwind_info") {
133            if let Cow::Borrowed(section) = section.data {
134                let arch = self.arch();
135                let is_little_endian = self.endianity() == Endian::Little;
136                return Ok(Some(CompactUnwindInfoIter::new(
137                    section,
138                    is_little_endian,
139                    arch,
140                )?));
141            }
142        }
143        Ok(None)
144    }
145
146    /// The container file format, which is always `FileFormat::MachO`.
147    pub fn file_format(&self) -> FileFormat {
148        FileFormat::MachO
149    }
150
151    fn find_uuid(&self) -> Option<Uuid> {
152        for cmd in &self.macho.load_commands {
153            if let mach::load_command::CommandVariant::Uuid(ref uuid_cmd) = cmd.command {
154                return Uuid::from_slice(&uuid_cmd.uuid).ok();
155            }
156        }
157
158        None
159    }
160
161    /// The name of the dylib if any.
162    pub fn name(&self) -> Option<&'d str> {
163        self.macho.name
164    }
165
166    /// The code identifier of this object.
167    ///
168    /// Mach objects use a UUID which is specified in the load commands that are part of the Mach
169    /// header. This UUID is generated at compile / link time and is usually unique per compilation.
170    pub fn code_id(&self) -> Option<CodeId> {
171        let uuid = self.find_uuid()?;
172        Some(CodeId::from_binary(&uuid.as_bytes()[..]))
173    }
174
175    /// The debug information identifier of a MachO file.
176    ///
177    /// This uses the same UUID as `code_id`.
178    pub fn debug_id(&self) -> DebugId {
179        self.find_uuid().map(DebugId::from_uuid).unwrap_or_default()
180    }
181
182    /// The CPU architecture of this object, as specified in the Mach header.
183    pub fn arch(&self) -> Arch {
184        use goblin::mach::constants::cputype;
185
186        match (self.macho.header.cputype(), self.macho.header.cpusubtype()) {
187            (cputype::CPU_TYPE_I386, cputype::CPU_SUBTYPE_I386_ALL) => Arch::X86,
188            (cputype::CPU_TYPE_I386, _) => Arch::X86Unknown,
189            (cputype::CPU_TYPE_X86_64, cputype::CPU_SUBTYPE_X86_64_ALL) => Arch::Amd64,
190            (cputype::CPU_TYPE_X86_64, cputype::CPU_SUBTYPE_X86_64_H) => Arch::Amd64h,
191            (cputype::CPU_TYPE_X86_64, _) => Arch::Amd64Unknown,
192            (cputype::CPU_TYPE_ARM64, cputype::CPU_SUBTYPE_ARM64_ALL) => Arch::Arm64,
193            (cputype::CPU_TYPE_ARM64, cputype::CPU_SUBTYPE_ARM64_V8) => Arch::Arm64V8,
194            (cputype::CPU_TYPE_ARM64, cputype::CPU_SUBTYPE_ARM64_E) => Arch::Arm64e,
195            (cputype::CPU_TYPE_ARM64, _) => Arch::Arm64Unknown,
196            (cputype::CPU_TYPE_ARM64_32, cputype::CPU_SUBTYPE_ARM64_32_ALL) => Arch::Arm64_32,
197            (cputype::CPU_TYPE_ARM64_32, cputype::CPU_SUBTYPE_ARM64_32_V8) => Arch::Arm64_32V8,
198            (cputype::CPU_TYPE_ARM64_32, _) => Arch::Arm64_32Unknown,
199            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_ALL) => Arch::Arm,
200            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V5TEJ) => Arch::ArmV5,
201            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V6) => Arch::ArmV6,
202            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V6M) => Arch::ArmV6m,
203            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V7) => Arch::ArmV7,
204            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V7F) => Arch::ArmV7f,
205            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V7S) => Arch::ArmV7s,
206            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V7K) => Arch::ArmV7k,
207            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V7M) => Arch::ArmV7m,
208            (cputype::CPU_TYPE_ARM, cputype::CPU_SUBTYPE_ARM_V7EM) => Arch::ArmV7em,
209            (cputype::CPU_TYPE_ARM, _) => Arch::ArmUnknown,
210            (cputype::CPU_TYPE_POWERPC, cputype::CPU_SUBTYPE_POWERPC_ALL) => Arch::Ppc,
211            (cputype::CPU_TYPE_POWERPC64, cputype::CPU_SUBTYPE_POWERPC_ALL) => Arch::Ppc64,
212            (_, _) => Arch::Unknown,
213        }
214    }
215
216    /// The kind of this object, as specified in the Mach header.
217    pub fn kind(&self) -> ObjectKind {
218        match self.macho.header.filetype {
219            goblin::mach::header::MH_OBJECT => ObjectKind::Relocatable,
220            goblin::mach::header::MH_EXECUTE => ObjectKind::Executable,
221            goblin::mach::header::MH_FVMLIB => ObjectKind::Library,
222            goblin::mach::header::MH_CORE => ObjectKind::Dump,
223            goblin::mach::header::MH_PRELOAD => ObjectKind::Executable,
224            goblin::mach::header::MH_DYLIB => ObjectKind::Library,
225            goblin::mach::header::MH_DYLINKER => ObjectKind::Executable,
226            goblin::mach::header::MH_BUNDLE => ObjectKind::Library,
227            goblin::mach::header::MH_DSYM => ObjectKind::Debug,
228            goblin::mach::header::MH_KEXT_BUNDLE => ObjectKind::Library,
229            _ => ObjectKind::Other,
230        }
231    }
232
233    /// The address at which the image prefers to be loaded into memory.
234    ///
235    /// MachO files store all internal addresses as if it was loaded at that address. When the image
236    /// is actually loaded, that spot might already be taken by other images and so it must be
237    /// relocated to a new address. At runtime, a relocation table manages the arithmetics behind
238    /// this.
239    ///
240    /// Addresses used in `symbols` or `debug_session` have already been rebased relative to that
241    /// load address, so that the caller only has to deal with addresses relative to the actual
242    /// start of the image.
243    pub fn load_address(&self) -> u64 {
244        for seg in &self.macho.segments {
245            if seg.name().map(|name| name == "__TEXT").unwrap_or(false) {
246                return seg.vmaddr;
247            }
248        }
249
250        0
251    }
252
253    /// Determines whether this object exposes a public symbol table.
254    pub fn has_symbols(&self) -> bool {
255        self.macho.symbols.is_some()
256    }
257
258    /// Returns an iterator over symbols in the public symbol table.
259    pub fn symbols(&self) -> MachOSymbolIterator<'d> {
260        // Cache indices of code sections. These are either "__text" or "__stubs", always located in
261        // the "__TEXT" segment. It looks like each of those sections only occurs once, but to be
262        // safe they are collected into a vector.
263        let mut sections = SmallVec::new();
264        let mut section_index = 0;
265
266        'outer: for segment in &self.macho.segments {
267            if segment.name().ok() != Some("__TEXT") {
268                section_index += segment.nsects as usize;
269                continue;
270            }
271
272            for result in segment {
273                // Do not continue to iterate potentially broken section headers. This could lead to
274                // invalid section indices.
275                let section = match result {
276                    Ok((section, _data)) => section,
277                    Err(_) => break 'outer,
278                };
279
280                match section.name() {
281                    Ok("__text") | Ok("__stubs") => sections.push(section_index),
282                    _ => (),
283                }
284
285                section_index += 1;
286            }
287        }
288
289        MachOSymbolIterator {
290            symbols: self.macho.symbols(),
291            sections,
292            vmaddr: self.load_address(),
293            symbolmap: self.bcsymbolmap.clone(),
294        }
295    }
296
297    /// Returns an ordered map of symbols in the symbol table.
298    pub fn symbol_map(&self) -> SymbolMap<'d> {
299        self.symbols().collect()
300    }
301
302    /// Determines whether this object contains debug information.
303    pub fn has_debug_info(&self) -> bool {
304        self.has_section("debug_info")
305    }
306
307    /// Constructs a debugging session.
308    ///
309    /// A debugging session loads certain information from the object file and creates caches for
310    /// efficient access to various records in the debug information. Since this can be quite a
311    /// costly process, try to reuse the debugging session as long as possible.
312    ///
313    /// MachO files generally use DWARF debugging information, which is also used by ELF containers
314    /// on Linux.
315    ///
316    /// Constructing this session will also work if the object does not contain debugging
317    /// information, in which case the session will be a no-op. This can be checked via
318    /// [`has_debug_info`](struct.MachObject.html#method.has_debug_info).
319    pub fn debug_session(&self) -> Result<DwarfDebugSession<'d>, DwarfError> {
320        let symbols = self.symbol_map();
321        let mut session =
322            DwarfDebugSession::parse(self, symbols, self.load_address() as i64, self.kind())?;
323        session.load_symbolmap(self.bcsymbolmap.clone());
324        Ok(session)
325    }
326
327    /// Determines whether this object contains stack unwinding information.
328    pub fn has_unwind_info(&self) -> bool {
329        self.has_section("eh_frame")
330            || self.has_section("debug_frame")
331            || self.has_section("unwind_info")
332    }
333
334    /// Determines whether this object contains embedded source.
335    pub fn has_sources(&self) -> bool {
336        false
337    }
338
339    /// Determines whether this object is malformed and was only partially parsed
340    pub fn is_malformed(&self) -> bool {
341        false
342    }
343
344    /// Returns the raw data of the ELF file.
345    pub fn data(&self) -> &'d [u8] {
346        self.data
347    }
348
349    /// Checks whether this mach object contains hidden symbols.
350    ///
351    /// This is an indication that BCSymbolMaps are needed to symbolicate crash reports correctly.
352    pub fn requires_symbolmap(&self) -> bool {
353        self.symbols()
354            .any(|s| s.name().is_some_and(|n| n.starts_with(SWIFT_HIDDEN_PREFIX)))
355    }
356}
357
358impl fmt::Debug for MachObject<'_> {
359    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
360        f.debug_struct("MachObject")
361            .field("code_id", &self.code_id())
362            .field("debug_id", &self.debug_id())
363            .field("arch", &self.arch())
364            .field("kind", &self.kind())
365            .field("load_address", &format_args!("{:#x}", self.load_address()))
366            .field("has_symbols", &self.has_symbols())
367            .field("has_debug_info", &self.has_debug_info())
368            .field("has_unwind_info", &self.has_unwind_info())
369            .field("is_malformed", &self.is_malformed())
370            .finish()
371    }
372}
373
374impl<'slf, 'd: 'slf> AsSelf<'slf> for MachObject<'d> {
375    type Ref = MachObject<'slf>;
376
377    fn as_self(&'slf self) -> &'slf Self::Ref {
378        self
379    }
380}
381
382impl<'d> Parse<'d> for MachObject<'d> {
383    type Error = MachError;
384
385    fn test(data: &[u8]) -> bool {
386        Self::test(data)
387    }
388
389    fn parse_with_opts(data: &'d [u8], _opts: ParseObjectOptions) -> Result<Self, Self::Error> {
390        Self::parse(data)
391    }
392}
393
394impl<'data: 'object, 'object> ObjectLike<'data, 'object> for MachObject<'data> {
395    type Error = DwarfError;
396    type Session = DwarfDebugSession<'data>;
397    type SymbolIterator = MachOSymbolIterator<'data>;
398
399    fn file_format(&self) -> FileFormat {
400        self.file_format()
401    }
402
403    fn code_id(&self) -> Option<CodeId> {
404        self.code_id()
405    }
406
407    fn debug_id(&self) -> DebugId {
408        self.debug_id()
409    }
410
411    fn arch(&self) -> Arch {
412        self.arch()
413    }
414
415    fn kind(&self) -> ObjectKind {
416        self.kind()
417    }
418
419    fn load_address(&self) -> u64 {
420        self.load_address()
421    }
422
423    fn has_symbols(&self) -> bool {
424        self.has_symbols()
425    }
426
427    fn symbols(&self) -> Self::SymbolIterator {
428        self.symbols()
429    }
430
431    fn symbol_map(&self) -> SymbolMap<'data> {
432        self.symbol_map()
433    }
434
435    fn has_debug_info(&self) -> bool {
436        self.has_debug_info()
437    }
438
439    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
440        self.debug_session()
441    }
442
443    fn has_unwind_info(&self) -> bool {
444        self.has_unwind_info()
445    }
446
447    fn has_sources(&self) -> bool {
448        self.has_sources()
449    }
450
451    fn is_malformed(&self) -> bool {
452        self.is_malformed()
453    }
454}
455
456impl<'data> Dwarf<'data> for MachObject<'data> {
457    fn endianity(&self) -> Endian {
458        if self.macho.little_endian {
459            Endian::Little
460        } else {
461            Endian::Big
462        }
463    }
464
465    fn raw_section(&self, section_name: &str) -> Option<DwarfSection<'data>> {
466        for segment in &self.macho.segments {
467            for section in segment.into_iter() {
468                let (header, data) = section.ok()?;
469                if let Ok(sec) = header.name() {
470                    if sec.starts_with("__") && map_section_name(&sec[2..]) == section_name {
471                        // In some cases, dsymutil leaves sections headers but removes their
472                        // data from the file. While the addr and size parameters are still
473                        // set, `header.offset` is 0 in that case. We skip them just like the
474                        // section was missing to avoid loading invalid data.
475                        if header.offset == 0 {
476                            return None;
477                        }
478
479                        let (data, offset) = if self.macho.is_64 {
480                            // The section header's `offset` field is only 32 bits wide, so for
481                            // files larger than 4 GiB (e.g. produced with thin LTO) it is
482                            // truncated and the section data goblin slices out is wrong. Recompute
483                            // the real 64-bit file offset from the enclosing segment's `fileoff`
484                            // and `vmaddr`, which are both 64-bit, and re-slice the file data.
485                            header
486                                .addr
487                                .checked_sub(segment.vmaddr)
488                                .and_then(|rel| rel.checked_add(segment.fileoff))
489                                .and_then(|file_offset| {
490                                    let start = usize::try_from(file_offset).ok()?;
491                                    let len = usize::try_from(header.size).ok()?;
492                                    let end = start.checked_add(len)?;
493                                    Some((self.data.get(start..end)?, file_offset))
494                                })
495                                // Fall back to the data goblin provided if the segment-relative
496                                // computation is out of bounds or overflows.
497                                .unwrap_or((data, u64::from(header.offset)))
498                        } else {
499                            (data, u64::from(header.offset))
500                        };
501
502                        return Some(DwarfSection {
503                            data: Cow::Borrowed(data),
504                            address: header.addr,
505                            offset,
506                            align: u64::from(header.align),
507                        });
508                    }
509                }
510            }
511        }
512
513        None
514    }
515}
516
/// Translates a MachO section name (without the leading `__`) to its canonical DWARF name.
///
/// Only `debug_str_offs` is rewritten, to `debug_str_offsets`; every other name is returned
/// unchanged.
///
/// See <https://llvm.org/doxygen/MachOObjectFile_8cpp_source.html#l05341>.
fn map_section_name(name: &str) -> &str {
    if name == "debug_str_offs" {
        "debug_str_offsets"
    } else {
        name
    }
}
524/// An iterator over symbols in the MachO file.
525///
526/// Returned by [`MachObject::symbols`](struct.MachObject.html#method.symbols).
527pub struct MachOSymbolIterator<'data> {
528    symbols: mach::symbols::SymbolIterator<'data>,
529    sections: SmallVec<[usize; 2]>,
530    vmaddr: u64,
531    symbolmap: Option<Arc<BcSymbolMap<'data>>>,
532}
533
534impl<'data> Iterator for MachOSymbolIterator<'data> {
535    type Item = Symbol<'data>;
536
537    fn next(&mut self) -> Option<Self::Item> {
538        for next in &mut self.symbols {
539            let (mut name, nlist) = next.ok()?;
540
541            // Sanity check of the symbol address. Since we only intend to iterate over function
542            // symbols, they need to be mapped after the image's vmaddr.
543            if nlist.n_value < self.vmaddr {
544                continue;
545            }
546
547            // We are only interested in symbols pointing to a code section (type `N_SECT`). The
548            // section index is incremented by one to leave room for `NO_SECT` (0). Section indexes
549            // of the code sections have been passed in via `self.sections`.
550            let in_valid_section = !nlist.is_stab()
551                && nlist.get_type() == mach::symbols::N_SECT
552                && nlist.n_sect != (mach::symbols::NO_SECT as usize)
553                && self.sections.contains(&(nlist.n_sect - 1));
554
555            if !in_valid_section {
556                continue;
557            }
558
559            if let Some(symbolmap) = self.symbolmap.as_ref() {
560                name = symbolmap.resolve(name);
561            }
562
563            // Trim leading underscores from mangled C++ names.
564            if let Some(tail) = name.strip_prefix('_') {
565                if !name.starts_with(SWIFT_HIDDEN_PREFIX) {
566                    name = tail;
567                }
568            }
569
570            return Some(Symbol {
571                name: Some(Cow::Borrowed(name)),
572                address: nlist.n_value - self.vmaddr,
573                size: 0, // Computed in `SymbolMap`
574            });
575        }
576
577        None
578    }
579}
580
581/// An iterator over objects in a [`FatMachO`](struct.FatMachO.html).
582///
583/// Objects are parsed just-in-time while iterating, which may result in errors. The iterator is
584/// still valid afterwards, however, and can be used to resolve the next object.
585pub struct FatMachObjectIterator<'d, 'a> {
586    iter: mach::FatArchIterator<'a>,
587    remaining: usize,
588    data: &'d [u8],
589    opts: ParseObjectOptions,
590}
591
592impl<'d> Iterator for FatMachObjectIterator<'d, '_> {
593    type Item = Result<MachObject<'d>, MachError>;
594
595    fn next(&mut self) -> Option<Self::Item> {
596        if self.remaining == 0 {
597            return None;
598        }
599
600        self.remaining -= 1;
601        match self.iter.next() {
602            Some(Ok(arch)) => {
603                let start = (arch.offset as usize).min(self.data.len());
604                let end = (arch.offset as usize + arch.size as usize).min(self.data.len());
605                Some(MachObject::parse_with_opts(
606                    &self.data[start..end],
607                    self.opts,
608                ))
609            }
610            Some(Err(error)) => Some(Err(MachError::new(error))),
611            None => None,
612        }
613    }
614
615    fn size_hint(&self) -> (usize, Option<usize>) {
616        (self.remaining, Some(self.remaining))
617    }
618}
619
620impl std::iter::FusedIterator for FatMachObjectIterator<'_, '_> {}
621impl ExactSizeIterator for FatMachObjectIterator<'_, '_> {}
622
623/// A fat MachO container that hosts one or more [`MachObject`]s.
624pub struct FatMachO<'d> {
625    fat: mach::MultiArch<'d>,
626    data: &'d [u8],
627    opts: ParseObjectOptions,
628}
629
630impl<'d> FatMachO<'d> {
631    /// Tests whether the buffer could contain an ELF object.
632    pub fn test(data: &[u8]) -> bool {
633        matches!(MachArchive::is_fat(data), Some(true))
634    }
635
636    /// Tries to parse a fat MachO container from the given slice.
637    pub fn parse(data: &'d [u8]) -> Result<Self, MachError> {
638        Self::parse_with_opts(data, Default::default())
639    }
640
641    /// Tries to parse a fat MachO container from the given slice.
642    pub fn parse_with_opts(data: &'d [u8], opts: ParseObjectOptions) -> Result<Self, MachError> {
643        mach::MultiArch::new(data)
644            .map(|fat| FatMachO { fat, data, opts })
645            .map_err(MachError::new)
646    }
647
648    /// Returns an iterator over objects in this container.
649    pub fn objects(&self) -> FatMachObjectIterator<'d, '_> {
650        FatMachObjectIterator {
651            iter: self.fat.iter_arches(),
652            remaining: self.fat.narches,
653            data: self.data,
654            opts: self.opts,
655        }
656    }
657
658    /// Returns the number of objects in this archive.
659    pub fn object_count(&self) -> usize {
660        self.fat.narches
661    }
662
663    /// Resolves the object at the given index.
664    ///
665    /// Returns `Ok(None)` if the index is out of bounds, or `Err` if the object exists but cannot
666    /// be parsed.
667    pub fn object_by_index(&self, index: usize) -> Result<Option<MachObject<'d>>, MachError> {
668        let arch = match self.fat.iter_arches().nth(index) {
669            Some(arch) => arch.map_err(MachError::new)?,
670            None => return Ok(None),
671        };
672
673        let start = (arch.offset as usize).min(self.data.len());
674        let end = (arch.offset as usize + arch.size as usize).min(self.data.len());
675        MachObject::parse_with_opts(&self.data[start..end], self.opts).map(Some)
676    }
677}
678
679impl fmt::Debug for FatMachO<'_> {
680    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
681        f.debug_struct("FatMachO").field("fat", &self.fat).finish()
682    }
683}
684
685impl<'slf, 'd: 'slf> AsSelf<'slf> for FatMachO<'d> {
686    type Ref = FatMachO<'slf>;
687
688    fn as_self(&'slf self) -> &'slf Self::Ref {
689        self
690    }
691}
692
693#[allow(clippy::large_enum_variant)]
694enum MachObjectIteratorInner<'d, 'a> {
695    Single(MonoArchiveObjects<'d, MachObject<'d>>),
696    Archive(FatMachObjectIterator<'d, 'a>),
697}
698
699/// An iterator over objects in a [`MachArchive`](struct.MachArchive.html).
700pub struct MachObjectIterator<'d, 'a>(MachObjectIteratorInner<'d, 'a>);
701
702impl<'d> Iterator for MachObjectIterator<'d, '_> {
703    type Item = Result<MachObject<'d>, MachError>;
704
705    fn next(&mut self) -> Option<Self::Item> {
706        match self.0 {
707            MachObjectIteratorInner::Single(ref mut iter) => iter.next(),
708            MachObjectIteratorInner::Archive(ref mut iter) => iter.next(),
709        }
710    }
711
712    fn size_hint(&self) -> (usize, Option<usize>) {
713        match self.0 {
714            MachObjectIteratorInner::Single(ref iter) => iter.size_hint(),
715            MachObjectIteratorInner::Archive(ref iter) => iter.size_hint(),
716        }
717    }
718}
719
720impl std::iter::FusedIterator for MachObjectIterator<'_, '_> {}
721impl ExactSizeIterator for MachObjectIterator<'_, '_> {}
722
723#[derive(Debug)]
724enum MachArchiveInner<'d> {
725    Single(MonoArchive<'d, MachObject<'d>>),
726    Archive(FatMachO<'d>),
727}
728
729/// An archive that can consist of a single [`MachObject`] or a [`FatMachO`] container.
730///
731/// Executables and dSYM files on macOS can be a so-called _Fat Mach Object_: It contains multiple
732/// objects for several architectures. When loading this object, the operating system determines the
733/// object corresponding to the host's architecture. This allows to distribute a single binary with
734/// optimizations for specific CPUs, which is frequently done on iOS.
735///
736/// To abstract over the differences, `MachArchive` simulates the archive interface also for single
737/// Mach objects. This allows uniform access to both file types.
738///
739/// [`MachObject`]: struct.MachObject.html
740/// [`FatMachO`]: struct.FatMachO.html
741#[derive(Debug)]
742pub struct MachArchive<'d>(MachArchiveInner<'d>);
743
744impl<'d> MachArchive<'d> {
745    /// Tests whether the buffer contains either a Mach Object or a Fat Mach Object.
746    pub fn test(data: &[u8]) -> bool {
747        Self::is_fat(data).is_some()
748    }
749
750    /// Determines if the binary content is a macho object, and whether or not it is fat
751    fn is_fat(data: &[u8]) -> Option<bool> {
752        let (magic, _maybe_ctx) = goblin::mach::parse_magic_and_ctx(data, 0).ok()?;
753        match magic {
754            goblin::mach::fat::FAT_MAGIC => {
755                use scroll::Pread;
756                // so this is kind of stupid but java class files share the same cutesy magic
757                // as a macho fat file (CAFEBABE).  This means that we often claim that a java
758                // class file is actually a macho binary but it's not.  The next 32 bits encode
759                // the number of embedded architectures in a fat mach.  In case of a JAR file
760                // we have 2 bytes for minor version and 2 bytes for major version of the class
761                // file format.
762                //
763                // The internet suggests the first public version of Java had the class version
764                // 45.  Thus the logic applied here is that if the number is >= 45 we're more
765                // likely to have a java class file than a macho file with 45 architectures
766                // which should be very rare.
767                //
768                // https://docs.oracle.com/javase/specs/jvms/se6/html/ClassFile.doc.html
769                let narches = data.pread_with::<u32>(4, scroll::BE).ok()?;
770
771                if narches < 45 {
772                    Some(true)
773                } else {
774                    None
775                }
776            }
777            goblin::mach::header::MH_CIGAM_64
778            | goblin::mach::header::MH_CIGAM
779            | goblin::mach::header::MH_MAGIC_64
780            | goblin::mach::header::MH_MAGIC => Some(false),
781            _ => None,
782        }
783    }
784
785    /// Tries to parse a Mach archive from the given slice, with default options.
786    pub fn parse(data: &'d [u8]) -> Result<Self, MachError> {
787        Self::parse_with_opts(data, Default::default())
788    }
789
790    /// Tries to parse a Mach archive from the given slice.
791    pub fn parse_with_opts(data: &'d [u8], opts: ParseObjectOptions) -> Result<Self, MachError> {
792        Ok(Self(match Self::is_fat(data) {
793            Some(true) => MachArchiveInner::Archive(FatMachO::parse_with_opts(data, opts)?),
794            // Fall back to mach parsing to receive a meaningful error message from goblin
795            _ => MachArchiveInner::Single(MonoArchive::new(data, opts)),
796        }))
797    }
798
799    /// Returns an iterator over all objects contained in this archive.
800    pub fn objects(&self) -> MachObjectIterator<'d, '_> {
801        MachObjectIterator(match self.0 {
802            MachArchiveInner::Single(ref inner) => MachObjectIteratorInner::Single(inner.objects()),
803            MachArchiveInner::Archive(ref inner) => {
804                MachObjectIteratorInner::Archive(inner.objects())
805            }
806        })
807    }
808
809    /// Returns the number of objects in this archive.
810    pub fn object_count(&self) -> usize {
811        match self.0 {
812            MachArchiveInner::Single(ref inner) => inner.object_count(),
813            MachArchiveInner::Archive(ref inner) => inner.object_count(),
814        }
815    }
816
817    /// Resolves the object at the given index.
818    ///
819    /// Returns `Ok(None)` if the index is out of bounds, or `Err` if the object exists but cannot
820    /// be parsed.
821    pub fn object_by_index(&self, index: usize) -> Result<Option<MachObject<'d>>, MachError> {
822        match self.0 {
823            MachArchiveInner::Single(ref inner) => inner.object_by_index(index),
824            MachArchiveInner::Archive(ref inner) => inner.object_by_index(index),
825        }
826    }
827
828    /// Returns whether this is a multi-object archive.
829    ///
830    /// This may also return true if there is only a single object inside the archive.
831    pub fn is_multi(&self) -> bool {
832        match self.0 {
833            MachArchiveInner::Archive(_) => true,
834            MachArchiveInner::Single(_) => false,
835        }
836    }
837}
838
839impl<'slf, 'd: 'slf> AsSelf<'slf> for MachArchive<'d> {
840    type Ref = MachArchive<'slf>;
841
842    fn as_self(&'slf self) -> &'slf Self::Ref {
843        self
844    }
845}
846
847#[cfg(test)]
848mod tests {
849
850    use super::*;
851
852    #[test]
853    fn test_bcsymbolmap() {
854        let object_data =
855            std::fs::read("tests/fixtures/2d10c42f-591d-3265-b147-78ba0868073f.dwarf-hidden")
856                .unwrap();
857        let mut object = MachObject::parse(&object_data).unwrap();
858
859        // make sure that we get hidden symbols/filenames before loading the symbolmap
860        let mut symbols = object.symbols();
861        let symbol = symbols.next().unwrap();
862        assert_eq!(symbol.name.unwrap(), "__hidden#0_");
863
864        let session = object.debug_session().unwrap();
865        let mut files = session.files();
866        let file = files.next().unwrap().unwrap();
867        assert_eq!(&file.path_str(), "__hidden#41_/__hidden#42_");
868        assert_eq!(
869            &file.abs_path_str(),
870            // XXX: the path joining logic usually detects absolute paths (see below), but that does
871            // not work for these hidden paths.
872            "__hidden#41_/__hidden#41_/__hidden#42_"
873        );
874
875        let mut functions = session.functions();
876        let function = functions.next().unwrap().unwrap();
877        assert_eq!(&function.name, "__hidden#0_");
878        assert_eq!(&function.compilation_dir, b"__hidden#41_");
879        assert_eq!(
880            &function.lines[0].file.path_str(),
881            "__hidden#41_/__hidden#42_"
882        );
883
884        let fn_with_inlinees = functions
885            .filter_map(|f| f.ok())
886            .find(|f| !f.inlinees.is_empty())
887            .unwrap();
888        let inlinee = fn_with_inlinees.inlinees.first().unwrap();
889        assert_eq!(&inlinee.name, "__hidden#146_");
890
891        // loads the symbolmap
892        let bc_symbol_map_data =
893            std::fs::read("tests/fixtures/c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap")
894                .unwrap();
895        let bc_symbol_map = BcSymbolMap::parse(&bc_symbol_map_data).unwrap();
896        object.load_symbolmap(bc_symbol_map);
897
898        // make sure we get resolved symbols/filenames now
899        let mut symbols = object.symbols();
900        let symbol = symbols.next().unwrap();
901        assert_eq!(symbol.name.unwrap(), "-[SentryMessage initWithFormatted:]");
902
903        let symbol = symbols.next().unwrap();
904        assert_eq!(symbol.name.unwrap(), "-[SentryMessage setMessage:]");
905
906        let session = object.debug_session().unwrap();
907        let mut files = session.files();
908        let file = files.next().unwrap().unwrap();
909        assert_eq!(
910            &file.path_str(),
911            "/Users/philipphofmann/git-repos/sentry-cocoa/Sources/Sentry/SentryMessage.m"
912        );
913        assert_eq!(
914            &file.abs_path_str(),
915            "/Users/philipphofmann/git-repos/sentry-cocoa/Sources/Sentry/SentryMessage.m"
916        );
917
918        let mut functions = session.functions();
919        let function = functions.next().unwrap().unwrap();
920        assert_eq!(&function.name, "-[SentryMessage initWithFormatted:]");
921        assert_eq!(
922            &function.compilation_dir,
923            b"/Users/philipphofmann/git-repos/sentry-cocoa"
924        );
925        assert_eq!(
926            &function.lines[0].file.path_str(),
927            "/Users/philipphofmann/git-repos/sentry-cocoa/Sources/Sentry/SentryMessage.m"
928        );
929
930        let fn_with_inlinees = functions
931            .filter_map(|f| f.ok())
932            .find(|f| !f.inlinees.is_empty())
933            .unwrap();
934        let inlinee = fn_with_inlinees.inlinees.first().unwrap();
935        assert_eq!(&inlinee.name, "prepareReportWriter");
936    }
937
938    #[test]
939    fn test_overflow_multiarch() {
940        let data = [
941            0xbe, 0xba, 0xfe, 0xca, // magic
942            0x00, 0x00, 0x00, 0x01, // num arches = 1
943            0x00, 0x00, 0x00, 0x00, // cpu type
944            0x00, 0x00, 0x00, 0x00, // cpu subtype
945            0x00, 0xff, 0xff, 0xff, // offset
946            0x00, 0x00, 0xff, 0xff, // size
947            0x00, 0x00, 0x00, 0x00, // align
948        ];
949
950        let fat = FatMachO::parse(&data).unwrap();
951
952        let obj = fat.object_by_index(0);
953        assert!(obj.is_err());
954
955        let mut iter = fat.objects();
956        assert!(iter.next().unwrap().is_err());
957    }
958
959    #[test]
960    fn test_section_access() {
961        let data = [
962            0xfe, 0xed, 0xfa, 0xcf, 0x1, 0x0, 0x0, 0x0, 0x0, 0x2, 0xed, 0xfa, 0xce, 0x6f, 0x73,
963            0x6f, 0x0, 0x0, 0x0, 0x7, 0x0, 0x0, 0x0, 0x4d, 0x4f, 0x44, 0x55, 0x4c, 0x40, 0x20, 0x0,
964            0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x4d, 0xc2, 0xc2, 0xc2, 0xc2,
965            0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xca, 0x7a, 0xfe, 0xba, 0xbe, 0x0, 0x0, 0x0, 0x20,
966            0x43, 0x2f, 0x0, 0x32, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0x0, 0x0, 0x0, 0x4d, 0x4f,
967            0x44, 0x55, 0x4c, 0x40, 0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
968            0x0, 0x0, 0x2a, 0x78, 0x6e, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2,
969            0xc2, 0xc2, 0xc2, 0xc2, 0xc6, 0xd5, 0xc2, 0xc2, 0x1f, 0x1f,
970        ];
971
972        let obj = MachObject::parse(&data).unwrap();
973
974        assert!(!obj.has_debug_info());
975    }
976
977    #[test]
978    fn test_invalid_symbols() {
979        let data = std::fs::read("tests/fixtures/invalid-symbols.fuzzed").unwrap();
980
981        let obj = MachObject::parse(&data).unwrap();
982
983        let _ = obj.symbol_map();
984    }
985}