goblin_experimental/mach/
mod.rs

1//! The Mach-o, mostly zero-copy, binary format parser and raw struct definitions
2use alloc::vec::Vec;
3use core::fmt;
4
5use log::debug;
6
7use scroll::ctx::SizeWith;
8use scroll::{Pread, BE};
9
10use crate::{archive, container};
11use crate::{error, take_hint_bytes};
12
13pub mod bind_opcodes;
14pub mod constants;
15pub mod exports;
16pub mod fat;
17pub mod header;
18pub mod imports;
19pub mod load_command;
20pub mod relocation;
21pub mod segment;
22pub mod symbols;
23
24pub use self::constants::cputype;
25
26/// Returns a big endian magical number
27pub fn peek(bytes: &[u8], offset: usize) -> error::Result<u32> {
28    Ok(bytes.pread_with::<u32>(offset, scroll::BE)?)
29}
30
31/// Parses a magic number, and an accompanying mach-o binary parsing context, according to the magic number.
32pub fn parse_magic_and_ctx(
33    bytes: &[u8],
34    offset: usize,
35) -> error::Result<(u32, Option<container::Ctx>)> {
36    use crate::container::Container;
37    use crate::mach::header::*;
38    let magic = bytes.pread_with::<u32>(offset, BE)?;
39    let ctx = match magic {
40        MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => {
41            let is_lsb = magic == MH_CIGAM || magic == MH_CIGAM_64;
42            let le = scroll::Endian::from(is_lsb);
43            let container = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 {
44                Container::Big
45            } else {
46                Container::Little
47            };
48            Some(container::Ctx::new(container, le))
49        }
50        _ => None,
51    };
52    Ok((magic, ctx))
53}
54
55/// A cross-platform, zero-copy, endian-aware, 32/64 bit Mach-o binary parser
56pub struct MachO<'a> {
57    /// The mach-o header
58    pub header: header::Header,
59    /// The load commands tell the kernel and dynamic linker how to use/interpret this binary
60    pub load_commands: Vec<load_command::LoadCommand>,
61    /// The load command "segments" - typically the pieces of the binary that are loaded into memory
62    pub segments: segment::Segments<'a>,
63    /// The "Nlist" style symbols in this binary - strippable
64    pub symbols: Option<symbols::Symbols<'a>>,
65    /// The dylibs this library depends on
66    pub libs: Vec<&'a str>,
67    /// The runtime search paths for dylibs this library depends on
68    pub rpaths: Vec<&'a str>,
69    /// The entry point (as a virtual memory address), 0 if none
70    pub entry: u64,
71    /// Whether `entry` refers to an older `LC_UNIXTHREAD` instead of the newer `LC_MAIN` entrypoint
72    pub old_style_entry: bool,
73    /// The name of the dylib, if any
74    pub name: Option<&'a str>,
75    /// Are we a little-endian binary?
76    pub little_endian: bool,
77    /// Are we a 64-bit binary
78    pub is_64: bool,
79    data: &'a [u8],
80    ctx: container::Ctx,
81    export_trie: Option<exports::ExportTrie<'a>>,
82    bind_interpreter: Option<imports::BindInterpreter<'a>>,
83}
84
85impl<'a> fmt::Debug for MachO<'a> {
86    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
87        fmt.debug_struct("MachO")
88            .field("header", &self.header)
89            .field("load_commands", &self.load_commands)
90            .field("segments", &self.segments)
91            .field("entry", &self.entry)
92            .field("old_style_entry", &self.old_style_entry)
93            .field("libs", &self.libs)
94            .field("name", &self.name)
95            .field("little_endian", &self.little_endian)
96            .field("is_64", &self.is_64)
97            .field("symbols()", &self.symbols().collect::<Vec<_>>())
98            .field("exports()", &self.exports())
99            .field("imports()", &self.imports())
100            .finish()
101    }
102}
103
104impl<'a> MachO<'a> {
105    /// Is this a relocatable object file?
106    pub fn is_object_file(&self) -> bool {
107        self.header.filetype == header::MH_OBJECT
108    }
109    /// Return an iterator over all the symbols in this binary
110    pub fn symbols(&self) -> symbols::SymbolIterator<'a> {
111        if let Some(ref symbols) = self.symbols {
112            symbols.into_iter()
113        } else {
114            symbols::SymbolIterator::default()
115        }
116    }
117    /// Return a vector of the relocations in this binary
118    pub fn relocations(
119        &self,
120    ) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>> {
121        debug!("Iterating relocations");
122        let mut relocs = Vec::new();
123        for (_i, segment) in (&self.segments).into_iter().enumerate() {
124            for (j, section) in segment.into_iter().enumerate() {
125                let (section, _data) = section?;
126                if section.nreloc > 0 {
127                    relocs.push((j, section.iter_relocations(self.data, self.ctx), section));
128                }
129            }
130        }
131        Ok(relocs)
132    }
133    /// Return the exported symbols in this binary (if any)
134    pub fn exports(&self) -> error::Result<Vec<exports::Export>> {
135        if let Some(ref trie) = self.export_trie {
136            trie.exports(self.libs.as_slice())
137        } else {
138            Ok(vec![])
139        }
140    }
141    /// Return the imported symbols in this binary that dyld knows about (if any)
142    pub fn imports(&self) -> error::Result<Vec<imports::Import>> {
143        if let Some(ref interpreter) = self.bind_interpreter {
144            interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx)
145        } else {
146            Ok(vec![])
147        }
148    }
149    /// Parses the Mach-o binary from `bytes` at `offset`
150    pub fn parse(bytes: &'a [u8], offset: usize) -> error::Result<MachO<'a>> {
151        Self::parse_impl(bytes, offset, false)
152    }
153
154    /// Parses the Mach-o binary from `bytes` at `offset` in lossy mode
155    pub fn parse_lossy(bytes: &'a [u8], offset: usize) -> error::Result<MachO<'a>> {
156        Self::parse_impl(bytes, offset, true)
157    }
158
159    /// Parses the Mach-o binary from `bytes` at `offset` in `lossy` mode
160    fn parse_impl(bytes: &'a [u8], mut offset: usize, lossy: bool) -> error::Result<MachO<'a>> {
161        let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?;
162        let ctx = if let Some(ctx) = maybe_ctx {
163            ctx
164        } else {
165            return Err(error::Error::BadMagic(u64::from(magic)));
166        };
167        debug!("Ctx: {:?}", ctx);
168        let offset = &mut offset;
169        let header: header::Header = bytes.pread_with(*offset, ctx)?;
170        debug!("Mach-o header: {:?}", header);
171        let little_endian = ctx.le.is_little();
172        let is_64 = ctx.container.is_big();
173        *offset += header::Header::size_with(&ctx.container);
174        let ncmds = header.ncmds;
175
176        let sizeofcmds = header.sizeofcmds as usize;
177        // a load cmd is at least 2 * 4 bytes, (type, sizeof)
178        if ncmds > sizeofcmds / 8 || sizeofcmds > bytes.len() {
179            return Err(error::Error::BufferTooShort(ncmds, "load commands"));
180        }
181
182        let mut cmds: Vec<load_command::LoadCommand> = Vec::with_capacity(ncmds);
183        let mut symbols = None;
184        let mut libs = vec!["self"];
185        let mut rpaths = vec![];
186        let mut export_trie = None;
187        let mut bind_interpreter = None;
188        let mut unixthread_entry_address = None;
189        let mut main_entry_offset = None;
190        let mut name = None;
191        let mut segments = segment::Segments::new(ctx);
192        for i in 0..ncmds {
193            let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?;
194            debug!("{} - {:?}", i, cmd);
195            match cmd.command {
196                load_command::CommandVariant::Segment32(command) => segments.push(
197                    segment::Segment::from_32_impl(bytes, &command, cmd.offset, ctx, lossy)?,
198                ),
199                load_command::CommandVariant::Segment64(command) => segments.push(
200                    segment::Segment::from_64_impl(bytes, &command, cmd.offset, ctx, lossy)?,
201                ),
202                load_command::CommandVariant::Symtab(command) => {
203                    match symbols::Symbols::parse(bytes, &command, ctx) {
204                        Ok(s) => symbols = Some(s),
205                        Err(e) if lossy => {
206                            debug!("CommandVariant::Symtab failed: {e}");
207                        }
208                        Err(e) => return Err(e),
209                    }
210                }
211                load_command::CommandVariant::LoadDylib(command)
212                | load_command::CommandVariant::LoadUpwardDylib(command)
213                | load_command::CommandVariant::ReexportDylib(command)
214                | load_command::CommandVariant::LoadWeakDylib(command)
215                | load_command::CommandVariant::LazyLoadDylib(command) => {
216                    match bytes.pread::<&str>(cmd.offset + command.dylib.name as usize) {
217                        Ok(lib) => libs.push(lib),
218                        Err(e) if lossy => {
219                            debug!("CommandVariant::Load/Reexport Dylib failed: {e}");
220                        }
221                        Err(e) => return Err(e.into()),
222                    }
223                }
224                load_command::CommandVariant::Rpath(command) => {
225                    match bytes.pread::<&str>(cmd.offset + command.path as usize) {
226                        Ok(rpath) => rpaths.push(rpath),
227                        Err(e) if lossy => {
228                            debug!("CommandVariant::Rpath failed: {e}");
229                        }
230                        Err(e) => return Err(e.into()),
231                    }
232                }
233                load_command::CommandVariant::DyldInfo(command)
234                | load_command::CommandVariant::DyldInfoOnly(command) => {
235                    export_trie = Some(exports::ExportTrie::new(bytes, &command));
236                    bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command));
237                }
238                load_command::CommandVariant::DyldExportsTrie(command) => {
239                    export_trie = Some(exports::ExportTrie::new_from_linkedit_data_command(
240                        bytes, &command,
241                    ));
242                }
243                load_command::CommandVariant::Unixthread(command) => {
244                    // dyld cares only about the first LC_UNIXTHREAD
245                    if unixthread_entry_address.is_none() {
246                        unixthread_entry_address =
247                            Some(command.instruction_pointer(header.cputype)?);
248                    }
249                }
250                load_command::CommandVariant::Main(command) => {
251                    // dyld cares only about the first LC_MAIN
252                    if main_entry_offset.is_none() {
253                        main_entry_offset = Some(command.entryoff);
254                    }
255                }
256                load_command::CommandVariant::IdDylib(command) => {
257                    match bytes.pread::<&str>(cmd.offset + command.dylib.name as usize) {
258                        Ok(id) => {
259                            libs[0] = id;
260                            name = Some(id);
261                        }
262                        Err(e) if lossy => {
263                            debug!("CommandVariant::IdDylib failed: {e}");
264                        }
265                        Err(e) => return Err(e.into()),
266                    }
267                }
268                _ => (),
269            }
270            cmds.push(cmd)
271        }
272
273        // dyld prefers LC_MAIN over LC_UNIXTHREAD
274        // choose the same way here
275        let (entry, old_style_entry) = if let Some(offset) = main_entry_offset {
276            // map the entrypoint offset to a virtual memory address
277            let base_address = segments
278                .iter()
279                .filter(|s| &s.segname[0..7] == b"__TEXT\0")
280                .map(|s| s.vmaddr - s.fileoff)
281                .next()
282                .ok_or_else(|| {
283                    error::Error::Malformed(format!(
284                        "image specifies LC_MAIN offset {} but has no __TEXT segment",
285                        offset
286                    ))
287                })?;
288
289            (base_address + offset, false)
290        } else if let Some(address) = unixthread_entry_address {
291            (address, true)
292        } else {
293            (0, false)
294        };
295
296        Ok(MachO {
297            header,
298            load_commands: cmds,
299            segments,
300            symbols,
301            libs,
302            rpaths,
303            export_trie,
304            bind_interpreter,
305            entry,
306            old_style_entry,
307            name,
308            ctx,
309            is_64,
310            little_endian,
311            data: bytes,
312        })
313    }
314}
315
/// A Mach-o multi architecture ("fat") binary container
pub struct MultiArch<'a> {
    // The bytes of the whole container
    data: &'a [u8],
    // Byte offset in `data` at which the first fat arch header starts
    start: usize,
    /// The number of architectures announced by the fat header
    pub narches: usize,
}
322
/// Iterator yielding each fat architecture header of a `MultiArch` container
pub struct FatArchIterator<'a> {
    // Index of the next header to yield
    index: usize,
    // The bytes of the whole container
    data: &'a [u8],
    // Total number of headers to yield
    narches: usize,
    // Byte offset in `data` of the first header
    start: usize,
}
330
331/// A single architecture froma multi architecture binary container
332/// ([MultiArch]).
333#[derive(Debug)]
334#[allow(clippy::large_enum_variant)]
335pub enum SingleArch<'a> {
336    MachO(MachO<'a>),
337    Archive(archive::Archive<'a>),
338}
339
340impl<'a> Iterator for FatArchIterator<'a> {
341    type Item = error::Result<fat::FatArch>;
342    fn next(&mut self) -> Option<Self::Item> {
343        if self.index >= self.narches {
344            None
345        } else {
346            let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start;
347            let arch = self
348                .data
349                .pread_with::<fat::FatArch>(offset, scroll::BE)
350                .map_err(core::convert::Into::into);
351            self.index += 1;
352            Some(arch)
353        }
354    }
355}
356
/// Iterator yielding every parsed entry held by a `MultiArch` container
pub struct SingleArchIterator<'a> {
    // Index of the next entry to yield
    index: usize,
    // The bytes of the whole container
    data: &'a [u8],
    // Total number of entries to yield
    narches: usize,
    // Byte offset in `data` of the first fat arch header
    start: usize,
}
364
365pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result<crate::Hint> {
366    if &bytes[0..archive::SIZEOF_MAGIC] == archive::MAGIC {
367        Ok(crate::Hint::Archive)
368    } else {
369        let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, 0)?;
370        match magic {
371            header::MH_CIGAM_64 | header::MH_CIGAM | header::MH_MAGIC_64 | header::MH_MAGIC => {
372                if let Some(ctx) = maybe_ctx {
373                    Ok(crate::Hint::Mach(crate::HintData {
374                        is_lsb: ctx.le.is_little(),
375                        is_64: Some(ctx.container.is_big()),
376                    }))
377                } else {
378                    Err(error::Error::Malformed(format!(
379                        "Correct mach magic {:#x} does not have a matching parsing context!",
380                        magic
381                    )))
382                }
383            }
384            fat::FAT_MAGIC => {
385                // should probably verify this is always Big Endian...
386                let narchitectures = bytes.pread_with::<u32>(4, BE)? as usize;
387                Ok(crate::Hint::MachFat(narchitectures))
388            }
389            _ => Ok(crate::Hint::Unknown(bytes.pread::<u64>(0)?)),
390        }
391    }
392}
393
394fn extract_multi_entry(bytes: &[u8]) -> error::Result<SingleArch> {
395    if let Some(hint_bytes) = take_hint_bytes(bytes) {
396        match peek_bytes(hint_bytes)? {
397            crate::Hint::Mach(_) => {
398                let binary = MachO::parse(bytes, 0)?;
399                Ok(SingleArch::MachO(binary))
400            }
401            crate::Hint::Archive => {
402                let archive = archive::Archive::parse(bytes)?;
403                Ok(SingleArch::Archive(archive))
404            }
405            _ => Err(error::Error::Malformed(format!(
406                "multi-arch entry must be a Mach-O binary or an archive"
407            ))),
408        }
409    } else {
410        Err(error::Error::Malformed(format!("Object is too small")))
411    }
412}
413
414impl<'a> Iterator for SingleArchIterator<'a> {
415    type Item = error::Result<SingleArch<'a>>;
416    fn next(&mut self) -> Option<Self::Item> {
417        if self.index >= self.narches {
418            None
419        } else {
420            let index = self.index;
421            let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
422            self.index += 1;
423            match self.data.pread_with::<fat::FatArch>(offset, scroll::BE) {
424                Ok(arch) => {
425                    let bytes = arch.slice(self.data);
426                    Some(extract_multi_entry(bytes))
427                }
428                Err(e) => Some(Err(e.into())),
429            }
430        }
431    }
432}
433
434impl<'a, 'b> IntoIterator for &'b MultiArch<'a> {
435    type Item = error::Result<SingleArch<'a>>;
436    type IntoIter = SingleArchIterator<'a>;
437    fn into_iter(self) -> Self::IntoIter {
438        SingleArchIterator {
439            index: 0,
440            data: self.data,
441            narches: self.narches,
442            start: self.start,
443        }
444    }
445}
446
447impl<'a> MultiArch<'a> {
448    /// Lazily construct `Self`
449    pub fn new(bytes: &'a [u8]) -> error::Result<Self> {
450        let header = fat::FatHeader::parse(bytes)?;
451        Ok(MultiArch {
452            data: bytes,
453            start: fat::SIZEOF_FAT_HEADER,
454            narches: header.nfat_arch as usize,
455        })
456    }
457    /// Iterate every fat arch header
458    pub fn iter_arches(&self) -> FatArchIterator {
459        FatArchIterator {
460            index: 0,
461            data: self.data,
462            narches: self.narches,
463            start: self.start,
464        }
465    }
466    /// Return all the architectures in this binary
467    pub fn arches(&self) -> error::Result<Vec<fat::FatArch>> {
468        if self.narches > self.data.len() / fat::SIZEOF_FAT_ARCH {
469            return Err(error::Error::BufferTooShort(self.narches, "arches"));
470        }
471
472        let mut arches = Vec::with_capacity(self.narches);
473        for arch in self.iter_arches() {
474            arches.push(arch?);
475        }
476        Ok(arches)
477    }
478    /// Try to get the Mach-o binary at `index`
479    pub fn get(&self, index: usize) -> error::Result<SingleArch<'a>> {
480        if index >= self.narches {
481            return Err(error::Error::Malformed(format!(
482                "Requested the {}-th binary, but there are only {} architectures in this container",
483                index, self.narches
484            )));
485        }
486        let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
487        let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE)?;
488        let bytes = arch.slice(self.data);
489        extract_multi_entry(bytes)
490    }
491
492    pub fn find<F: Fn(error::Result<fat::FatArch>) -> bool>(
493        &'a self,
494        f: F,
495    ) -> Option<error::Result<SingleArch<'a>>> {
496        for (i, arch) in self.iter_arches().enumerate() {
497            if f(arch) {
498                return Some(self.get(i));
499            }
500        }
501        None
502    }
503    /// Try and find the `cputype` in `Self`, if there is one
504    pub fn find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>> {
505        for arch in self.iter_arches() {
506            let arch = arch?;
507            if arch.cputype == cputype {
508                return Ok(Some(arch));
509            }
510        }
511        Ok(None)
512    }
513}
514
515impl<'a> fmt::Debug for MultiArch<'a> {
516    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
517        fmt.debug_struct("MultiArch")
518            .field("arches", &self.arches().unwrap_or_default())
519            .field("data", &self.data.len())
520            .finish()
521    }
522}
523
524#[derive(Debug)]
525#[allow(clippy::large_enum_variant)]
526/// Either a collection of multiple architectures, or a single mach-o binary
527pub enum Mach<'a> {
528    /// A "fat" multi-architecture binary container
529    Fat(MultiArch<'a>),
530    /// A regular Mach-o binary
531    Binary(MachO<'a>),
532}
533
534impl<'a> Mach<'a> {
535    /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary
536    pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
537        Self::parse_impl(bytes, false)
538    }
539
540    /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary in lossy mode
541    pub fn parse_lossy(bytes: &'a [u8]) -> error::Result<Self> {
542        Self::parse_impl(bytes, true)
543    }
544
545    /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary
546    fn parse_impl(bytes: &'a [u8], lossy: bool) -> error::Result<Self> {
547        let size = bytes.len();
548        if size < 4 {
549            let error = error::Error::Malformed("size is smaller than a magical number".into());
550            return Err(error);
551        }
552        let magic = peek(&bytes, 0)?;
553        match magic {
554            fat::FAT_MAGIC => {
555                let multi = MultiArch::new(bytes)?;
556                Ok(Mach::Fat(multi))
557            }
558            // we might be a regular binary
559            _ => {
560                let binary = MachO::parse_impl(bytes, 0, lossy)?;
561                Ok(Mach::Binary(binary))
562            }
563        }
564    }
565}
566
#[cfg(test)]
mod test {
    use super::{Mach, SingleArch};

    /// A fat file built from two Mach-o dylibs must yield Mach-o entries that
    /// still expose symbols.
    #[test]
    fn parse_multi_arch_of_macho_binaries() {
        // Create via:
        // clang -arch arm64 -shared -o /tmp/hello_world_arm hello_world.c
        // clang -arch x86_64 -shared -o /tmp/hello_world_x86_64 hello_world.c
        // lipo -create -output hello_world_fat_binaries /tmp/hello_world_arm /tmp/hello_world_x86_64
        // strip hello_world_fat_binaries
        let bytes = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/assets/hello_world_fat_binaries"
        ));
        let mach = Mach::parse(bytes).expect("failed to parse input file");
        match mach {
            Mach::Fat(fat) => {
                assert!(fat.into_iter().count() > 0);
                for entry in fat.into_iter() {
                    let entry = entry.expect("failed to read entry");
                    match entry {
                        SingleArch::MachO(macho) => {
                            assert!(macho.symbols().count() > 0);
                        }
                        _ => panic!("expected SingleArch::MachO, got {:?}", entry),
                    }
                }
            }
            Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"),
        }
    }

    /// A fat file built from an archive must yield archive entries that have
    /// at least one member.
    #[test]
    fn parse_multi_arch_of_archives() {
        // Created with:
        // clang -c -o /tmp/hello_world.o hello_world.c
        // ar -r /tmp/hello_world.a /tmp/hello_world.o
        // lipo -create -output hello_world_fat_archives /tmp/hello_world.a
        // strip hello_world_fat_archives
        let bytes = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/assets/hello_world_fat_archives"
        ));
        let mach = Mach::parse(bytes).expect("failed to parse input file");
        match mach {
            Mach::Fat(fat) => {
                assert!(fat.into_iter().count() > 0);
                for entry in fat.into_iter() {
                    let entry = entry.expect("failed to read entry");
                    match entry {
                        SingleArch::Archive(archive) => {
                            assert!(!archive.members().is_empty())
                        }
                        _ => panic!("expected SingleArch::Archive, got {:?}", entry),
                    }
                }
            }
            Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"),
        }
    }
}