malwaredb_types/exec/macho/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3/// Fat Mach-O parsing (multiple Mach-O programs in one binary)
4pub mod fat;
5
6use crate::exec::{
7    Architecture, ExecutableFile, ExecutableType, Imports, OperatingSystem, Sections,
8};
9use crate::utils::{u16_from_offset, u32_from_offset, EntropyCalc};
10use crate::{Ordering, SpecimenFile};
11
12use std::fmt::{Display, Formatter};
13
14use anyhow::{anyhow, bail, Context, Result};
15use chrono::{DateTime, Utc};
16use tracing::{instrument, warn};
17
18// Apple's Mach-O loader:
19// https://opensource.apple.com/source/xnu/xnu-2050.18.24/EXTERNAL_HEADERS/mach-o/loader.h
20
/// On-disk bytes of the 32-bit magic `0xfeedface` in a big-endian file.
const MAGIC32: [u8; 4] = 0xfeed_face_u32.to_be_bytes();
/// On-disk bytes of the 32-bit magic `0xfeedface` in a little-endian (byte-swapped) file.
const CIGAM32: [u8; 4] = 0xfeed_face_u32.to_le_bytes();
/// On-disk bytes of the 64-bit magic `0xfeedfacf` in a big-endian file.
const MAGIC64: [u8; 4] = 0xfeed_facf_u32.to_be_bytes();
/// On-disk bytes of the 64-bit magic `0xfeedfacf` in a little-endian (byte-swapped) file.
const CIGAM64: [u8; 4] = 0xfeed_facf_u32.to_le_bytes();
25
/// A struct representing Mach Object (Mach-O) files
///
/// These are executables or libraries used on macOS, iOS, iPadOS, tvOS, watchOS, etc. They began
/// as the file format for `NeXTSTEP`.
///
/// Because of the different architectures, these files could be bi-endian.
#[derive(Clone, Debug)]
pub struct Macho<'a> {
    /// If the program is 64-bit, as indicated by the magic number
    pub is64bit: bool,

    /// Instruction set architecture for this binary
    pub arch: Architecture,

    /// If the binary has extra data after the last section, could be used to hide something.
    /// [`Macho::from`] does not compute this and leaves it as `None`.
    pub has_overlay: Option<bool>,

    /// Byte ordering for this binary, as indicated by the magic number
    pub ordering: Ordering,

    /// Executable subtype: Program, Library, or Core file?
    pub executable_type: ExecutableType,

    /// Operating System for this binary, going to be Mac OS or some derivative, could be `NeXTSTEP`
    pub os: OperatingSystem,

    /// Sections of this binary. [`Macho::from`] does not populate this and leaves it as `None`.
    pub sections: Option<Sections<'a>>,

    /// External libraries used by this application or library. [`Macho::from`] does not populate
    /// this and leaves it as `None`.
    pub imports: Option<Imports>,

    /// The array containing the raw bytes used to parse this program
    pub contents: &'a [u8],
}
61
62impl<'a> Macho<'a> {
63    /// Mach-O parsed from a sequence of bytes
64    ///
65    /// # Errors
66    ///
67    /// Returns an error if parsing fails.
68    #[instrument(name = "Mach-O parser", skip(contents))]
69    pub fn from(contents: &'a [u8]) -> Result<Self> {
70        let (is_64bit, ordering) = {
71            match contents[0..MAGIC32.len()]
72                .try_into()
73                .context("Mach-O buffer too small for a magic number")?
74            {
75                MAGIC32 => (false, Ordering::BigEndian),
76                CIGAM32 => (false, Ordering::LittleEndian),
77                MAGIC64 => (true, Ordering::BigEndian),
78                CIGAM64 => (true, Ordering::LittleEndian),
79                _ => bail!("Not a Mach-O file"),
80            }
81        };
82
83        // The CPU type has an upper end flag to indicate if 64-bit, in addition
84        // to the different machine number. Check it, and clear it.
85        let mut arch = u32_from_offset(contents, 4, ordering)
86            .ok_or(anyhow!("Mach-O buffer too small for getting architecture"))?;
87        let should_be_64bit = (arch & 0x0100_0000) == 0x0100_0000;
88        if !is_64bit && should_be_64bit {
89            warn!("Mach-O had 64-bit magic header but not 64-bit magic.");
90        }
91        if is_64bit && !should_be_64bit {
92            warn!("Mach-O had 64-bit lacked magic header but had 64-bit magic.");
93        }
94        arch &= 0x00FF_FFFF;
95        let arch = match arch {
96            0x06 => Architecture::M68k,
97            0x07 => {
98                if is_64bit {
99                    Architecture::X86_64
100                } else {
101                    Architecture::X86
102                }
103            }
104            0x8 => {
105                if is_64bit {
106                    Architecture::MIPS64
107                } else {
108                    Architecture::MIPS
109                }
110            }
111            0x0C => {
112                if is_64bit {
113                    Architecture::ARM64
114                } else {
115                    Architecture::ARM
116                }
117            }
118            0x0D => Architecture::M88k,
119            0x0E => {
120                if is_64bit {
121                    Architecture::Sparc64
122                } else {
123                    Architecture::Sparc
124                }
125            }
126            0x10 => Architecture::Alpha,
127            0x12 => {
128                if is_64bit {
129                    Architecture::PowerPC64
130                } else {
131                    Architecture::PowerPC
132                }
133            }
134            other => Architecture::Other(other),
135        };
136
137        let exec_type = u16_from_offset(contents, 4, ordering)
138            .ok_or(anyhow!("Mach-O buffer too small for executable type"))?;
139        let exec_type = {
140            if (exec_type & 0x02) != 0 || (exec_type & 0x05) != 0 {
141                ExecutableType::Program
142            } else if (exec_type & 0x06) != 0 || (exec_type & 0x09) != 0 {
143                ExecutableType::Library
144            } else if (exec_type & 0x04) != 0 {
145                ExecutableType::Core
146            } else {
147                ExecutableType::Unknown(exec_type)
148            }
149        };
150
151        Ok(Self {
152            is64bit: is_64bit,
153            arch,
154            has_overlay: None,
155            ordering,
156            executable_type: exec_type,
157            os: OperatingSystem::MacOS,
158            sections: None,
159            imports: None,
160            contents,
161        })
162    }
163}
164
165impl ExecutableFile for Macho<'_> {
166    fn architecture(&self) -> Option<Architecture> {
167        Some(self.arch)
168    }
169
170    fn pointer_size(&self) -> usize {
171        if self.is64bit {
172            64
173        } else {
174            32
175        }
176    }
177
178    fn operating_system(&self) -> OperatingSystem {
179        self.os
180    }
181
182    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
183        None
184    }
185
186    #[allow(clippy::cast_possible_truncation)]
187    fn num_sections(&self) -> u32 {
188        self.sections.as_ref().unwrap_or(&Sections::default()).len() as u32
189    }
190
191    fn sections(&self) -> Option<&Sections<'_>> {
192        self.sections.as_ref()
193    }
194
195    fn import_hash(&self) -> Option<String> {
196        self.imports.as_ref().map(|i| hex::encode(i.hash()))
197    }
198
199    fn fuzzy_imports(&self) -> Option<String> {
200        self.imports.as_ref().map(Imports::fuzzy_hash)
201    }
202}
203
impl SpecimenFile for Macho<'_> {
    /// Leading byte sequences that identify a Mach-O file: the 32- and 64-bit magic numbers,
    /// each in both byte orders.
    const MAGIC: &'static [&'static [u8]] = &[&MAGIC32, &MAGIC64, &CIGAM32, &CIGAM64];

    /// Name of this file type; always `"Mach-O"`.
    fn type_name(&self) -> &'static str {
        "Mach-O"
    }
}
211
212impl Display for Macho<'_> {
213    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
214        writeln!(f, "Mach-O file:")?;
215        writeln!(f, "\tOS: {}", self.os)?;
216        writeln!(f, "\tArchitecture: {}", self.arch)?;
217        writeln!(f, "\tOrdering: {}", self.ordering)?;
218        writeln!(f, "\tType: {}", self.executable_type)?;
219        if let Some(sections) = &self.sections {
220            writeln!(f, "\t{} sections:", sections.len())?;
221            for section in sections {
222                writeln!(f, "\t\t{section}")?;
223            }
224        }
225        if self.has_overlay == Some(true) {
226            writeln!(f, "\tHas extra bytes at the end (overlay).")?;
227        }
228        writeln!(f, "\tSize: {}", self.contents.len())?;
229        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())
230    }
231}
232
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses an embedded sample, panicking (at the caller's location) if parsing fails.
    #[track_caller]
    fn parse(bytes: &'static [u8]) -> Macho<'static> {
        Macho::from(bytes).unwrap()
    }

    #[test]
    fn arm64() {
        let macho = parse(include_bytes!("../../../testdata/macho/macho_arm64"));

        assert!(macho.is64bit);
        assert_eq!(macho.executable_type, ExecutableType::Program);
        assert_eq!(macho.arch, Architecture::ARM64);
    }

    #[test]
    fn ppc() {
        let macho = parse(include_bytes!("../../../testdata/macho/macho_ppc"));

        assert!(!macho.is64bit);
        // NOTE(review): the executable-type check is disabled for this sample; re-enable it
        // once the parser reports `Program` here.
        //assert_eq!(macho.executable_type, ExecutableType::Program);
        assert_eq!(macho.arch, Architecture::PowerPC);
    }

    #[test]
    fn ppc64() {
        let macho = parse(include_bytes!("../../../testdata/macho/macho_ppc64"));

        assert!(macho.is64bit);
        // NOTE(review): the executable-type check is disabled for this sample; re-enable it
        // once the parser reports `Program` here.
        //assert_eq!(macho.executable_type, ExecutableType::Program);
        assert_eq!(macho.arch, Architecture::PowerPC64);
    }

    #[test]
    fn x86_64() {
        let macho = parse(include_bytes!("../../../testdata/macho/macho_x86_64"));

        assert!(macho.is64bit);
        assert_eq!(macho.executable_type, ExecutableType::Program);
        assert_eq!(macho.arch, Architecture::X86_64);
    }
}
276}