malwaredb_types/exec/macho/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3/// Fat Mach-O parsing (multiple Mach-O programs in one binary)
4pub mod fat;
5
6use crate::exec::{
7    Architecture, ExecutableFile, ExecutableType, Imports, OperatingSystem, Sections,
8};
9use crate::utils::{u16_from_offset, u32_from_offset, EntropyCalc};
10use crate::{Ordering, SpecimenFile};
11
12use std::fmt::{Display, Formatter};
13
14use anyhow::{bail, Result};
15use chrono::{DateTime, Utc};
16use tracing::instrument;
17
18// Apple's Mach-O loader:
19// https://opensource.apple.com/source/xnu/xnu-2050.18.24/EXTERNAL_HEADERS/mach-o/loader.h
20
/// Magic bytes of a 32-bit Mach-O whose header is stored big-endian (`0xfeedface`).
const MAGIC32: [u8; 4] = 0xfeed_face_u32.to_be_bytes();
/// Magic bytes of a 32-bit Mach-O whose header is stored little-endian (byte-swapped magic).
const CIGAM32: [u8; 4] = 0xfeed_face_u32.to_le_bytes();
/// Magic bytes of a 64-bit Mach-O whose header is stored big-endian (`0xfeedfacf`).
const MAGIC64: [u8; 4] = 0xfeed_facf_u32.to_be_bytes();
/// Magic bytes of a 64-bit Mach-O whose header is stored little-endian (byte-swapped magic).
const CIGAM64: [u8; 4] = 0xfeed_facf_u32.to_le_bytes();
25
26/// A struct presenting Mach Objects (Mach-O) files
27///
28/// These are executables or libraries used on macOS, iOS, iPadOS, tvOS, watchOS, etc. They began
29/// as the file format for `NeXTSTEP`.
30///
31/// Because of the different architectures, these files could be bi-endian.
32#[derive(Clone, Debug)]
33pub struct Macho<'a> {
34    /// If the program is 64-bit
35    pub is64bit: bool,
36
37    /// Instruction set architecture for this binary
38    pub arch: Architecture,
39
40    /// If the binary has extra data after the last section, could be used to hide something
41    pub has_overlay: Option<bool>,
42
43    /// Byte ordering for this binary
44    pub ordering: Ordering,
45
46    /// Executable subtype: Program, Library, or Core file?
47    pub executable_type: ExecutableType,
48
49    /// Operating System for this binary, going to be Mac OS or some derivative, could be `NeXTSTEP`
50    pub os: OperatingSystem,
51
52    /// Sections of this binary
53    pub sections: Option<Sections<'a>>,
54
55    /// External libraries used by this application or library
56    pub imports: Option<Imports>,
57
58    /// The array containing the raw bytes used to parse this program
59    pub contents: &'a [u8],
60}
61
62impl<'a> Macho<'a> {
63    /// Mach-O parsed from a sequence of bytes
64    #[instrument(name = "Mach-O parser", skip(contents))]
65    pub fn from(contents: &'a [u8]) -> Result<Self> {
66        let (is_64bit, ordering) = {
67            match contents[0..MAGIC32.len()].try_into().unwrap() {
68                MAGIC32 => (false, Ordering::BigEndian),
69                CIGAM32 => (false, Ordering::LittleEndian),
70                MAGIC64 => (true, Ordering::BigEndian),
71                CIGAM64 => (true, Ordering::LittleEndian),
72                _ => bail!("Not a Mach-O file"),
73            }
74        };
75
76        // The CPU type has an upper end flag to indicate if 64-bit, in addition
77        // to the different machine number. Check it, and clear it.
78        let mut arch = u32_from_offset(contents, 4, ordering);
79        let _should_be_64bit = (arch & 0x0100_0000) == 0x0100_0000;
80        arch &= 0x00FF_FFFF;
81        let arch = match arch {
82            0x06 => Architecture::M68k,
83            0x07 => {
84                if is_64bit {
85                    Architecture::X86_64
86                } else {
87                    Architecture::X86
88                }
89            }
90            0x8 => {
91                if is_64bit {
92                    Architecture::MIPS64
93                } else {
94                    Architecture::MIPS
95                }
96            }
97            0x0C => {
98                if is_64bit {
99                    Architecture::ARM64
100                } else {
101                    Architecture::ARM
102                }
103            }
104            0x0D => Architecture::M88k,
105            0x0E => {
106                if is_64bit {
107                    Architecture::Sparc64
108                } else {
109                    Architecture::Sparc
110                }
111            }
112            0x10 => Architecture::Alpha,
113            0x12 => {
114                if is_64bit {
115                    Architecture::PowerPC64
116                } else {
117                    Architecture::PowerPC
118                }
119            }
120            other => Architecture::Other(other as u16),
121        };
122
123        let exec_type = u16_from_offset(contents, 4, ordering);
124        let exec_type = {
125            if (exec_type & 0x02) != 0 || (exec_type & 0x05) != 0 {
126                ExecutableType::Program
127            } else if (exec_type & 0x06) != 0 || (exec_type & 0x09) != 0 {
128                ExecutableType::Library
129            } else if (exec_type & 0x04) != 0 {
130                ExecutableType::Core
131            } else {
132                ExecutableType::Unknown(exec_type)
133            }
134        };
135
136        Ok(Self {
137            is64bit: is_64bit,
138            arch,
139            has_overlay: None,
140            ordering,
141            executable_type: exec_type,
142            os: OperatingSystem::MacOS,
143            sections: None,
144            imports: None,
145            contents,
146        })
147    }
148}
149
150impl ExecutableFile for Macho<'_> {
151    fn architecture(&self) -> Architecture {
152        self.arch
153    }
154
155    fn pointer_size(&self) -> usize {
156        if self.is64bit {
157            64
158        } else {
159            32
160        }
161    }
162
163    fn operating_system(&self) -> OperatingSystem {
164        self.os
165    }
166
167    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
168        None
169    }
170
171    fn num_sections(&self) -> u32 {
172        self.sections.as_ref().unwrap_or(&Sections::default()).len() as u32
173    }
174
175    fn sections(&self) -> Option<&Sections> {
176        self.sections.as_ref()
177    }
178
179    fn import_hash(&self) -> Option<String> {
180        self.imports.as_ref().map(|i| hex::encode(i.hash()))
181    }
182
183    fn fuzzy_imports(&self) -> Option<String> {
184        self.imports.as_ref().map(Imports::fuzzy_hash)
185    }
186}
187
188impl SpecimenFile for Macho<'_> {
189    const MAGIC: &'static [&'static [u8]] = &[&MAGIC32, &MAGIC64, &CIGAM32, &CIGAM64];
190
191    fn type_name(&self) -> &'static str {
192        "Mach-O"
193    }
194}
195
196impl Display for Macho<'_> {
197    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
198        writeln!(f, "Mach-O file:")?;
199        writeln!(f, "\tOS: {}", self.os)?;
200        writeln!(f, "\tArchitecture: {}", self.arch)?;
201        writeln!(f, "\tOrdering: {}", self.ordering)?;
202        writeln!(f, "\tType: {}", self.executable_type)?;
203        if let Some(sections) = &self.sections {
204            writeln!(f, "\t{} sections:", sections.len())?;
205            for section in sections {
206                writeln!(f, "\t\t{section}")?;
207            }
208        }
209        if self.has_overlay == Some(true) {
210            writeln!(f, "\tHas extra bytes at the end (overlay).")?;
211        }
212        writeln!(f, "\tSize: {}", self.contents.len())?;
213        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())
214    }
215}
216
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `bytes` as Mach-O, panicking if the parser rejects them.
    fn parse(bytes: &[u8]) -> Macho<'_> {
        Macho::from(bytes).unwrap()
    }

    #[test]
    fn arm64() {
        let macho = parse(include_bytes!("../../../testdata/macho/macho_arm64"));

        assert!(macho.is64bit);
        assert_eq!(macho.executable_type, ExecutableType::Program);
        assert_eq!(macho.arch, Architecture::ARM64);
    }

    #[test]
    fn ppc() {
        let macho = parse(include_bytes!("../../../testdata/macho/macho_ppc"));

        assert!(!macho.is64bit);
        // The executable type assertion is disabled for this sample:
        //assert_eq!(macho.executable_type, ExecutableType::Program);
        assert_eq!(macho.arch, Architecture::PowerPC);
    }

    #[test]
    fn ppc64() {
        let macho = parse(include_bytes!("../../../testdata/macho/macho_ppc64"));

        assert!(macho.is64bit);
        // The executable type assertion is disabled for this sample:
        //assert_eq!(macho.executable_type, ExecutableType::Program);
        assert_eq!(macho.arch, Architecture::PowerPC64);
    }

    #[test]
    fn x86_64() {
        let macho = parse(include_bytes!("../../../testdata/macho/macho_x86_64"));

        assert!(macho.is64bit);
        assert_eq!(macho.executable_type, ExecutableType::Program);
        assert_eq!(macho.arch, Architecture::X86_64);
    }
}