Skip to main content

malwaredb_types/exec/macho/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3/// Fat Mach-O parsing (multiple Mach-O programs in one binary)
4pub mod fat;
5
6use crate::exec::{
7    Architecture, ExecutableFile, ExecutableType, Imports, OperatingSystem, Sections,
8};
9use crate::utils::{u16_from_offset, u32_from_offset, EntropyCalc};
10use crate::{Ordering, SpecimenFile};
11
12use std::fmt::{Display, Formatter};
13
14use anyhow::{anyhow, bail, Context, Result};
15use chrono::{DateTime, Utc};
16use tracing::{instrument, warn};
17use uuid::Uuid;
18
// Magic numbers as defined by Apple's Mach-O loader header:
// https://opensource.apple.com/source/xnu/xnu-2050.18.24/EXTERNAL_HEADERS/mach-o/loader.h
//
// Each constant is the on-disk byte sequence of the 32-bit magic word. The "CIGAM" variants are
// the same word with its bytes reversed, i.e. as written by a little-endian producer.

/// 32-bit magic word `0xfeedface` stored most-significant byte first.
const MAGIC32: [u8; 4] = 0xfeed_face_u32.to_be_bytes();
/// 32-bit magic word `0xfeedface` stored least-significant byte first.
const CIGAM32: [u8; 4] = 0xfeed_face_u32.to_le_bytes();
/// 64-bit magic word `0xfeedfacf` stored most-significant byte first.
const MAGIC64: [u8; 4] = 0xfeed_facf_u32.to_be_bytes();
/// 64-bit magic word `0xfeedfacf` stored least-significant byte first.
const CIGAM64: [u8; 4] = 0xfeed_facf_u32.to_le_bytes();
26
/// A struct representing Mach Object (Mach-O) files
///
/// These are executables or libraries used on macOS, iOS, iPadOS, tvOS, watchOS, etc. They began
/// as the file format for `NeXTSTEP`.
///
/// Because of the different architectures, these files could be bi-endian, so the byte order
/// detected from the magic number is recorded in `ordering`.
///
/// The struct borrows the input buffer for `'a`; it never copies the file contents.
#[derive(Clone, Debug)]
pub struct Macho<'a> {
    /// If the program is 64-bit (determined from the magic number)
    pub is64bit: bool,

    /// Instruction set architecture for this binary
    pub arch: Architecture,

    /// If the binary has extra data after the last section, could be used to hide something.
    /// `None` means this has not been determined.
    pub has_overlay: Option<bool>,

    /// Byte ordering for this binary
    pub ordering: Ordering,

    /// Executable subtype: Program, Library, or Core file?
    pub executable_type: ExecutableType,

    /// Operating System for this binary, going to be Mac OS or some derivative, could be `NeXTSTEP`
    pub os: OperatingSystem,

    /// Sections of this binary, or `None` if they have not been parsed
    pub sections: Option<Sections<'a>>,

    /// External libraries used by this application or library, or `None` if they have not been
    /// parsed
    pub imports: Option<Imports>,

    /// The array containing the raw bytes used to parse this program
    pub contents: &'a [u8],
}
62
63impl<'a> Macho<'a> {
64    /// Mach-O parsed from a sequence of bytes
65    ///
66    /// # Errors
67    ///
68    /// Returns an error if parsing fails.
69    #[instrument(name = "Mach-O parser", skip(contents))]
70    pub fn from(contents: &'a [u8]) -> Result<Self> {
71        let (is_64bit, ordering) = {
72            match contents[0..MAGIC32.len()]
73                .try_into()
74                .context("Mach-O buffer too small for a magic number")?
75            {
76                MAGIC32 => (false, Ordering::BigEndian),
77                CIGAM32 => (false, Ordering::LittleEndian),
78                MAGIC64 => (true, Ordering::BigEndian),
79                CIGAM64 => (true, Ordering::LittleEndian),
80                _ => bail!("Not a Mach-O file"),
81            }
82        };
83
84        // The CPU type has an upper end flag to indicate if 64-bit, in addition
85        // to the different machine number. Check it, and clear it.
86        let mut arch = u32_from_offset(contents, 4, ordering)
87            .ok_or(anyhow!("Mach-O buffer too small for getting architecture"))?;
88        let should_be_64bit = (arch & 0x0100_0000) == 0x0100_0000;
89        if !is_64bit && should_be_64bit {
90            warn!("Mach-O had 64-bit magic header but not 64-bit magic.");
91        }
92        if is_64bit && !should_be_64bit {
93            warn!("Mach-O had 64-bit lacked magic header but had 64-bit magic.");
94        }
95        arch &= 0x00FF_FFFF;
96        let arch = match arch {
97            0x06 => Architecture::M68k,
98            0x07 => {
99                if is_64bit {
100                    Architecture::X86_64
101                } else {
102                    Architecture::X86
103                }
104            }
105            0x8 => {
106                if is_64bit {
107                    Architecture::MIPS64
108                } else {
109                    Architecture::MIPS
110                }
111            }
112            0x0C => {
113                if is_64bit {
114                    Architecture::ARM64
115                } else {
116                    Architecture::ARM
117                }
118            }
119            0x0D => Architecture::M88k,
120            0x0E => {
121                if is_64bit {
122                    Architecture::Sparc64
123                } else {
124                    Architecture::Sparc
125                }
126            }
127            0x10 => Architecture::Alpha,
128            0x12 => {
129                if is_64bit {
130                    Architecture::PowerPC64
131                } else {
132                    Architecture::PowerPC
133                }
134            }
135            other => Architecture::Other(other),
136        };
137
138        let exec_type = u16_from_offset(contents, 4, ordering)
139            .ok_or(anyhow!("Mach-O buffer too small for executable type"))?;
140        let exec_type = {
141            if (exec_type & 0x02) != 0 || (exec_type & 0x05) != 0 {
142                ExecutableType::Program
143            } else if (exec_type & 0x06) != 0 || (exec_type & 0x09) != 0 {
144                ExecutableType::Library
145            } else if (exec_type & 0x04) != 0 {
146                ExecutableType::Core
147            } else {
148                ExecutableType::Unknown(exec_type)
149            }
150        };
151
152        Ok(Self {
153            is64bit: is_64bit,
154            arch,
155            has_overlay: None,
156            ordering,
157            executable_type: exec_type,
158            os: OperatingSystem::MacOS,
159            sections: None,
160            imports: None,
161            contents,
162        })
163    }
164}
165
166impl ExecutableFile for Macho<'_> {
167    fn architecture(&self) -> Option<Architecture> {
168        Some(self.arch)
169    }
170
171    fn pointer_size(&self) -> usize {
172        if self.is64bit {
173            64
174        } else {
175            32
176        }
177    }
178
179    fn operating_system(&self) -> OperatingSystem {
180        self.os
181    }
182
183    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
184        None
185    }
186
187    #[allow(clippy::cast_possible_truncation)]
188    fn num_sections(&self) -> u32 {
189        self.sections.as_ref().unwrap_or(&Sections::default()).len() as u32
190    }
191
192    fn sections(&self) -> Option<&Sections<'_>> {
193        self.sections.as_ref()
194    }
195
196    fn import_hash(&self) -> Option<Uuid> {
197        self.imports.as_ref().map(Imports::hash)
198    }
199
200    fn fuzzy_imports(&self) -> Option<String> {
201        self.imports.as_ref().map(Imports::fuzzy_hash)
202    }
203}
204
impl SpecimenFile for Macho<'_> {
    /// The four byte sequences accepted as a Mach-O magic number: 32- and 64-bit, each in
    /// both byte orders.
    const MAGIC: &'static [&'static [u8]] = &[&MAGIC32, &MAGIC64, &CIGAM32, &CIGAM64];

    /// Human-readable name of this file format.
    fn type_name(&self) -> &'static str {
        "Mach-O"
    }
}
212
213impl Display for Macho<'_> {
214    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
215        writeln!(f, "Mach-O file:")?;
216        writeln!(f, "\tOS: {}", self.os)?;
217        writeln!(f, "\tArchitecture: {}", self.arch)?;
218        writeln!(f, "\tOrdering: {}", self.ordering)?;
219        writeln!(f, "\tType: {}", self.executable_type)?;
220        if let Some(sections) = &self.sections {
221            writeln!(f, "\t{} sections:", sections.len())?;
222            for section in sections {
223                writeln!(f, "\t\t{section}")?;
224            }
225        }
226        if self.has_overlay == Some(true) {
227            writeln!(f, "\tHas extra bytes at the end (overlay).")?;
228        }
229        writeln!(f, "\tSize: {}", self.contents.len())?;
230        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())
231    }
232}
233
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses an embedded fixture, panicking if the parser rejects it.
    fn parse(fixture: &'static [u8]) -> Macho<'static> {
        Macho::from(fixture).unwrap()
    }

    #[test]
    fn arm64() {
        let parsed = parse(include_bytes!("../../../testdata/macho/macho_arm64"));

        assert!(parsed.is64bit);
        assert_eq!(parsed.executable_type, ExecutableType::Program);
        assert_eq!(parsed.arch, Architecture::ARM64);
    }

    #[test]
    fn ppc() {
        let parsed = parse(include_bytes!("../../../testdata/macho/macho_ppc"));

        assert!(!parsed.is64bit);
        //assert_eq!(macho.executable_type, ExecutableType::Program);
        assert_eq!(parsed.arch, Architecture::PowerPC);
    }

    #[test]
    fn ppc64() {
        let parsed = parse(include_bytes!("../../../testdata/macho/macho_ppc64"));

        assert!(parsed.is64bit);
        //assert_eq!(macho.executable_type, ExecutableType::Program);
        assert_eq!(parsed.arch, Architecture::PowerPC64);
    }

    #[test]
    fn x86_64() {
        let parsed = parse(include_bytes!("../../../testdata/macho/macho_x86_64"));

        assert!(parsed.is64bit);
        assert_eq!(parsed.executable_type, ExecutableType::Program);
        assert_eq!(parsed.arch, Architecture::X86_64);
    }
}