malwaredb_types/exec/macho/
fat.rs

1// SPDX-License-Identifier: Apache-2.0
2
3use crate::exec::{macho::Macho, Architecture, ExecutableFile, OperatingSystem, Sections};
4use crate::utils::{bytes_offset_match, u32_from_offset, EntropyCalc};
5use crate::{Ordering, SpecimenFile};
6
7use std::fmt::{Display, Formatter};
8
9use anyhow::{bail, Result};
10use chrono::{DateTime, Utc};
11use tracing::instrument;
12
/// Leading four bytes of a Fat Mach-O file: `0xCAFEBABE` in big-endian byte order.
/// Java class files begin with the same bytes, so matching this alone is not conclusive.
const MAGIC: [u8; 4] = 0xCAFE_BABE_u32.to_be_bytes();
14
15/// Fat Mach-O files contain executable code for more than one architecture, allowing the
16/// same binary to be run on different hardware, such as the same file working on
17/// Power PC, Intel, and Apple Silicon machines.
18///
19/// This format is an array of Mach-O files. However, the magic number is also used for Java
20/// class files, so we need to make sure the amount of stored binaries makes sense. Too high, and
21/// it's probably the Java class version and not the number of contained Mach Objects.
22#[derive(Clone, Debug)]
23pub struct FatMacho<'a> {
24    /// The embedded Mach-O files within
25    pub binaries: Vec<Macho<'a>>,
26
27    /// If the binary has extra data after the last section, could be used to hide something
28    pub has_overlay: Option<bool>,
29
30    /// The array containing the raw bytes used to parse this program
31    pub contents: &'a [u8],
32}
33
34impl<'a> FatMacho<'a> {
35    /// Fat Mach-O parsed from a sequence of bytes
36    #[instrument(name = "Fat Mach-O parser", skip(contents))]
37    pub fn from(contents: &'a [u8]) -> Result<Self> {
38        if !bytes_offset_match(contents, 0, &MAGIC) {
39            bail!("Not a Fat Mach-O file");
40        }
41
42        let contained_binaries = u32_from_offset(contents, 4, Ordering::BigEndian) as usize;
43
44        if contained_binaries > 0x20 {
45            // Might be a Java .class file
46            // https://stackoverflow.com/questions/73546728/magic-value-collision-between-macho-fat-binaries-and-java-class-files
47            bail!("Not a Fat Mach-O file");
48        }
49
50        let mut binaries = Vec::with_capacity(contained_binaries);
51        let mut offset_counter = 8;
52        let mut has_overlay = None;
53        for contained_binary_offset in 0..contained_binaries {
54            let offset =
55                u32_from_offset(contents, offset_counter + 8, Ordering::BigEndian) as usize;
56            let size = u32_from_offset(contents, offset_counter + 12, Ordering::BigEndian) as usize;
57            binaries.push(Macho::from(&contents[offset..offset + size])?);
58
59            if contained_binary_offset == contained_binaries - 1 {
60                // See if there is extra space in the binary after the last section
61                has_overlay = Some(offset + size < contents.len());
62            }
63
64            offset_counter += 20;
65        }
66
67        Ok(Self {
68            binaries,
69            has_overlay,
70            contents,
71        })
72    }
73}
74
75// TODO: Fix up `ExecutableFile` for `FatMacho`
76impl ExecutableFile for FatMacho<'_> {
77    fn architecture(&self) -> Architecture {
78        // TODO: Need something better
79        self.binaries.first().unwrap().architecture()
80    }
81
82    fn pointer_size(&self) -> usize {
83        self.binaries.first().unwrap().pointer_size()
84    }
85
86    fn operating_system(&self) -> OperatingSystem {
87        self.binaries.first().unwrap().operating_system()
88    }
89
90    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
91        None
92    }
93
94    fn num_sections(&self) -> u32 {
95        self.binaries
96            .iter()
97            .map(crate::exec::ExecutableFile::num_sections)
98            .sum()
99    }
100
101    fn sections(&self) -> Option<&Sections> {
102        self.binaries.first().unwrap().sections()
103    }
104
105    fn import_hash(&self) -> Option<String> {
106        None
107    }
108
109    fn fuzzy_imports(&self) -> Option<String> {
110        None
111    }
112}
113
114impl SpecimenFile for FatMacho<'_> {
115    const MAGIC: &'static [&'static [u8]] = &[&MAGIC];
116
117    fn type_name(&self) -> &'static str {
118        "Fat Mach-O"
119    }
120}
121
122impl Display for FatMacho<'_> {
123    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
124        writeln!(
125            f,
126            "Fat Mach-O containing {} architectures:",
127            self.binaries.len()
128        )?;
129        for bin in &self.binaries {
130            writeln!(f, "{bin}")?;
131        }
132        if self.has_overlay == Some(true) {
133            writeln!(f, "\tHas extra bytes at the end (overlay).")?;
134        }
135        writeln!(f, "\tTotal Size: {}", self.contents.len())?;
136        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())
137    }
138}
139
#[cfg(test)]
mod tests {
    use super::*;

    use rstest::rstest;

    /// Each fixture must parse and yield the expected number of embedded binaries.
    #[rstest]
    #[case::two_architectures(include_bytes!("../../../testdata/macho/macho_fat_arm64_x86_64"), 2)]
    #[case::four_architectures(include_bytes!("../../../testdata/macho/macho_fat_arm64_ppc_ppc64_x86_64"), 4)]
    #[test]
    fn multi_arch(#[case] bytes: &[u8], #[case] expected_architectures: usize) {
        let macho = FatMacho::from(bytes).unwrap();
        assert_eq!(macho.binaries.len(), expected_architectures);
    }

    /// A Java class file shares the magic number but must be rejected.
    #[test]
    fn java() {
        const BYTES: &[u8] = include_bytes!("../../../testdata/class/Hello.class");
        assert!(FatMacho::from(BYTES).is_err());
    }
}
160}