threatflux_binary_analysis/formats/
macho.rs

1//! Mach-O format parser for macOS/iOS binaries
2
3use crate::{
4    types::{
5        Architecture, BinaryFormat as Format, BinaryMetadata, Endianness, Export, Import, Section,
6        SectionPermissions, SectionType, SecurityFeatures, Symbol,
7    },
8    BinaryError, BinaryFormatParser, BinaryFormatTrait, Result,
9};
10use goblin::mach::{Mach, MachO};
11
12/// Mach-O format parser
13pub struct MachOParser;
14
15impl BinaryFormatParser for MachOParser {
16    fn parse(data: &[u8]) -> Result<Box<dyn BinaryFormatTrait>> {
17        let mach = Mach::parse(data)?;
18        match mach {
19            Mach::Binary(macho) => Ok(Box::new(MachOBinary::new(macho, data)?)),
20            Mach::Fat(_) => Err(BinaryError::unsupported_format(
21                "Fat binaries not yet supported",
22            )),
23        }
24    }
25
26    fn can_parse(data: &[u8]) -> bool {
27        if data.len() < 4 {
28            return false;
29        }
30
31        let magic = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
32        matches!(
33            magic,
34            goblin::mach::header::MH_MAGIC
35                | goblin::mach::header::MH_CIGAM
36                | goblin::mach::header::MH_MAGIC_64
37                | goblin::mach::header::MH_CIGAM_64
38                | goblin::mach::fat::FAT_MAGIC
39                | goblin::mach::fat::FAT_CIGAM
40        )
41    }
42}
43
44/// Parsed Mach-O binary
45pub struct MachOBinary {
46    #[allow(dead_code)]
47    macho: MachO<'static>,
48    #[allow(dead_code)]
49    data: Vec<u8>,
50    metadata: BinaryMetadata,
51    sections: Vec<Section>,
52    symbols: Vec<Symbol>,
53    imports: Vec<Import>,
54    exports: Vec<Export>,
55}
56
57impl MachOBinary {
58    fn new(macho: MachO<'_>, data: &[u8]) -> Result<Self> {
59        let data = data.to_vec();
60
61        // Convert architecture
62        let architecture = match macho.header.cputype() {
63            goblin::mach::constants::cputype::CPU_TYPE_X86 => Architecture::X86,
64            goblin::mach::constants::cputype::CPU_TYPE_X86_64 => Architecture::X86_64,
65            goblin::mach::constants::cputype::CPU_TYPE_ARM => Architecture::Arm,
66            goblin::mach::constants::cputype::CPU_TYPE_ARM64 => Architecture::Arm64,
67            goblin::mach::constants::cputype::CPU_TYPE_POWERPC => Architecture::PowerPC,
68            goblin::mach::constants::cputype::CPU_TYPE_POWERPC64 => Architecture::PowerPC64,
69            _ => Architecture::Unknown,
70        };
71
72        // Determine endianness from the original data parsing
73        // Goblin normalizes magic numbers, so we need to check the raw bytes
74        let endian = if data.len() >= 4 {
75            let raw_magic_be = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
76
77            // Check for big endian magic (raw bytes match canonical form)
78            if raw_magic_be == goblin::mach::header::MH_MAGIC
79                || raw_magic_be == goblin::mach::header::MH_MAGIC_64
80            {
81                Endianness::Big
82            } else {
83                // All other cases (including CIGAM variants) are little endian
84                Endianness::Little
85            }
86        } else {
87            Endianness::Little // Default for malformed data
88        };
89
90        // Analyze security features
91        let security_features = analyze_security_features(&macho);
92
93        let metadata = BinaryMetadata {
94            size: data.len(),
95            format: Format::MachO,
96            architecture,
97            entry_point: find_entry_point(&macho),
98            base_address: None, // Mach-O uses ASLR, no fixed base
99            timestamp: None,    // Not readily available in Mach-O
100            compiler_info: extract_compiler_info(&macho),
101            endian,
102            security_features,
103        };
104
105        // Parse sections
106        let sections = parse_sections(&macho, &data)?;
107
108        // Parse symbols
109        let symbols = parse_symbols(&macho)?;
110
111        // Parse imports and exports
112        let (imports, exports) = parse_imports_exports(&macho)?;
113
114        // Handle lifetime issues with MachO struct
115        let macho_owned = unsafe { std::mem::transmute::<MachO<'_>, MachO<'static>>(macho) };
116
117        Ok(Self {
118            macho: macho_owned,
119            data,
120            metadata,
121            sections,
122            symbols,
123            imports,
124            exports,
125        })
126    }
127}
128
129impl BinaryFormatTrait for MachOBinary {
130    fn format_type(&self) -> Format {
131        Format::MachO
132    }
133
134    fn architecture(&self) -> Architecture {
135        self.metadata.architecture
136    }
137
138    fn entry_point(&self) -> Option<u64> {
139        self.metadata.entry_point
140    }
141
142    fn sections(&self) -> &[Section] {
143        &self.sections
144    }
145
146    fn symbols(&self) -> &[Symbol] {
147        &self.symbols
148    }
149
150    fn imports(&self) -> &[Import] {
151        &self.imports
152    }
153
154    fn exports(&self) -> &[Export] {
155        &self.exports
156    }
157
158    fn metadata(&self) -> &BinaryMetadata {
159        &self.metadata
160    }
161}
162
163fn parse_sections(macho: &MachO, data: &[u8]) -> Result<Vec<Section>> {
164    let mut sections = Vec::new();
165
166    for segment in &macho.segments {
167        for (section, _) in &segment.sections()? {
168            let name = section.name().unwrap_or("unknown").to_string();
169
170            // Determine section type based on section name and flags
171            let section_type = if section.flags & goblin::mach::constants::S_ATTR_PURE_INSTRUCTIONS
172                != 0
173                || name.starts_with("__text")
174            {
175                SectionType::Code
176            } else if name.starts_with("__data") {
177                SectionType::Data
178            } else if name.starts_with("__const") || name.starts_with("__rodata") {
179                SectionType::ReadOnlyData
180            } else if name.starts_with("__bss") {
181                SectionType::Bss
182            } else if name.starts_with("__debug") {
183                SectionType::Debug
184            } else {
185                SectionType::Other("MACHO_SECTION".to_string())
186            };
187
188            // Mach-O section permissions are inherited from segment
189            let permissions = SectionPermissions {
190                read: segment.initprot & 0x1 != 0,    // VM_PROT_READ
191                write: segment.initprot & 0x2 != 0,   // VM_PROT_WRITE
192                execute: segment.initprot & 0x4 != 0, // VM_PROT_EXECUTE
193            };
194
195            // Extract small section data
196            let section_data = if section.size <= 1024 && section.offset > 0 {
197                let start = section.offset as usize;
198                let end = start + section.size as usize;
199                if end <= data.len() {
200                    Some(data[start..end].to_vec())
201                } else {
202                    None
203                }
204            } else {
205                None
206            };
207
208            sections.push(Section {
209                name,
210                address: section.addr,
211                size: section.size,
212                offset: section.offset as u64,
213                permissions,
214                section_type,
215                data: section_data,
216            });
217        }
218    }
219
220    Ok(sections)
221}
222
223fn parse_symbols(_macho: &MachO) -> Result<Vec<Symbol>> {
224    let symbols = Vec::new();
225
226    // NOTE: Symbol parsing API changed in goblin 0.10, requires implementation update
227    // The symbol API has changed in goblin 0.10
228    // For now, create empty symbols vector
229    // symbols = vec![];
230
231    Ok(symbols)
232}
233
234fn parse_imports_exports(macho: &MachO) -> crate::types::ImportExportResult {
235    let mut imports = Vec::new();
236    let mut exports = Vec::new();
237
238    // Parse imports from bind info
239    for import in &macho.imports()? {
240        imports.push(Import {
241            name: import.name.to_string(),
242            library: Some(import.dylib.to_string()),
243            address: Some(import.address),
244            ordinal: None,
245        });
246    }
247
248    // Parse exports from export info
249    for export in &macho.exports()? {
250        exports.push(Export {
251            name: export.name.to_string(),
252            address: export.offset,
253            ordinal: None,
254            forwarded_name: None, // Mach-O doesn't have forwarded exports like PE
255        });
256    }
257
258    Ok((imports, exports))
259}
260
261fn analyze_security_features(macho: &MachO) -> SecurityFeatures {
262    let mut features = SecurityFeatures::default();
263
264    // Check file type and flags for security features
265    let flags = macho.header.flags;
266
267    // PIE (Position Independent Executable)
268    features.pie = flags & goblin::mach::header::MH_PIE != 0;
269
270    // ASLR is generally enabled with PIE on macOS
271    features.aslr = features.pie;
272
273    // NX bit (No-Execute) is typically enabled on modern macOS
274    features.nx_bit = true; // Default assumption for modern binaries
275
276    // Check for stack canaries (would need more complex analysis)
277    features.stack_canary = false; // Would need to analyze for __stack_chk_guard
278
279    // Check load commands for additional security features
280    for _load_command in &macho.load_commands {
281        // NOTE: LoadCommand variants changed in goblin 0.10, awaiting API stabilization
282        // LoadCommand::CodeSignature(_, _) => {
283        //     features.signed = true;
284        // }
285    }
286
287    features
288}
289
290fn find_entry_point(macho: &MachO) -> Option<u64> {
291    // Look for LC_MAIN or LC_UNIX_THREAD load commands
292    for _load_command in &macho.load_commands {
293        // NOTE: LoadCommand variants changed in goblin 0.10, awaiting API stabilization
294        // LoadCommand::Main(entry) => {
295        //     return Some(entry.entryoff);
296        // }
297        // LoadCommand::UnixThread(_) => {
298        //     // Entry point is in the thread state
299        //     // This is architecture-specific parsing
300        //     return Some(0); // Placeholder - would need arch-specific parsing
301        // }
302    }
303    None
304}
305
306fn extract_compiler_info(macho: &MachO) -> Option<String> {
307    // Look for build version or version min load commands
308    for _load_command in &macho.load_commands {
309        // NOTE: LoadCommand variants changed in goblin 0.10, awaiting API stabilization
310        // LoadCommand::BuildVersion(build) => {
311        //     return Some(format!(
312        //         "Platform: {}, SDK: {}.{}.{}",
313        //         build.platform,
314        //         build.sdk >> 16,
315        //         (build.sdk >> 8) & 0xff,
316        //         build.sdk & 0xff
317        //     ));
318        // }
319    }
320    Some("Unknown Apple toolchain".to_string())
321}