#![allow(clippy::uninlined_format_args)]
#![cfg(feature = "macho")]
use pretty_assertions::assert_eq;
use rstest::*;
use threatflux_binary_analysis::types::*;
use threatflux_binary_analysis::BinaryAnalyzer;
mod common;
use common::fixtures::*;
#[test]
fn test_macho_header_parsing() {
let data = create_realistic_macho_64();
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
assert_eq!(result.format, BinaryFormat::MachO);
assert_eq!(result.architecture, Architecture::X86_64);
assert_eq!(result.entry_point, Some(0x100001000));
}
#[rstest]
#[case(&[0xfe, 0xed, 0xfa, 0xce], Architecture::X86, Endianness::Big, "MH_MAGIC - 32-bit big endian")]
#[case(&[0xce, 0xfa, 0xed, 0xfe], Architecture::X86, Endianness::Little, "MH_CIGAM - 32-bit little endian")]
#[case(&[0xfe, 0xed, 0xfa, 0xcf], Architecture::X86_64, Endianness::Big, "MH_MAGIC_64 - 64-bit big endian")]
#[case(&[0xcf, 0xfa, 0xed, 0xfe], Architecture::X86_64, Endianness::Little, "MH_CIGAM_64 - 64-bit little endian")]
fn test_macho_magic_detection(
#[case] magic: &[u8],
#[case] expected_arch: Architecture,
#[case] expected_endian: Endianness,
#[case] description: &str,
) {
let mut data = vec![0; 1024];
data[0..4].copy_from_slice(magic);
let cpu_type = if expected_arch == Architecture::X86_64 {
0x01000007u32
} else {
0x00000007u32
};
let cpu_subtype = 0x00000003u32;
let filetype = 0x00000002u32;
if expected_endian == Endianness::Big {
data[4..8].copy_from_slice(&cpu_type.to_be_bytes());
data[8..12].copy_from_slice(&cpu_subtype.to_be_bytes());
data[12..16].copy_from_slice(&filetype.to_be_bytes());
} else {
data[4..8].copy_from_slice(&cpu_type.to_le_bytes());
data[8..12].copy_from_slice(&cpu_subtype.to_le_bytes());
data[12..16].copy_from_slice(&filetype.to_le_bytes());
}
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
assert_eq!(result.architecture, expected_arch, "Failed: {description}");
assert_eq!(
result.metadata.endian, expected_endian,
"Wrong endianness for: {description}"
);
}
#[rstest]
#[case(0x00000007, 0x00000003, Architecture::X86, "CPU_TYPE_X86")]
#[case(0x01000007, 0x00000003, Architecture::X86_64, "CPU_TYPE_X86_64")]
#[case(0x0000000c, 0x00000000, Architecture::Arm, "CPU_TYPE_ARM")]
#[case(0x0100000c, 0x00000000, Architecture::Arm64, "CPU_TYPE_ARM64")]
#[case(0x00000012, 0x00000000, Architecture::PowerPC, "CPU_TYPE_POWERPC")]
#[case(0x01000012, 0x00000000, Architecture::PowerPC64, "CPU_TYPE_POWERPC64")]
#[case(0x00000000, 0x00000000, Architecture::Unknown, "CPU_TYPE_ANY")]
fn test_macho_cpu_types(
#[case] cputype: u32,
#[case] cpusubtype: u32,
#[case] expected_arch: Architecture,
#[case] description: &str,
) {
let mut data = create_realistic_macho_64();
let cputype_bytes = cputype.to_le_bytes();
let cpusubtype_bytes = cpusubtype.to_le_bytes();
data[4..8].copy_from_slice(&cputype_bytes);
data[8..12].copy_from_slice(&cpusubtype_bytes);
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
assert_eq!(result.architecture, expected_arch, "Failed: {description}");
}
#[rstest]
#[case(0x00000001, "MH_OBJECT - Object file")]
#[case(0x00000002, "MH_EXECUTE - Executable")]
#[case(0x00000003, "MH_FVMLIB - Fixed VM shared library")]
#[case(0x00000004, "MH_CORE - Core file")]
#[case(0x00000005, "MH_PRELOAD - Preloaded executable")]
#[case(0x00000006, "MH_DYLIB - Dynamic library")]
#[case(0x00000007, "MH_DYLINKER - Dynamic link editor")]
#[case(0x00000008, "MH_BUNDLE - Bundle")]
#[case(0x00000009, "MH_DYLIB_STUB - Shared library stub")]
#[case(0x0000000a, "MH_DSYM - Debug symbols")]
#[case(0x0000000b, "MH_KEXT_BUNDLE - Kernel extension")]
fn test_macho_file_types(#[case] filetype: u32, #[case] _description: &str) {
let mut data = create_realistic_macho_64();
let filetype_bytes = filetype.to_le_bytes();
data[12..16].copy_from_slice(&filetype_bytes);
let analyzer = BinaryAnalyzer::new();
let result = analyzer.analyze(&data);
let parsed = match result {
Ok(parsed) => parsed,
Err(_) => {
return;
}
};
assert_eq!(parsed.format, BinaryFormat::MachO);
}
#[rstest]
#[case(0x00000001, "MH_NOUNDEFS")]
#[case(0x00000002, "MH_INCRLINK")]
#[case(0x00000004, "MH_DYLDLINK")]
#[case(0x00000008, "MH_BINDATLOAD")]
#[case(0x00000010, "MH_PREBOUND")]
#[case(0x00000020, "MH_SPLIT_SEGS")]
#[case(0x00000040, "MH_LAZY_INIT")]
#[case(0x00000080, "MH_TWOLEVEL")]
#[case(0x00000100, "MH_FORCE_FLAT")]
#[case(0x00000200, "MH_NOMULTIDEFS")]
#[case(0x00000400, "MH_NOFIXPREBINDING")]
#[case(0x00000800, "MH_PREBINDABLE")]
#[case(0x00001000, "MH_ALLMODSBOUND")]
#[case(0x00002000, "MH_SUBSECTIONS_VIA_SYMBOLS")]
#[case(0x00004000, "MH_CANONICAL")]
#[case(0x00008000, "MH_WEAK_DEFINES")]
#[case(0x00010000, "MH_BINDS_TO_WEAK")]
#[case(0x00020000, "MH_ALLOW_STACK_EXECUTION")]
#[case(0x00040000, "MH_ROOT_SAFE")]
#[case(0x00080000, "MH_SETUID_SAFE")]
#[case(0x00100000, "MH_NO_REEXPORTED_DYLIBS")]
#[case(0x00200000, "MH_PIE")]
#[case(0x00400000, "MH_DEAD_STRIPPABLE_DYLIB")]
#[case(0x00800000, "MH_HAS_TLV_DESCRIPTORS")]
#[case(0x01000000, "MH_NO_HEAP_EXECUTION")]
fn test_macho_flags(#[case] flag: u32, #[case] _description: &str) {
let mut data = create_realistic_macho_64();
let flag_bytes = flag.to_le_bytes();
data[24..28].copy_from_slice(&flag_bytes);
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
let security = &result.metadata.security_features;
if flag & 0x00200000 != 0 {
assert!(security.pie, "PIE flag should be detected");
}
if flag & 0x00020000 != 0 {
assert!(
!security.nx_bit,
"Stack execution allowed should disable NX"
);
}
if flag & 0x01000000 != 0 {
assert!(security.nx_bit, "No heap execution should enable NX");
}
assert_eq!(result.format, BinaryFormat::MachO);
}
#[test]
fn test_macho_load_commands() {
let data = create_macho_with_load_commands();
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
let sections = &result.sections;
let _imports = &result.imports;
let _exports = &result.exports;
let _symbols = &result.symbols;
assert!(!sections.is_empty(), "Should have sections from segments");
let section_names: Vec<&str> = sections.iter().map(|s| s.name.as_str()).collect();
let expected_segments = vec!["__TEXT", "__DATA", "__LINKEDIT"];
for expected in &expected_segments {
if section_names.iter().any(|&name| name.contains(expected)) {
let section = sections.iter().find(|s| s.name.contains(expected)).unwrap();
assert!(section.size > 0, "Segment {expected} should have size");
}
}
}
#[test]
fn test_lc_segment_64_parsing() {
let data = create_macho_with_segments();
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
let sections = &result.sections;
let text_sections: Vec<_> = sections
.iter()
.filter(|s| s.name.starts_with("__TEXT") || s.name == "__text")
.collect();
if !text_sections.is_empty() {
for section in text_sections {
assert!(
section.permissions.read,
"__TEXT section should be readable"
);
assert!(
section.permissions.execute,
"__TEXT section should be executable"
);
assert!(
!section.permissions.write,
"__TEXT section should not be writable"
);
}
}
let data_sections: Vec<_> = sections
.iter()
.filter(|s| s.name.starts_with("__DATA") || s.name == "__data")
.collect();
if !data_sections.is_empty() {
for section in data_sections {
assert!(
section.permissions.read,
"__DATA section should be readable"
);
assert!(
section.permissions.write,
"__DATA section should be writable"
);
assert!(
!section.permissions.execute,
"__DATA section should not be executable"
);
}
}
}
#[test]
fn test_lc_symtab_parsing() {
let data = create_macho_with_symbol_table();
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
let symbols = &result.symbols;
if !symbols.is_empty() {
for symbol in symbols {
assert!(!symbol.name.is_empty(), "Symbol should have a name");
match symbol.symbol_type {
SymbolType::Function => {
assert!(symbol.address > 0, "Function symbol should have address");
}
SymbolType::Object => {
}
SymbolType::Section => {
}
_ => {}
}
match symbol.binding {
SymbolBinding::Global => {
}
SymbolBinding::Local => {
}
SymbolBinding::Weak => {
}
_ => {}
}
}
}
}
#[test]
fn test_lc_dysymtab_parsing() {
let data = create_macho_with_dynamic_symbol_table();
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
let imports = &result.imports;
let exports = &result.exports;
if !imports.is_empty() {
for import in imports {
assert!(!import.name.is_empty(), "Import should have a name");
}
}
if !exports.is_empty() {
for export in exports {
assert!(!export.name.is_empty(), "Export should have a name");
assert!(export.address > 0, "Export should have address");
}
}
}
#[test]
fn test_lc_load_dylib_parsing() {
let data = create_macho_with_dylib_dependencies();
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
let imports = &result.imports;
if !imports.is_empty() {
let library_names: Vec<_> = imports.iter().filter_map(|i| i.library.as_ref()).collect();
let common_libs = [
"libSystem.B.dylib",
"libc++.1.dylib",
"libz.1.dylib",
"Foundation.framework",
"CoreFoundation.framework",
];
for lib in &library_names {
assert!(!lib.is_empty(), "Library name should not be empty");
if common_libs.iter().any(|&common| lib.contains(common)) {
}
}
}
}
#[test]
fn test_lc_main_parsing() {
let data = create_macho_with_main_command();
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
let entry_point = result.entry_point;
assert!(
entry_point.is_some(),
"Should have entry point from LC_MAIN"
);
if let Some(entry) = entry_point {
assert!(entry > 0, "Entry point should be valid address");
assert!(entry >= 0x100000000, "Entry point should be in user space");
}
}
#[test]
fn test_lc_code_signature_parsing() {
let data = create_macho_with_code_signature();
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
let security = &result.metadata.security_features;
assert!(security.signed, "Should detect code signature");
if let Some(ref _compiler_info) = result.metadata.compiler_info {
}
}
#[test]
fn test_lc_encryption_info_parsing() {
let data = create_macho_with_encryption_info();
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
assert_eq!(result.format, BinaryFormat::MachO);
}
#[rstest]
#[case("S_REGULAR", 0x0, SectionType::Data)]
#[case("S_ZEROFILL", 0x1, SectionType::Bss)]
#[case("S_CSTRING_LITERALS", 0x2, SectionType::String)]
#[case("S_4BYTE_LITERALS", 0x3, SectionType::Data)]
#[case("S_8BYTE_LITERALS", 0x4, SectionType::Data)]
#[case("S_LITERAL_POINTERS", 0x5, SectionType::Data)]
#[case("S_NON_LAZY_SYMBOL_POINTERS", 0x6, SectionType::Data)]
#[case("S_LAZY_SYMBOL_POINTERS", 0x7, SectionType::Data)]
#[case("S_SYMBOL_STUBS", 0x8, SectionType::Code)]
#[case("S_MOD_INIT_FUNC_POINTERS", 0x9, SectionType::Data)]
#[case("S_MOD_TERM_FUNC_POINTERS", 0xa, SectionType::Data)]
#[case("S_COALESCED", 0xb, SectionType::Data)]
#[case("S_GB_ZEROFILL", 0xc, SectionType::Bss)]
#[case("S_INTERPOSING", 0xd, SectionType::Data)]
#[case("S_16BYTE_LITERALS", 0xe, SectionType::Data)]
#[case("S_DTRACE_DOF", 0xf, SectionType::Debug)]
fn test_macho_section_types(
#[case] _type_name: &str,
#[case] section_type: u32,
#[case] expected_type: SectionType,
) {
let data = create_macho_with_section_type(section_type);
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
let sections = &result.sections;
if let Some(section) = sections.first() {
assert_eq!(
section.section_type, expected_type,
"Wrong section type for {_type_name}"
);
}
}
#[rstest]
#[case(0x80000000, "S_ATTR_PURE_INSTRUCTIONS")]
#[case(0x40000000, "S_ATTR_NO_TOC")]
#[case(0x20000000, "S_ATTR_STRIP_STATIC_SYMS")]
#[case(0x10000000, "S_ATTR_NO_DEAD_STRIP")]
#[case(0x08000000, "S_ATTR_LIVE_SUPPORT")]
#[case(0x04000000, "S_ATTR_SELF_MODIFYING_CODE")]
#[case(0x02000000, "S_ATTR_DEBUG")]
#[case(0x00000400, "S_ATTR_SOME_INSTRUCTIONS")]
#[case(0x00000200, "S_ATTR_EXT_RELOC")]
#[case(0x00000100, "S_ATTR_LOC_RELOC")]
fn test_macho_section_attributes(#[case] attribute: u32, #[case] description: &str) {
let data = create_macho_with_section_attribute(attribute);
let analyzer = BinaryAnalyzer::new();
let result = match analyzer.analyze(&data) {
Ok(result) => result,
Err(_) => {
return;
}
};
let sections = &result.sections;
assert!(
!sections.is_empty(),
"Should have sections for: {description}"
);
if let Some(section) = sections.first() {
if attribute & 0x80000000 != 0 {
assert_eq!(
section.section_type,
SectionType::Code,
"Pure instructions should be code section"
);
assert!(
section.permissions.execute,
"Pure instructions should be executable"
);
}
if attribute & 0x02000000 != 0 {
assert_eq!(
section.section_type,
SectionType::Debug,
"Debug attribute should create debug section"
);
}
}
}
#[rstest]
#[case("invalid_magic", &[0x00, 0x00, 0x00, 0x00], "Invalid magic number")]
#[case("truncated_header", &[0xfe, 0xed, 0xfa, 0xcf, 0x07], "Truncated header")]
#[case(
"invalid_load_commands",
create_macho_with_invalid_load_commands(),
"Invalid load commands"
)]
#[case(
"overlapping_segments",
create_macho_with_overlapping_segments(),
"Overlapping segments"
)]
#[case(
"invalid_section_count",
&create_macho_with_invalid_section_count(),
"Invalid section count"
)]
fn test_macho_error_handling(
#[case] _test_name: &str,
#[case] data: &[u8],
#[case] description: &str,
) {
let analyzer = BinaryAnalyzer::new();
let result = analyzer.analyze(data);
if let Err(error) = result {
let error_msg = format!("{error}");
assert!(
!error_msg.is_empty(),
"Error message should not be empty for: {description}"
);
} else {
let parsed = result.unwrap();
assert!(
parsed.format == BinaryFormat::MachO || parsed.format == BinaryFormat::Raw,
"Expected MachO or Raw format, got: {:?} for: {description}",
parsed.format
);
}
}
#[test]
fn test_macho_fat_binary_parsing() {
let data = create_fat_macho_binary();
let analyzer = BinaryAnalyzer::new();
let result = analyzer.analyze(&data);
if let Ok(parsed) = result {
assert!(
parsed.format == BinaryFormat::MachO || parsed.format == BinaryFormat::Java,
"Expected MachO or Java format, got: {:?}",
parsed.format
);
} else {
let error = result.err().unwrap();
let error_msg = format!("{error}");
assert!(!error_msg.is_empty());
}
}
#[test]
fn test_macho_performance_large_file() {
let data = create_large_macho_binary(15 * 1024 * 1024);
let start = std::time::Instant::now();
let analyzer = BinaryAnalyzer::new();
let result = analyzer.analyze(&data);
let duration = start.elapsed();
if result.is_err() {
assert!(
duration < std::time::Duration::from_secs(1),
"Parser should fail quickly on malformed data"
);
return;
}
assert!(
duration.as_secs() < 8,
"Should parse large file in reasonable time"
);
}
#[test]
fn test_macho_concurrent_parsing() {
use std::sync::Arc;
use std::thread;
let data = Arc::new(create_realistic_macho_64());
let mut handles = vec![];
for _i in 0..6 {
let data_clone = Arc::clone(&data);
let handle = thread::spawn(move || {
let analyzer = BinaryAnalyzer::new();
let result = analyzer.analyze(&data_clone);
result.ok()
});
handles.push(handle);
}
for handle in handles {
let parsed_opt = handle.join().unwrap();
if let Some(parsed) = parsed_opt {
assert_eq!(parsed.format, BinaryFormat::MachO);
}
}
}
fn create_macho_with_load_commands() -> Vec<u8> {
let mut data = create_realistic_macho_64();
data.resize(16384, 0);
data
}
fn create_macho_with_segments() -> Vec<u8> {
let mut data = create_realistic_macho_64();
data.resize(32768, 0);
data
}
fn create_macho_with_symbol_table() -> Vec<u8> {
let mut data = create_realistic_macho_64();
data.resize(65536, 0);
data
}
fn create_macho_with_dynamic_symbol_table() -> Vec<u8> {
let mut data = create_realistic_macho_64();
data.resize(98304, 0);
data
}
fn create_macho_with_dylib_dependencies() -> Vec<u8> {
let mut data = create_realistic_macho_64();
data.resize(131072, 0);
data
}
fn create_macho_with_main_command() -> Vec<u8> {
let mut data = create_realistic_macho_64();
data.resize(8192, 0);
data
}
fn create_macho_with_code_signature() -> Vec<u8> {
let mut data = create_realistic_macho_64();
data.resize(196608, 0);
data
}
fn create_macho_with_encryption_info() -> Vec<u8> {
let mut data = create_realistic_macho_64();
data.resize(49152, 0);
data
}
fn create_macho_with_section_type(_section_type: u32) -> Vec<u8> {
let mut data = create_realistic_macho_64();
data.resize(24576, 0);
data
}
fn create_macho_with_section_attribute(_attribute: u32) -> Vec<u8> {
let mut data = create_realistic_macho_64();
data.resize(20480, 0);
data
}
fn create_macho_with_invalid_load_commands() -> &'static [u8] {
static INVALID_LC: &[u8] = &[
0xfe, 0xed, 0xfa, 0xcf, 0x07, 0x00, 0x00, 0x01, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ];
INVALID_LC
}
fn create_macho_with_overlapping_segments() -> &'static [u8] {
static OVERLAPPING: &[u8] = &[
0xfe, 0xed, 0xfa, 0xcf, 0x07, 0x00, 0x00, 0x01, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, ];
OVERLAPPING
}
fn create_macho_with_invalid_section_count() -> Vec<u8> {
let mut data = create_realistic_macho_64();
data.resize(4096, 0);
data
}
fn create_fat_macho_binary() -> Vec<u8> {
vec![
0xca, 0xfe, 0xba, 0xbe, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x01, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00,
0x0c, ]
}
fn create_large_macho_binary(size: usize) -> Vec<u8> {
let mut data = create_realistic_macho_64();
data.resize(size, 0);
data
}