threatflux_binary_analysis/formats/
mod.rs

1//! Binary format parsers and detection
2
3use crate::{BinaryError, BinaryFormat as Format, BinaryFormatParser, Result};
4
5#[cfg(feature = "elf")]
6pub mod elf;
7#[cfg(feature = "java")]
8pub mod java;
9#[cfg(feature = "macho")]
10pub mod macho;
11#[cfg(feature = "pe")]
12pub mod pe;
13#[cfg(feature = "wasm")]
14pub mod wasm;
15
16pub mod raw;
17
18/// Detect binary format from data
19pub fn detect_format(data: &[u8]) -> Result<Format> {
20    if data.is_empty() {
21        return Err(BinaryError::invalid_data("Empty data"));
22    }
23
24    // Check for ELF magic
25    #[cfg(feature = "elf")]
26    if data.len() >= 4 && &data[0..4] == b"\x7fELF" {
27        return Ok(Format::Elf);
28    }
29
30    // Check for PE magic
31    #[cfg(feature = "pe")]
32    if data.len() >= 2 && &data[0..2] == b"MZ" {
33        return Ok(Format::Pe);
34    }
35
36    // Check for Mach-O magic (handle both endiannesses)
37    #[cfg(feature = "macho")]
38    if data.len() >= 4 {
39        let magic_le = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
40        let magic_be = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
41        match magic_le {
42            0xfeedface | 0xfeedfacf | 0xcefaedfe | 0xcffaedfe => {
43                return Ok(Format::MachO);
44            }
45            _ => {}
46        }
47        match magic_be {
48            0xfeedface | 0xfeedfacf | 0xcefaedfe | 0xcffaedfe => {
49                return Ok(Format::MachO);
50            }
51            _ => {}
52        }
53    }
54
55    // Check for Java class magic
56    if data.len() >= 4 && &data[0..4] == b"\xca\xfe\xba\xbe" {
57        return Ok(Format::Java);
58    }
59
60    // Check for JAR/ZIP magic with Java class entries
61    #[cfg(feature = "java")]
62    if data.len() >= 4 && &data[0..4] == b"PK\x03\x04" {
63        use std::io::Cursor;
64        if let Ok(mut archive) = zip::ZipArchive::new(Cursor::new(data)) {
65            for i in 0..archive.len() {
66                if let Ok(file) = archive.by_index(i) {
67                    if file.name().ends_with(".class") {
68                        return Ok(Format::Java);
69                    }
70                }
71            }
72        }
73    }
74
75    // Check for WebAssembly magic
76    if data.len() >= 4 && &data[0..4] == b"\x00asm" {
77        return Ok(Format::Wasm);
78    }
79
80    // Default to raw binary for any data that doesn't match known formats
81    Ok(Format::Raw)
82}
83
84/// Parse binary data using the appropriate parser
85pub fn parse_binary(data: &[u8], format: Format) -> crate::types::ParseResult {
86    match format {
87        #[cfg(feature = "elf")]
88        Format::Elf => elf::ElfParser::parse(data),
89        #[cfg(not(feature = "elf"))]
90        Format::Elf => Err(BinaryError::unsupported_format("ELF".to_string())),
91
92        #[cfg(feature = "pe")]
93        Format::Pe => pe::PeParser::parse(data),
94        #[cfg(not(feature = "pe"))]
95        Format::Pe => Err(BinaryError::unsupported_format("PE".to_string())),
96
97        #[cfg(feature = "macho")]
98        Format::MachO => macho::MachOParser::parse(data),
99        #[cfg(not(feature = "macho"))]
100        Format::MachO => Err(BinaryError::unsupported_format("MachO".to_string())),
101
102        #[cfg(feature = "java")]
103        Format::Java => java::JavaParser::parse(data),
104        #[cfg(not(feature = "java"))]
105        Format::Java => Err(BinaryError::unsupported_format("Java".to_string())),
106        #[cfg(feature = "wasm")]
107        Format::Wasm => wasm::WasmParser::parse(data),
108        #[cfg(not(feature = "wasm"))]
109        Format::Wasm => Err(BinaryError::unsupported_format("Wasm".to_string())),
110        Format::Raw => raw::RawParser::parse(data),
111        Format::Unknown => Err(BinaryError::unsupported_format("Unknown".to_string())),
112    }
113}