threatflux_binary_analysis/formats/
pe.rs

1//! PE (Portable Executable) format parser for Windows binaries
2
3use crate::{
4    types::{
5        Architecture, BinaryFormat as Format, BinaryMetadata, Endianness, Export, Import, Section,
6        SectionPermissions, SectionType, SecurityFeatures, Symbol,
7    },
8    BinaryFormatParser, BinaryFormatTrait, Result,
9};
10use goblin::pe::{dll_characteristic::*, PE};
11
12/// PE format parser
13pub struct PeParser;
14
15impl BinaryFormatParser for PeParser {
16    fn parse(data: &[u8]) -> Result<Box<dyn BinaryFormatTrait>> {
17        let pe = PE::parse(data)?;
18        Ok(Box::new(PeBinary::new(pe, data)?))
19    }
20
21    fn can_parse(data: &[u8]) -> bool {
22        // Check for DOS header signature "MZ"
23        if data.len() < 2 || &data[0..2] != b"MZ" {
24            return false;
25        }
26
27        // Check for PE signature
28        if data.len() >= 0x3c + 4 {
29            let pe_offset =
30                u32::from_le_bytes([data[0x3c], data[0x3d], data[0x3e], data[0x3f]]) as usize;
31
32            if pe_offset + 4 <= data.len() {
33                return &data[pe_offset..pe_offset + 4] == b"PE\0\0";
34            }
35        }
36
37        false
38    }
39}
40
41/// Parsed PE binary
42pub struct PeBinary {
43    #[allow(dead_code)]
44    pe: PE<'static>,
45    #[allow(dead_code)]
46    data: Vec<u8>,
47    metadata: BinaryMetadata,
48    sections: Vec<Section>,
49    symbols: Vec<Symbol>,
50    imports: Vec<Import>,
51    exports: Vec<Export>,
52}
53
54impl PeBinary {
55    fn new(pe: PE<'_>, data: &[u8]) -> Result<Self> {
56        let data = data.to_vec();
57
58        // Convert architecture
59        let architecture = match pe.header.coff_header.machine {
60            goblin::pe::header::COFF_MACHINE_X86 => Architecture::X86,
61            goblin::pe::header::COFF_MACHINE_X86_64 => Architecture::X86_64,
62            goblin::pe::header::COFF_MACHINE_ARM => Architecture::Arm,
63            goblin::pe::header::COFF_MACHINE_ARM64 => Architecture::Arm64,
64            _ => Architecture::Unknown,
65        };
66
67        // PE is always little endian
68        let endian = Endianness::Little;
69
70        // Analyze security features
71        let security_features = analyze_security_features(&pe);
72
73        // Get base address and entry point from optional header
74        let (base_address, entry_point) = if let Some(optional_header) = &pe.header.optional_header
75        {
76            (
77                Some(optional_header.windows_fields.image_base),
78                Some(
79                    optional_header.standard_fields.address_of_entry_point as u64
80                        + optional_header.windows_fields.image_base,
81                ),
82            )
83        } else {
84            (None, None)
85        };
86
87        let metadata = BinaryMetadata {
88            size: data.len(),
89            format: Format::Pe,
90            architecture,
91            entry_point,
92            base_address,
93            timestamp: Some(pe.header.coff_header.time_date_stamp as u64),
94            compiler_info: extract_compiler_info(&pe),
95            endian,
96            security_features,
97        };
98
99        // Parse sections
100        let sections = parse_sections(&pe, &data)?;
101
102        // Parse symbols
103        let symbols = parse_symbols(&pe, &data)?;
104
105        // Parse imports and exports
106        let (imports, exports) = parse_imports_exports(&pe)?;
107
108        // Handle lifetime issues with PE struct
109        let pe_owned = unsafe { std::mem::transmute::<PE<'_>, PE<'static>>(pe) };
110
111        Ok(Self {
112            pe: pe_owned,
113            data,
114            metadata,
115            sections,
116            symbols,
117            imports,
118            exports,
119        })
120    }
121}
122
123impl BinaryFormatTrait for PeBinary {
124    fn format_type(&self) -> Format {
125        Format::Pe
126    }
127
128    fn architecture(&self) -> Architecture {
129        self.metadata.architecture
130    }
131
132    fn entry_point(&self) -> Option<u64> {
133        self.metadata.entry_point
134    }
135
136    fn sections(&self) -> &[Section] {
137        &self.sections
138    }
139
140    fn symbols(&self) -> &[Symbol] {
141        &self.symbols
142    }
143
144    fn imports(&self) -> &[Import] {
145        &self.imports
146    }
147
148    fn exports(&self) -> &[Export] {
149        &self.exports
150    }
151
152    fn metadata(&self) -> &BinaryMetadata {
153        &self.metadata
154    }
155}
156
157fn parse_sections(pe: &PE, data: &[u8]) -> Result<Vec<Section>> {
158    let mut sections = Vec::new();
159
160    for section in &pe.sections {
161        let name = String::from_utf8_lossy(&section.name)
162            .trim_end_matches('\0')
163            .to_string();
164
165        // Determine section type based on characteristics
166        let section_type =
167            if section.characteristics & goblin::pe::section_table::IMAGE_SCN_CNT_CODE != 0 {
168                SectionType::Code
169            } else if section.characteristics
170                & goblin::pe::section_table::IMAGE_SCN_CNT_INITIALIZED_DATA
171                != 0
172            {
173                if section.characteristics & goblin::pe::section_table::IMAGE_SCN_MEM_WRITE != 0 {
174                    SectionType::Data
175                } else {
176                    SectionType::ReadOnlyData
177                }
178            } else if section.characteristics
179                & goblin::pe::section_table::IMAGE_SCN_CNT_UNINITIALIZED_DATA
180                != 0
181            {
182                SectionType::Bss
183            } else {
184                SectionType::Other("PE_SECTION".to_string())
185            };
186
187        let permissions = SectionPermissions {
188            read: section.characteristics & goblin::pe::section_table::IMAGE_SCN_MEM_READ != 0,
189            write: section.characteristics & goblin::pe::section_table::IMAGE_SCN_MEM_WRITE != 0,
190            execute: section.characteristics & goblin::pe::section_table::IMAGE_SCN_MEM_EXECUTE
191                != 0,
192        };
193
194        // Extract small section data
195        let section_data = if section.size_of_raw_data <= 1024 && section.pointer_to_raw_data > 0 {
196            let start = section.pointer_to_raw_data as usize;
197            let end = start + section.size_of_raw_data as usize;
198            if end <= data.len() {
199                Some(data[start..end].to_vec())
200            } else {
201                None
202            }
203        } else {
204            None
205        };
206
207        sections.push(Section {
208            name,
209            address: section.virtual_address as u64,
210            size: section.virtual_size as u64,
211            offset: section.pointer_to_raw_data as u64,
212            permissions,
213            section_type,
214            data: section_data,
215        });
216    }
217
218    Ok(sections)
219}
220
221fn parse_symbols(_pe: &PE, _data: &[u8]) -> Result<Vec<Symbol>> {
222    // For now, return empty symbols as goblin 0.10 has changed the symbol API significantly
223    // NOTE: Symbol parsing API changed significantly in goblin 0.10
224    Ok(Vec::new())
225}
226
227fn parse_imports_exports(pe: &PE) -> crate::types::ImportExportResult {
228    let mut imports = Vec::new();
229    let mut exports = Vec::new();
230
231    // Parse imports
232    for import in &pe.imports {
233        imports.push(Import {
234            name: import.name.to_string(),
235            library: Some(import.dll.to_string()),
236            address: Some(import.rva as u64),
237            ordinal: Some(import.ordinal),
238        });
239    }
240
241    // Parse exports
242    for export in &pe.exports {
243        if let Some(name) = &export.name {
244            exports.push(Export {
245                name: name.to_string(),
246                address: export.rva as u64,
247                ordinal: None, // PE exports don't have ordinals in goblin 0.10
248                forwarded_name: export.reexport.as_ref().map(|r| match r {
249                    goblin::pe::export::Reexport::DLLName { export, lib } => {
250                        format!("{}.{}", lib, export)
251                    }
252                    goblin::pe::export::Reexport::DLLOrdinal { ordinal, lib } => {
253                        format!("{}.#{}", lib, ordinal)
254                    }
255                }),
256            });
257        }
258    }
259
260    Ok((imports, exports))
261}
262
263fn analyze_security_features(pe: &PE) -> SecurityFeatures {
264    let mut features = SecurityFeatures::default();
265
266    if let Some(optional_header) = &pe.header.optional_header {
267        let characteristics = optional_header.windows_fields.dll_characteristics;
268
269        // DEP/NX bit
270        features.nx_bit = characteristics & IMAGE_DLLCHARACTERISTICS_NX_COMPAT != 0;
271
272        // ASLR
273        features.aslr = characteristics & IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE != 0;
274
275        // High entropy ASLR
276        let _high_entropy = characteristics & IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA != 0;
277
278        // CFI (Control Flow Guard)
279        features.cfi = characteristics & IMAGE_DLLCHARACTERISTICS_GUARD_CF != 0;
280
281        // Position Independent Executable (requires relocation table removal)
282        features.pie = features.aslr; // Simplified check
283    }
284
285    // Check for stack canaries (would need more complex analysis)
286    // This would require analyzing the binary for __security_cookie references
287    features.stack_canary = false;
288
289    // Check if binary is signed (would need to parse certificate table)
290    features.signed = !pe.certificates.is_empty();
291
292    features
293}
294
295fn extract_compiler_info(pe: &PE) -> Option<String> {
296    // Look for compiler strings in debug info or rich header
297    // This is a simplified implementation
298    if pe.header.coff_header.number_of_symbol_table > 0 {
299        Some("MSVC (detected from symbols)".to_string())
300    } else {
301        None
302    }
303}