malwaredb_types/exec/pe32/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3/// PE-32 fields
4mod fields;
5
6/// Convenience functions and utilities for PE-32 parsing
7mod utils;
8
9use crate::exec::{
10    Architecture, ExecutableFile, ExecutableType, Imports, OperatingSystem, Section, Sections,
11};
12use crate::utils::{bytes_offset_match, u32_from_offset, EntropyCalc};
13use crate::{Ordering, SpecimenFile};
14
15use std::fmt::{Display, Formatter};
16
17use anyhow::{bail, Result};
18use chrono::{DateTime, Utc};
19use flagset::FlagSet;
20use tracing::{debug, instrument};
21
22// Microsoft documentation:
23// https://learn.microsoft.com/en-us/windows/win32/debug/pe-format
24// Wikipedia entry:
25// https://en.wikipedia.org/wiki/Portable_Executable
26
27const EXE_MAGIC: [u8; 2] = [b'M', b'Z'];
28const PE_MAGIC: [u8; 4] = [0x50, 0x45, 0x00, 0x00];
29
/// Kinds of executable which share the `MZ` container
///
/// Every PE32 file is an EXE file, but an EXE file isn't necessarily a PE32 file.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SubType {
    /// Executable for MS-DOS, IBM DOS, or Free DOS
    DosExe,

    /// PE32 file targeting the .Net runtime
    DotNet,

    /// Portable Executable for MS Windows
    PE32,

    /// Portable Executable for MS Windows, 64-bit variant
    PE32_64,

    /// 16-bit New Executable, as used by Windows 3.1
    NE,

    /// Linear Executable, as used by IBM OS/2
    LE,
}

impl SubType {
    /// Human-readable name of the sub-type as a static string
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::DosExe => "MZ DOS executable",
            Self::DotNet => ".Net executable",
            Self::PE32 => "Portable Executable (PE32)",
            Self::PE32_64 => "64-bit Portable Executable (PE32+)",
            Self::NE => "New Executable",
            Self::LE => "Linear Executable",
        }
    }
}

impl Display for SubType {
    /// Writes the same text as [`SubType::as_str`]; width and fill options are not applied
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
74
/// The Windows subsystem a PE32 file is built for.
///
/// This distinguishes, for example, graphical from command line applications, or EFI
/// applications from regular Windows programs.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SubSystem {
    /// Subsystem not known
    Unknown,

    /// Windows, graphical
    WindowsGUI,

    /// Windows, command-line
    WindowsCLI,

    /// OS/2, command-line
    OS2CLI,

    /// Posix subsystem
    POSIX,

    /// Native code
    NativeCode,

    /// Native driver
    NativeDriver,

    /// Windows CE (embedded)
    WindowsCE,

    /// Application for EFI (Extensible Firmware Interface)
    EFI,

    /// EFI driver using boot services
    EFIBoot,

    /// EFI ROM image
    EFIRom,

    /// EFI driver using runtime services
    EFIRuntimeDriver,

    /// Application for Xbox
    Xbox,

    /// Windows Boot application
    WindowsBoot,
}

impl SubSystem {
    /// Human-readable name of the subsystem as a static string
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Unknown => "Unknown subsystem",
            Self::WindowsGUI => "Windows GUI",
            Self::WindowsCLI => "Windows CLI",
            Self::OS2CLI => "OS/2 CLI",
            Self::POSIX => "Posix",
            Self::NativeCode => "Native Code",
            Self::NativeDriver => "Native Driver",
            Self::WindowsCE => "Windows CE",
            Self::EFI => "EFI",
            Self::EFIBoot => "EFI boot",
            Self::EFIRom => "EFI ROM",
            Self::EFIRuntimeDriver => "EFI driver with runtime",
            Self::Xbox => "Xbox",
            Self::WindowsBoot => "Windows Boot",
        }
    }
}

impl Display for SubSystem {
    /// Writes the same text as [`SubSystem::as_str`]; width and fill options are not applied
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}

impl From<u16> for SubSystem {
    /// Maps the numeric subsystem value onto a variant.
    ///
    /// Any value without a dedicated variant becomes [`SubSystem::Unknown`].
    fn from(raw: u16) -> Self {
        match raw {
            // Windows proper
            1 => Self::NativeCode,
            2 => Self::WindowsGUI,
            3 => Self::WindowsCLI,
            8 => Self::NativeDriver,
            9 => Self::WindowsCE,
            16 => Self::WindowsBoot,
            // Compatibility subsystems
            5 => Self::OS2CLI,
            7 => Self::POSIX,
            // Firmware images
            10 => Self::EFI,
            11 => Self::EFIBoot,
            12 => Self::EFIRuntimeDriver,
            13 => Self::EFIRom,
            // Consoles
            14 => Self::Xbox,
            _ => Self::Unknown,
        }
    }
}
172
/// The struct to partially represent EXE files.
///
/// Used on Windows, DOS, React OS, OS/2, and maybe others.
///
/// Effort is made to fail gracefully, since malware may not obey all the rules, and some information
/// is better than none because some part of the data wasn't parsed correctly.
///
/// Fields typed as `Option` are `None` when the corresponding part of the file was absent or
/// wasn't parsed; in particular, a file without a valid PE signature is reported as a DOS
/// executable with all the PE-specific fields set to `None`.
#[derive(Clone, Debug)]
pub struct EXE<'a> {
    /// If the program is 64-bit, as reported by the optional header; `false` for DOS executables
    pub is64bit: bool,

    /// If the binary has extra data after the last section, could be used to hide something.
    /// `None` when the file was treated as a DOS executable and sections weren't examined.
    pub has_overlay: Option<bool>,

    /// Instruction set architecture for this binary
    pub arch: Architecture,

    /// EXE sub-type, mostly if it's for DOS, Windows, OS/2
    pub sub_type: SubType,

    /// Operating System for this binary, likely Windows
    pub os: OperatingSystem,

    /// COFF (Common Object File Format) header of the program, `None` for DOS executables
    pub coff_header: Option<fields::COFFHeader>,

    /// Optional Header for this program, not optional if for Windows; `None` for DOS executables
    pub optional_header: Option<fields::OptionalHeader>,

    /// Executable subtype: Program or Library?
    pub exec_type: ExecutableType,

    /// Windows Subsystem used by this program; only populated when the COFF header marks the
    /// file as an executable image
    pub subsystem: Option<SubSystem>,

    /// Sections of this binary, `None` for DOS executables
    pub sections: Option<Sections<'a>>,

    /// External libraries used by this application or library; `None` if no section matching
    /// the import table was found
    pub imports: Option<Imports>,

    /// The array containing the raw bytes used to parse this program
    pub contents: &'a [u8],
}
217
218impl<'a> EXE<'a> {
219    /// EXE, MZ, or PE-32 file parsed from a sequence of bytes
220    #[instrument(name = "PE32 parser", skip(contents))]
221    pub fn from(contents: &'a [u8]) -> Result<Self> {
222        if !((contents[0] == EXE_MAGIC[0] && contents[1] == EXE_MAGIC[1])
223            || (contents[0] == EXE_MAGIC[1] && contents[1] == EXE_MAGIC[0]))
224        {
225            bail!("Not a MZ, MS-DOS, or PE32 file");
226        }
227
228        let pe_magic_offset = u32_from_offset(contents, 0x3C, Ordering::LittleEndian) as usize;
229        let coff_header_offset = pe_magic_offset + 4;
230        if pe_magic_offset > contents.len()
231            || pe_magic_offset + PE_MAGIC.len() > contents.len()
232            || !bytes_offset_match(contents, pe_magic_offset, &PE_MAGIC)
233        {
234            return Ok(Self {
235                is64bit: false,
236                has_overlay: None,
237                arch: Architecture::X86,
238                sub_type: SubType::DosExe,
239                os: OperatingSystem::DOS,
240                subsystem: None,
241                sections: None,
242                coff_header: None,
243                optional_header: None,
244                exec_type: ExecutableType::Program,
245                imports: None,
246                contents,
247            });
248        }
249
250        let mut sections = Sections::default();
251        let coff = fields::COFFHeader::from(&contents[coff_header_offset..]);
252        let optional_header = fields::OptionalHeader::from(&contents[coff_header_offset + 20..]);
253
254        let is64bit = optional_header.is_64bit();
255        let sub_type = {
256            let mut sub_type = SubType::PE32;
257            if is64bit {
258                sub_type = SubType::PE32_64;
259            }
260
261            if let Some(data_directories) = optional_header.data_directories() {
262                if data_directories.clr_runtime_header.virtual_address > 0 {
263                    sub_type = SubType::DotNet;
264                }
265            }
266
267            sub_type
268        };
269
270        let exec_type = {
271            if coff
272                .characteristics
273                .contains(fields::CoffCharacteristics::FileDLL)
274            {
275                ExecutableType::Library
276            } else if coff
277                .characteristics
278                .contains(fields::CoffCharacteristics::ExecutableImage)
279            {
280                ExecutableType::Program
281            } else {
282                ExecutableType::Unknown(0)
283            }
284        };
285
286        let mut subsystem = None;
287        let mut has_overlay = false;
288        if coff
289            .characteristics
290            .contains(fields::CoffCharacteristics::ExecutableImage)
291        {
292            let mut offset = u32_from_offset(contents, 0x3C, Ordering::LittleEndian) + 24;
293            subsystem = Some(optional_header.subsystem().unwrap().into());
294
295            if is64bit {
296                offset += 240;
297            } else {
298                offset += 224;
299            }
300            let mut sections_offset = 0;
301            for sec_num in 0..coff.num_sections {
302                let section_name =
303                    String::from_utf8(contents[offset as usize..(offset + 8) as usize].to_vec())
304                        .unwrap_or_default();
305                let section_virtual_size =
306                    u32_from_offset(contents, (offset + 8) as usize, Ordering::LittleEndian);
307                let section_virtual_address =
308                    u32_from_offset(contents, (offset + 12) as usize, Ordering::LittleEndian);
309                let section_size =
310                    u32_from_offset(contents, (offset + 16) as usize, Ordering::LittleEndian);
311                let section_offset =
312                    u32_from_offset(contents, (offset + 20) as usize, Ordering::LittleEndian);
313                let section_characteristics = FlagSet::<fields::SectionFlags>::new(
314                    u32_from_offset(contents, (offset + 36) as usize, Ordering::LittleEndian),
315                )
316                .unwrap_or_default();
317                let section_executable = section_characteristics.contains(
318                    fields::SectionFlags::Executable | fields::SectionFlags::ContainsCode,
319                );
320
321                if section_offset + section_size < contents.len() as u32 {
322                    sections.push(Section {
323                        name: section_name,
324                        is_executable: section_executable,
325                        size: section_size as usize,
326                        offset: section_offset as usize,
327                        virtual_address: section_virtual_address,
328                        virtual_size: section_virtual_size,
329                        entropy: contents
330                            [section_offset as usize..(section_offset + section_size) as usize]
331                            .to_vec()
332                            .entropy(),
333                        data: Some(
334                            &contents
335                                [section_offset as usize..(section_offset + section_size) as usize],
336                        ),
337                    });
338                } else {
339                    debug!("Skipping section {sec_num} '{section_name}': offset {section_offset} + size {section_size} >= file length {}", contents.len());
340                }
341
342                if section_size + section_offset > sections_offset {
343                    sections_offset = section_size + section_offset;
344                }
345                offset += 40; // Section entries are 40 bytes
346            }
347            has_overlay = sections_offset as usize > 0
348                && (sections_offset as usize) < contents.len()
349                && contents.len() - sections_offset as usize > 0;
350        }
351
352        let mut imports = None;
353        if let Some(data_directories) = optional_header.data_directories() {
354            let import_section = sections
355                .iter()
356                .find(|x| x.virtual_address == data_directories.import_table.virtual_address);
357
358            if let Some(import_section) = import_section {
359                if import_section.size > 8 && import_section.offset > 0 {
360                    let num_imports = match optional_header {
361                        fields::OptionalHeader::OptionalHeader32(h32) => h32.num_rva_and_sizes,
362                        fields::OptionalHeader::OptionalHeader64(h64) => h64.num_rva_and_sizes,
363                    }
364                    .unwrap();
365                    imports = Some(data_directories.imports(num_imports as usize, import_section));
366                }
367            }
368        }
369
370        Ok(Self {
371            is64bit,
372            has_overlay: Some(has_overlay),
373            arch: coff.architecture(),
374            sub_type,
375            os: OperatingSystem::Windows,
376            subsystem,
377            sections: Some(sections),
378            exec_type,
379            coff_header: Some(*coff),
380            optional_header: Some(optional_header),
381            imports,
382            contents,
383        })
384    }
385}
386
387impl ExecutableFile for EXE<'_> {
388    fn architecture(&self) -> Architecture {
389        self.arch
390    }
391
392    fn pointer_size(&self) -> usize {
393        match self.sub_type {
394            SubType::PE32_64 => 64,
395            SubType::LE | SubType::NE => 16,
396            _ => 32,
397        }
398    }
399
400    fn operating_system(&self) -> OperatingSystem {
401        self.os
402    }
403
404    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
405        self.coff_header.map(|c| c.compiled_date())
406    }
407
408    fn num_sections(&self) -> u32 {
409        self.sections.as_ref().unwrap_or(&Sections::default()).len() as u32
410    }
411
412    fn sections(&self) -> Option<&Sections> {
413        self.sections.as_ref()
414    }
415
416    fn import_hash(&self) -> Option<String> {
417        self.imports.as_ref().map(|i| hex::encode(i.hash()))
418    }
419
420    fn fuzzy_imports(&self) -> Option<String> {
421        self.imports.as_ref().map(Imports::fuzzy_hash)
422    }
423}
424
impl SpecimenFile for EXE<'_> {
    /// Byte signatures associated with EXE files: `MZ` and the byte-swapped `ZM`
    const MAGIC: &'static [&'static [u8]] = &[b"MZ", b"ZM"];

    /// Name of the file type, which is the static string for the parsed sub-type
    fn type_name(&self) -> &'static str {
        self.sub_type.as_str()
    }
}
432
433impl Display for EXE<'_> {
434    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
435        writeln!(f, "EXE file:")?;
436        writeln!(f, "\tSubtype: {}", self.sub_type)?;
437        writeln!(f, "\tExecutable type: {}", self.exec_type)?;
438        writeln!(f, "\tOS: {}", self.os)?;
439        writeln!(f, "\tArch: {}", self.arch)?;
440        if let Some(coff) = self.coff_header {
441            writeln!(
442                f,
443                "\tCompiled: {:?}",
444                coff.compiled_date().format("%Y-%m-%d %H:%M:%S").to_string()
445            )?;
446        }
447        if let Some(subsys) = &self.subsystem {
448            writeln!(f, "\tSubsystem: {subsys}")?;
449        }
450        if let Some(sections) = &self.sections {
451            writeln!(f, "\t{} sections:", sections.len())?;
452            for section in sections {
453                writeln!(f, "\t\t{section}")?;
454            }
455        }
456        if let Some(imports) = &self.imports {
457            if imports.imports.len() == imports.expected_imports as usize {
458                writeln!(f, "\t{} imports:", imports.imports.len())?;
459            } else {
460                writeln!(
461                    f,
462                    "\t{} out of {} imports:",
463                    imports.imports.len(),
464                    imports.expected_imports
465                )?;
466            }
467            for import in &imports.imports {
468                writeln!(f, "\t\t{import}")?;
469            }
470            writeln!(f, "\t\tImport hash: {}", hex::encode(imports.hash()))?;
471            writeln!(f, "\t\tFuzzy Import hash: {}", imports.fuzzy_hash())?;
472        }
473        if self.has_overlay == Some(true) {
474            writeln!(f, "\tHas extra bytes at the end (overlay).")?;
475        }
476        writeln!(f, "\tSize: {}", self.contents.len())?;
477        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())
478    }
479}
480
#[cfg(test)]
mod tests {
    use super::*;

    use rstest::rstest;

    // aclock binaries are from https://github.com/tenox7/aclock, and used as a stand-in for weird
    // files instead of storing actual malware in the repository
    //
    // Each case provides: the file contents, whether it's 64-bit, the architecture, whether it's
    // a program or a library, the EXE sub-type, and the subsystem.
    #[rstest]
    #[case::alpha(include_bytes!("../../../testdata/exe/pe32_aclock_axp.exe"), false, Architecture::Alpha, ExecutableType::Program, SubType::PE32, Some(SubSystem::WindowsCLI))]
    #[case::alpha64(include_bytes!("../../../testdata/exe/pe64_aclock_axp64.exe"), true, Architecture::Alpha, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::arm64(include_bytes!("../../../testdata/exe/pe64_win32_console_arm64_gnu.exe"), true, Architecture::ARM64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::dot_net(include_bytes!("../../../testdata/exe/pe32_dotnet.exe"), false, Architecture::X86, ExecutableType::Program, SubType::DotNet, Some(SubSystem::WindowsCLI))]
    #[case::efi(include_bytes!("../../../testdata/exe/efi/hello.efi"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::EFI))]
    #[case::mips(include_bytes!("../../../testdata/exe/pe32_aclock_mips.exe"), false, Architecture::MIPSEL, ExecutableType::Program, SubType::PE32, Some(SubSystem::WindowsCLI))]
    #[case::ppc(include_bytes!("../../../testdata/exe/pe32_aclock_ppc_winnt.exe"), false, Architecture::PowerPCLE, ExecutableType::Program, SubType::PE32, Some(SubSystem::WindowsCLI))]
    #[case::x86_64_console(include_bytes!("../../../testdata/exe/pe64_win32_console_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::x86_64_gui(include_bytes!("../../../testdata/exe/pe64_win32_gui_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsGUI))]
    #[case::x86_64_posix(include_bytes!("../../../testdata/exe/pe64_posix_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::POSIX))]
    #[case::x86_64_xbox(include_bytes!("../../../testdata/exe/pe64_xbox_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::Xbox))]
    #[case::x86_64_lib_console(include_bytes!("../../../testdata/exe/pe64_lib_console_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Library, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::x86_64_lib_gui(include_bytes!("../../../testdata/exe/pe64_lib_gui_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Library, SubType::PE32_64, Some(SubSystem::WindowsGUI))]
    #[test]
    fn binaries(
        #[case] bytes: &[u8],
        #[case] is_64bit: bool,
        #[case] arch: Architecture,
        #[case] exec_type: ExecutableType,
        #[case] sub_type: SubType,
        #[case] subsystem: Option<SubSystem>,
    ) {
        let exe = EXE::from(bytes).unwrap();
        // Printed so the parsed details show up in the output of a failing case
        eprintln!("{exe}");
        assert_eq!(exe.is64bit, is_64bit);
        assert_eq!(exe.arch, arch);
        assert_eq!(exe.subsystem, subsystem);
        assert_eq!(exe.exec_type, exec_type);
        assert_eq!(exe.sub_type, sub_type);

        // Imports are only verified for the x86_64 files
        if arch == Architecture::X86_64 {
            if subsystem == Some(SubSystem::EFI) {
                // The EFI sample is expected to have no parsed imports
                assert!(exe.imports.is_none());
            } else {
                let imports = exe.imports.unwrap();
                // One expected import hash for all the libraries, another for all the programs
                if exec_type == ExecutableType::Library {
                    assert_eq!(
                        "466e0075eba65b201b4cc8d4d3f85cbb",
                        hex::encode(imports.hash())
                    );
                } else if exec_type == ExecutableType::Program {
                    assert_eq!(
                        "610b6b6aa37c8e01c9855a05dcf00565",
                        hex::encode(imports.hash())
                    );
                }
            }
        }
    }
}