malwaredb_types/exec/pe32/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3/// PE-32 fields
4mod fields;
5
6use crate::exec::{
7    Architecture, ExecutableFile, ExecutableType, Imports, OperatingSystem, Section, Sections,
8};
9use crate::utils::{bytes_offset_match, u32_from_offset, EntropyCalc};
10use crate::{Ordering, SpecimenFile};
11
12use std::fmt::{Display, Formatter};
13
14use anyhow::{bail, Result};
15use chrono::{DateTime, Utc};
16use flagset::FlagSet;
17use tracing::{debug, instrument};
18
19// Microsoft documentation:
20// https://learn.microsoft.com/en-us/windows/win32/debug/pe-format
21// Wikipedia entry:
22// https://en.wikipedia.org/wiki/Portable_Executable
23
/// Two-byte magic ("MZ") expected at the start of an EXE file.
const EXE_MAGIC: [u8; 2] = *b"MZ";
/// Four-byte PE signature: "PE" followed by two NUL bytes.
const PE_MAGIC: [u8; 4] = *b"PE\0\0";
26
/// Kinds of EXE file this module distinguishes.
///
/// Every PE32 file is an EXE file, but an EXE file is not necessarily a PE32 file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubType {
    /// Executable for MS-DOS, IBM DOS, or Free DOS
    DosExe,

    /// PE32 file which is a .Net executable
    DotNet,

    /// MS Windows Portable Executable
    PE32,

    /// MS Windows Portable Executable, 64-bit
    PE32_64,

    /// 16-bit New Executable, Windows 3.1
    NE,

    /// IBM OS/2 Linear Executable
    LE,
}
50
51impl SubType {
52    /// Static string representation
53    #[must_use]
54    pub fn as_str(&self) -> &'static str {
55        match self {
56            SubType::DosExe => "MZ DOS executable",
57            SubType::DotNet => ".Net executable",
58            SubType::PE32 => "Portable Executable (PE32)",
59            SubType::PE32_64 => "64-bit Portable Executable (PE32+)",
60            SubType::NE => "New Executable",
61            SubType::LE => "Linear Executable",
62        }
63    }
64}
65
66impl Display for SubType {
67    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
68        write!(f, "{}", self.as_str())
69    }
70}
71
/// Windows subsystem a PE32 file is built for.
///
/// Examples include graphical or command-line applications, EFI applications, and others.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubSystem {
    /// Subsystem is not known
    Unknown,

    /// Windows, graphical
    WindowsGUI,

    /// Windows, command-line
    WindowsCLI,

    /// OS/2, command-line
    OS2CLI,

    /// Posix subsystem
    POSIX,

    /// Native code
    NativeCode,

    /// Native driver
    NativeDriver,

    /// Windows CE (embedded)
    WindowsCE,

    /// Application for EFI (Extensible Firmware Interface)
    EFI,

    /// EFI driver which has boot services
    EFIBoot,

    /// ROM image for EFI
    EFIRom,

    /// EFI driver which has runtime services
    EFIRuntimeDriver,

    /// Application for Xbox
    Xbox,

    /// Application for Windows Boot
    WindowsBoot,
}
119
120impl SubSystem {
121    /// Static string representation
122    #[must_use]
123    pub fn as_str(&self) -> &'static str {
124        match self {
125            SubSystem::Unknown => "Unknown subsystem",
126            SubSystem::WindowsGUI => "Windows GUI",
127            SubSystem::WindowsCLI => "Windows CLI",
128            SubSystem::OS2CLI => "OS/2 CLI",
129            SubSystem::POSIX => "Posix",
130            SubSystem::NativeCode => "Native Code",
131            SubSystem::NativeDriver => "Native Driver",
132            SubSystem::WindowsCE => "Windows CE",
133            SubSystem::EFI => "EFI",
134            SubSystem::EFIBoot => "EFI boot",
135            SubSystem::EFIRom => "EFI ROM",
136            SubSystem::EFIRuntimeDriver => "EFI driver with runtime",
137            SubSystem::Xbox => "Xbox",
138            SubSystem::WindowsBoot => "Windows Boot",
139        }
140    }
141}
142
143impl Display for SubSystem {
144    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
145        write!(f, "{}", self.as_str())
146    }
147}
148
149impl From<u16> for SubSystem {
150    fn from(value: u16) -> Self {
151        match value {
152            1 => SubSystem::NativeCode,
153            2 => SubSystem::WindowsGUI,
154            3 => SubSystem::WindowsCLI,
155            5 => SubSystem::OS2CLI,
156            7 => SubSystem::POSIX,
157            8 => SubSystem::NativeDriver,
158            9 => SubSystem::WindowsCE,
159            10 => SubSystem::EFI,
160            11 => SubSystem::EFIBoot,
161            12 => SubSystem::EFIRuntimeDriver,
162            13 => SubSystem::EFIRom,
163            14 => SubSystem::Xbox,
164            16 => SubSystem::WindowsBoot,
165            _ => SubSystem::Unknown,
166        }
167    }
168}
169
170/// The struct to partially represent EXE files.
171///
172/// Used on Windows, DOS, React OS, OS/2, and maybe others.
173///
174/// Effort is made to fail gracefully, since malware may not obey all the rules, and some information
175/// is better than none because some part of the data wasn't parsed correctly.
176#[derive(Clone, Debug)]
177pub struct EXE<'a> {
178    /// If the program is 64-bit
179    pub is64bit: bool,
180
181    /// If the binary has extra data after the last section, could be used to hide something
182    pub has_overlay: Option<bool>,
183
184    /// Instruction set architecture for this binary
185    pub arch: Architecture,
186
187    /// EXE sub-type, mostly if it's for DOS, Windows, OS/2
188    pub sub_type: SubType,
189
190    /// Operating System for this binary, likely Windows
191    pub os: OperatingSystem,
192
193    /// COFF (Common Object File Format) header of the program
194    pub coff_header: Option<fields::COFFHeader>,
195
196    /// Optional Header for this program, not optional if for Windows
197    pub optional_header: Option<fields::OptionalHeader>,
198
199    /// Executable subtype: Program or Library?
200    pub exec_type: ExecutableType,
201
202    /// Windows Subsystem used by this program
203    pub subsystem: Option<SubSystem>,
204
205    /// Sections of this binary
206    pub sections: Option<Sections<'a>>,
207
208    /// External libraries used by this application or library
209    pub imports: Option<Imports>,
210
211    /// The array containing the raw bytes used to parse this program
212    pub contents: &'a [u8],
213}
214
215impl<'a> EXE<'a> {
216    /// EXE, MZ, or PE-32 file parsed from a sequence of bytes
217    ///
218    /// # Errors
219    ///
220    /// Returns an error if parsing fails.
221    #[allow(clippy::too_many_lines)]
222    #[instrument(name = "PE32 parser", skip(contents))]
223    pub fn from(contents: &'a [u8]) -> Result<Self> {
224        if !((contents[0] == EXE_MAGIC[0] && contents[1] == EXE_MAGIC[1])
225            || (contents[0] == EXE_MAGIC[1] && contents[1] == EXE_MAGIC[0]))
226        {
227            bail!("Not a MZ, MS-DOS, or PE32 file");
228        }
229
230        if contents.len() <= 0x40 {
231            bail!("Not enough bytes in PE32 file");
232        }
233
234        let pe_magic_offset =
235            u32_from_offset(contents, 0x3C, Ordering::LittleEndian).unwrap_or_default() as usize;
236        let coff_header_offset = pe_magic_offset + 4;
237        if pe_magic_offset > contents.len()
238            || pe_magic_offset + PE_MAGIC.len() > contents.len()
239            || !bytes_offset_match(contents, pe_magic_offset, &PE_MAGIC)
240        {
241            return Ok(Self {
242                is64bit: false,
243                has_overlay: None,
244                arch: Architecture::X86,
245                sub_type: SubType::DosExe,
246                os: OperatingSystem::DOS,
247                subsystem: None,
248                sections: None,
249                coff_header: None,
250                optional_header: None,
251                exec_type: ExecutableType::Program,
252                imports: None,
253                contents,
254            });
255        }
256
257        let mut sections = Sections::default();
258        let coff = fields::COFFHeader::from(&contents[coff_header_offset..])?;
259        let Some(optional_header) =
260            fields::OptionalHeader::from(&contents[coff_header_offset + 20..])
261        else {
262            return Ok(Self {
263                is64bit: false,
264                has_overlay: None,
265                arch: Architecture::X86,
266                sub_type: SubType::DosExe,
267                os: OperatingSystem::DOS,
268                subsystem: None,
269                sections: None,
270                coff_header: None,
271                optional_header: None,
272                exec_type: ExecutableType::Program,
273                imports: None,
274                contents,
275            });
276        };
277
278        let is64bit = optional_header.is_64bit();
279        let sub_type = {
280            let mut sub_type = SubType::PE32;
281            if is64bit {
282                sub_type = SubType::PE32_64;
283            }
284
285            if let Some(data_directories) = optional_header.data_directories() {
286                if data_directories.clr_runtime_header.virtual_address > 0 {
287                    sub_type = SubType::DotNet;
288                }
289            }
290
291            sub_type
292        };
293
294        let exec_type = {
295            if coff
296                .characteristics
297                .contains(fields::CoffCharacteristics::FileDLL)
298            {
299                ExecutableType::Library
300            } else if coff
301                .characteristics
302                .contains(fields::CoffCharacteristics::ExecutableImage)
303            {
304                ExecutableType::Program
305            } else {
306                ExecutableType::Unknown(0)
307            }
308        };
309
310        let mut subsystem = None;
311        let mut has_overlay = false;
312        if coff
313            .characteristics
314            .contains(fields::CoffCharacteristics::ExecutableImage)
315        {
316            let mut offset =
317                u32_from_offset(contents, 0x3C, Ordering::LittleEndian).unwrap_or_default() + 24;
318            if let Some(subsys) = optional_header.subsystem() {
319                subsystem = Some(subsys.into());
320            }
321
322            if is64bit {
323                offset += 240;
324            } else {
325                offset += 224;
326            }
327            let mut sections_offset = 0;
328            for sec_num in 0..coff.num_sections {
329                if (offset + 40) as usize > contents.len() {
330                    break;
331                }
332                let section_name =
333                    String::from_utf8(contents[offset as usize..(offset + 8) as usize].to_vec())
334                        .unwrap_or_default();
335                let section_virtual_size =
336                    u32_from_offset(contents, (offset + 8) as usize, Ordering::LittleEndian);
337                let section_virtual_address =
338                    u32_from_offset(contents, (offset + 12) as usize, Ordering::LittleEndian);
339                let section_size =
340                    u32_from_offset(contents, (offset + 16) as usize, Ordering::LittleEndian)
341                        .unwrap_or_default() as usize;
342                let section_offset =
343                    u32_from_offset(contents, (offset + 20) as usize, Ordering::LittleEndian)
344                        .unwrap_or_default() as usize;
345                let section_characteristics = FlagSet::<fields::SectionFlags>::new(
346                    u32_from_offset(contents, (offset + 36) as usize, Ordering::LittleEndian)
347                        .unwrap_or_default(),
348                )
349                .unwrap_or_default();
350                let section_executable = section_characteristics.contains(
351                    fields::SectionFlags::Executable | fields::SectionFlags::ContainsCode,
352                );
353
354                if section_offset + section_size < contents.len() {
355                    sections.push(Section {
356                        name: section_name,
357                        is_executable: section_executable,
358                        size: section_size,
359                        offset: section_offset,
360                        virtual_address: section_virtual_address.unwrap_or_default(),
361                        virtual_size: section_virtual_size.unwrap_or_default(),
362                        entropy: contents[section_offset..(section_offset + section_size)]
363                            .to_vec()
364                            .entropy(),
365                        data: Some(&contents[section_offset..(section_offset + section_size)]),
366                    });
367                } else {
368                    debug!("PE32: Skipping section {sec_num} '{section_name}': offset {section_offset} + size {section_size} >= file length {}", contents.len());
369                }
370
371                if section_size + section_offset > sections_offset {
372                    sections_offset = section_size + section_offset;
373                }
374                offset += 40; // Section entries are 40 bytes
375            }
376            has_overlay = sections_offset > 0
377                && sections_offset < contents.len()
378                && contents.len() - sections_offset > 0;
379        }
380
381        let mut imports = None;
382        if let Some(data_directories) = optional_header.data_directories() {
383            let import_section = sections
384                .iter()
385                .find(|x| x.virtual_address == data_directories.import_table.virtual_address);
386
387            if let Some(import_section) = import_section {
388                if import_section.size > 8 && import_section.offset > 0 {
389                    if let Some(num_imports) = match optional_header {
390                        fields::OptionalHeader::OptionalHeader32(h32) => h32.num_rva_and_sizes,
391                        fields::OptionalHeader::OptionalHeader64(h64) => h64.num_rva_and_sizes,
392                    } {
393                        imports =
394                            Some(data_directories.imports(num_imports as usize, import_section));
395                    }
396                }
397            }
398        }
399
400        Ok(Self {
401            is64bit,
402            has_overlay: Some(has_overlay),
403            arch: coff.architecture(),
404            sub_type,
405            os: OperatingSystem::Windows,
406            subsystem,
407            sections: Some(sections),
408            exec_type,
409            coff_header: Some(*coff),
410            optional_header: Some(optional_header),
411            imports,
412            contents,
413        })
414    }
415}
416
417impl ExecutableFile for EXE<'_> {
418    fn architecture(&self) -> Option<Architecture> {
419        Some(self.arch)
420    }
421
422    fn pointer_size(&self) -> usize {
423        match self.sub_type {
424            SubType::PE32_64 => 64,
425            SubType::LE | SubType::NE => 16,
426            _ => 32,
427        }
428    }
429
430    fn operating_system(&self) -> OperatingSystem {
431        self.os
432    }
433
434    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
435        self.coff_header.map(|c| c.compiled_date())
436    }
437
438    #[allow(clippy::cast_possible_truncation)]
439    fn num_sections(&self) -> u32 {
440        self.sections.as_ref().unwrap_or(&Sections::default()).len() as u32
441    }
442
443    fn sections(&self) -> Option<&Sections<'_>> {
444        self.sections.as_ref()
445    }
446
447    fn import_hash(&self) -> Option<String> {
448        self.imports.as_ref().map(|i| hex::encode(i.hash()))
449    }
450
451    fn fuzzy_imports(&self) -> Option<String> {
452        self.imports.as_ref().map(Imports::fuzzy_hash)
453    }
454}
455
456impl SpecimenFile for EXE<'_> {
457    const MAGIC: &'static [&'static [u8]] = &[b"MZ", b"ZM"];
458
459    fn type_name(&self) -> &'static str {
460        self.sub_type.as_str()
461    }
462}
463
464impl Display for EXE<'_> {
465    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
466        writeln!(f, "EXE file:")?;
467        writeln!(f, "\tSubtype: {}", self.sub_type)?;
468        writeln!(f, "\tExecutable type: {}", self.exec_type)?;
469        writeln!(f, "\tOS: {}", self.os)?;
470        writeln!(f, "\tArch: {}", self.arch)?;
471        if let Some(coff) = self.coff_header {
472            writeln!(
473                f,
474                "\tCompiled: {:?}",
475                coff.compiled_date().format("%Y-%m-%d %H:%M:%S").to_string()
476            )?;
477        }
478        if let Some(subsys) = &self.subsystem {
479            writeln!(f, "\tSubsystem: {subsys}")?;
480        }
481        if let Some(sections) = &self.sections {
482            writeln!(f, "\t{} sections:", sections.len())?;
483            for section in sections {
484                writeln!(f, "\t\t{section}")?;
485            }
486        }
487        if let Some(imports) = &self.imports {
488            if imports.imports.len() == imports.expected_imports as usize {
489                writeln!(f, "\t{} imports:", imports.imports.len())?;
490            } else {
491                writeln!(
492                    f,
493                    "\t{} out of {} imports:",
494                    imports.imports.len(),
495                    imports.expected_imports
496                )?;
497            }
498            for import in &imports.imports {
499                writeln!(f, "\t\t{import}")?;
500            }
501            writeln!(f, "\t\tImport hash: {}", hex::encode(imports.hash()))?;
502            writeln!(f, "\t\tFuzzy Import hash: {}", imports.fuzzy_hash())?;
503        }
504        if self.has_overlay == Some(true) {
505            writeln!(f, "\tHas extra bytes at the end (overlay).")?;
506        }
507        writeln!(f, "\tSize: {}", self.contents.len())?;
508        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())
509    }
510}
511
#[cfg(test)]
mod tests {
    use super::*;

    use rstest::rstest;

    // The aclock binaries come from https://github.com/tenox7/aclock; they stand in for unusual
    // files so that no actual malware has to be stored in the repository.
    #[rstest]
    #[case::alpha(include_bytes!("../../../testdata/exe/pe32_aclock_axp.exe"), false, Architecture::Alpha, ExecutableType::Program, SubType::PE32, Some(SubSystem::WindowsCLI))]
    #[case::alpha64(include_bytes!("../../../testdata/exe/pe64_aclock_axp64.exe"), true, Architecture::Alpha64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::arm64(include_bytes!("../../../testdata/exe/pe64_win32_console_arm64_gnu.exe"), true, Architecture::ARM64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::dot_net(include_bytes!("../../../testdata/exe/pe32_dotnet.exe"), false, Architecture::X86, ExecutableType::Program, SubType::DotNet, Some(SubSystem::WindowsCLI))]
    #[case::efi(include_bytes!("../../../testdata/exe/efi/hello.efi"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::EFI))]
    #[case::mips(include_bytes!("../../../testdata/exe/pe32_aclock_mips.exe"), false, Architecture::MIPSEL, ExecutableType::Program, SubType::PE32, Some(SubSystem::WindowsCLI))]
    #[case::ppc(include_bytes!("../../../testdata/exe/pe32_aclock_ppc_winnt.exe"), false, Architecture::PowerPCLE, ExecutableType::Program, SubType::PE32, Some(SubSystem::WindowsCLI))]
    #[case::x86_64_console(include_bytes!("../../../testdata/exe/pe64_win32_console_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::x86_64_gui(include_bytes!("../../../testdata/exe/pe64_win32_gui_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsGUI))]
    #[case::x86_64_posix(include_bytes!("../../../testdata/exe/pe64_posix_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::POSIX))]
    #[case::x86_64_xbox(include_bytes!("../../../testdata/exe/pe64_xbox_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::Xbox))]
    #[case::x86_64_lib_console(include_bytes!("../../../testdata/exe/pe64_lib_console_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Library, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::x86_64_lib_gui(include_bytes!("../../../testdata/exe/pe64_lib_gui_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Library, SubType::PE32_64, Some(SubSystem::WindowsGUI))]
    #[case::x86_exe(include_bytes!("../../../testdata/exe/aclock-i386-win16.exe"), false, Architecture::X86, ExecutableType::Program, SubType::DosExe, None)]
    #[case::i8086_exe(include_bytes!("../../../testdata/exe/aclock-i8086-win1x.exe"), false, Architecture::X86, ExecutableType::Program, SubType::DosExe, None)]
    #[test]
    fn binaries(
        #[case] bytes: &[u8],
        #[case] is_64bit: bool,
        #[case] arch: Architecture,
        #[case] exec_type: ExecutableType,
        #[case] sub_type: SubType,
        #[case] subsystem: Option<SubSystem>,
    ) {
        let parsed = EXE::from(bytes).unwrap();
        eprintln!("{parsed}");
        assert_eq!(parsed.is64bit, is_64bit);
        assert_eq!(parsed.arch, arch);
        assert_eq!(parsed.subsystem, subsystem);
        assert_eq!(parsed.exec_type, exec_type);
        assert_eq!(parsed.sub_type, sub_type);

        // Imports are only checked for the x86-64 samples.
        if arch != Architecture::X86_64 {
            return;
        }

        // The EFI sample is expected to have no imports at all.
        if subsystem == Some(SubSystem::EFI) {
            assert!(parsed.imports.is_none());
            return;
        }

        let imports = parsed.imports.unwrap();
        let expected_hash = match exec_type {
            ExecutableType::Library => Some("466e0075eba65b201b4cc8d4d3f85cbb"),
            ExecutableType::Program => Some("610b6b6aa37c8e01c9855a05dcf00565"),
            _ => None,
        };
        if let Some(expected_hash) = expected_hash {
            assert_eq!(expected_hash, hex::encode(imports.hash()));
        }
    }
}