Skip to main content

malwaredb_types/exec/pe32/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3/// PE-32 fields
4mod fields;
5
6use crate::exec::{
7    Architecture, ExecutableFile, ExecutableType, Imports, OperatingSystem, Section, Sections,
8};
9use crate::utils::{bytes_offset_match, u32_from_offset, EntropyCalc};
10use crate::{Ordering, SpecimenFile};
11
12use std::fmt::{Display, Formatter};
13
14use anyhow::{bail, Result};
15use chrono::{DateTime, Utc};
16use flagset::FlagSet;
17use tracing::{debug, instrument};
18use uuid::Uuid;
19
20// Microsoft documentation:
21// https://learn.microsoft.com/en-us/windows/win32/debug/pe-format
22// Wikipedia entry:
23// https://en.wikipedia.org/wiki/Portable_Executable
24
/// Leading two bytes of an MZ (DOS/Windows) executable.
const EXE_MAGIC: [u8; 2] = *b"MZ";
/// Four-byte PE signature: the letters `PE` followed by two NUL bytes.
const PE_MAGIC: [u8; 4] = *b"PE\0\0";
27
/// Flavours of EXE file this parser distinguishes.
///
/// Every PE32 file is an EXE file, but an EXE file is not necessarily a PE32 file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubType {
    /// Executable for MS-DOS, IBM DOS, or Free DOS
    DosExe,

    /// PE32 file carrying a .Net program
    DotNet,

    /// Portable Executable for MS Windows
    PE32,

    /// Portable Executable for MS Windows, 64-bit variant
    PE32_64,

    /// 16-bit New Executable (Windows 3.1)
    NE,

    /// Linear Executable (IBM OS/2)
    LE,
}

impl SubType {
    /// Human-readable name of the subtype as a static string.
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::DosExe => "MZ DOS executable",
            Self::DotNet => ".Net executable",
            Self::PE32 => "Portable Executable (PE32)",
            Self::PE32_64 => "64-bit Portable Executable (PE32+)",
            Self::NE => "New Executable",
            Self::LE => "Linear Executable",
        }
    }
}

impl Display for SubType {
    /// Writes the same text that [`SubType::as_str`] returns.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
72
/// Windows subsystem a PE32 file is built for.
///
/// This separates, for example, graphical programs from command-line ones, and regular
/// Windows programs from EFI applications.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubSystem {
    /// Subsystem value not recognised
    Unknown,

    /// Windows, graphical
    WindowsGUI,

    /// Windows, command line
    WindowsCLI,

    /// OS/2, command line
    OS2CLI,

    /// Posix subsystem
    POSIX,

    /// Native code
    NativeCode,

    /// Native driver
    NativeDriver,

    /// Windows CE (embedded)
    WindowsCE,

    /// Application for EFI (Extensible Firmware Interface)
    EFI,

    /// EFI driver using boot services
    EFIBoot,

    /// EFI ROM image
    EFIRom,

    /// EFI driver using runtime services
    EFIRuntimeDriver,

    /// Xbox application
    Xbox,

    /// Windows Boot application
    WindowsBoot,
}

impl SubSystem {
    /// Human-readable name of the subsystem as a static string.
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Unknown => "Unknown subsystem",
            Self::WindowsGUI => "Windows GUI",
            Self::WindowsCLI => "Windows CLI",
            Self::OS2CLI => "OS/2 CLI",
            Self::POSIX => "Posix",
            Self::NativeCode => "Native Code",
            Self::NativeDriver => "Native Driver",
            Self::WindowsCE => "Windows CE",
            Self::EFI => "EFI",
            Self::EFIBoot => "EFI boot",
            Self::EFIRom => "EFI ROM",
            Self::EFIRuntimeDriver => "EFI driver with runtime",
            Self::Xbox => "Xbox",
            Self::WindowsBoot => "Windows Boot",
        }
    }
}

impl Display for SubSystem {
    /// Writes the same text that [`SubSystem::as_str`] returns.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}

impl From<u16> for SubSystem {
    /// Maps the numeric subsystem value to a variant; any value without a mapping below
    /// becomes [`SubSystem::Unknown`].
    fn from(value: u16) -> Self {
        match value {
            1 => Self::NativeCode,
            2 => Self::WindowsGUI,
            3 => Self::WindowsCLI,
            5 => Self::OS2CLI,
            7 => Self::POSIX,
            8 => Self::NativeDriver,
            9 => Self::WindowsCE,
            10 => Self::EFI,
            11 => Self::EFIBoot,
            12 => Self::EFIRuntimeDriver,
            13 => Self::EFIRom,
            14 => Self::Xbox,
            16 => Self::WindowsBoot,
            _ => Self::Unknown,
        }
    }
}
170
171/// The struct to partially represent EXE files.
172///
173/// Used on Windows, DOS, React OS, OS/2, and maybe others.
174///
175/// Effort is made to fail gracefully, since malware may not obey all the rules, and some information
176/// is better than none because some part of the data wasn't parsed correctly.
177#[derive(Clone, Debug)]
178pub struct EXE<'a> {
179    /// If the program is 64-bit
180    pub is64bit: bool,
181
182    /// If the binary has extra data after the last section, could be used to hide something
183    pub has_overlay: Option<bool>,
184
185    /// Instruction set architecture for this binary
186    pub arch: Architecture,
187
188    /// EXE sub-type, mostly if it's for DOS, Windows, OS/2
189    pub sub_type: SubType,
190
191    /// Operating System for this binary, likely Windows
192    pub os: OperatingSystem,
193
194    /// COFF (Common Object File Format) header of the program
195    pub coff_header: Option<fields::COFFHeader>,
196
197    /// Optional Header for this program, not optional if for Windows
198    pub optional_header: Option<fields::OptionalHeader>,
199
200    /// Executable subtype: Program or Library?
201    pub exec_type: ExecutableType,
202
203    /// Windows Subsystem used by this program
204    pub subsystem: Option<SubSystem>,
205
206    /// Sections of this binary
207    pub sections: Option<Sections<'a>>,
208
209    /// External libraries used by this application or library
210    pub imports: Option<Imports>,
211
212    /// The array containing the raw bytes used to parse this program
213    pub contents: &'a [u8],
214}
215
216impl<'a> EXE<'a> {
217    /// EXE, MZ, or PE-32 file parsed from a sequence of bytes
218    ///
219    /// # Errors
220    ///
221    /// Returns an error if parsing fails.
222    #[allow(clippy::too_many_lines)]
223    #[instrument(name = "PE32 parser", skip(contents))]
224    pub fn from(contents: &'a [u8]) -> Result<Self> {
225        if !((contents[0] == EXE_MAGIC[0] && contents[1] == EXE_MAGIC[1])
226            || (contents[0] == EXE_MAGIC[1] && contents[1] == EXE_MAGIC[0]))
227        {
228            bail!("Not a MZ, MS-DOS, or PE32 file");
229        }
230
231        if contents.len() <= 0x40 {
232            bail!("Not enough bytes in PE32 file");
233        }
234
235        let pe_magic_offset =
236            u32_from_offset(contents, 0x3C, Ordering::LittleEndian).unwrap_or_default() as usize;
237        let coff_header_offset = pe_magic_offset + 4;
238        if pe_magic_offset > contents.len()
239            || pe_magic_offset + PE_MAGIC.len() > contents.len()
240            || !bytes_offset_match(contents, pe_magic_offset, &PE_MAGIC)
241        {
242            return Ok(Self {
243                is64bit: false,
244                has_overlay: None,
245                arch: Architecture::X86,
246                sub_type: SubType::DosExe,
247                os: OperatingSystem::DOS,
248                subsystem: None,
249                sections: None,
250                coff_header: None,
251                optional_header: None,
252                exec_type: ExecutableType::Program,
253                imports: None,
254                contents,
255            });
256        }
257
258        let mut sections = Sections::default();
259        let coff = fields::COFFHeader::from(&contents[coff_header_offset..])?;
260        let Some(optional_header) =
261            fields::OptionalHeader::from(&contents[coff_header_offset + 20..])
262        else {
263            return Ok(Self {
264                is64bit: false,
265                has_overlay: None,
266                arch: Architecture::X86,
267                sub_type: SubType::DosExe,
268                os: OperatingSystem::DOS,
269                subsystem: None,
270                sections: None,
271                coff_header: None,
272                optional_header: None,
273                exec_type: ExecutableType::Program,
274                imports: None,
275                contents,
276            });
277        };
278
279        let is64bit = optional_header.is_64bit();
280        let sub_type = {
281            let mut sub_type = SubType::PE32;
282            if is64bit {
283                sub_type = SubType::PE32_64;
284            }
285
286            if let Some(data_directories) = optional_header.data_directories() {
287                if data_directories.clr_runtime_header.virtual_address > 0 {
288                    sub_type = SubType::DotNet;
289                }
290            }
291
292            sub_type
293        };
294
295        let exec_type = {
296            if coff
297                .characteristics
298                .contains(fields::CoffCharacteristics::FileDLL)
299            {
300                ExecutableType::Library
301            } else if coff
302                .characteristics
303                .contains(fields::CoffCharacteristics::ExecutableImage)
304            {
305                ExecutableType::Program
306            } else {
307                ExecutableType::Unknown(0)
308            }
309        };
310
311        let mut subsystem = None;
312        let mut has_overlay = false;
313        if coff
314            .characteristics
315            .contains(fields::CoffCharacteristics::ExecutableImage)
316        {
317            let mut offset =
318                u32_from_offset(contents, 0x3C, Ordering::LittleEndian).unwrap_or_default() + 24;
319            if let Some(subsys) = optional_header.subsystem() {
320                subsystem = Some(subsys.into());
321            }
322
323            if is64bit {
324                offset += 240;
325            } else {
326                offset += 224;
327            }
328            let mut sections_offset = 0;
329            for sec_num in 0..coff.num_sections {
330                if (offset + 40) as usize > contents.len() {
331                    break;
332                }
333                let section_name =
334                    String::from_utf8(contents[offset as usize..(offset + 8) as usize].to_vec())
335                        .unwrap_or_default();
336                let section_virtual_size =
337                    u32_from_offset(contents, (offset + 8) as usize, Ordering::LittleEndian);
338                let section_virtual_address =
339                    u32_from_offset(contents, (offset + 12) as usize, Ordering::LittleEndian);
340                let section_size =
341                    u32_from_offset(contents, (offset + 16) as usize, Ordering::LittleEndian)
342                        .unwrap_or_default() as usize;
343                let section_offset =
344                    u32_from_offset(contents, (offset + 20) as usize, Ordering::LittleEndian)
345                        .unwrap_or_default() as usize;
346                let section_characteristics = FlagSet::<fields::SectionFlags>::new(
347                    u32_from_offset(contents, (offset + 36) as usize, Ordering::LittleEndian)
348                        .unwrap_or_default(),
349                )
350                .unwrap_or_default();
351                let section_executable = section_characteristics.contains(
352                    fields::SectionFlags::Executable | fields::SectionFlags::ContainsCode,
353                );
354
355                if section_offset + section_size < contents.len() {
356                    sections.push(Section {
357                        name: section_name,
358                        is_executable: section_executable,
359                        size: section_size,
360                        offset: section_offset,
361                        virtual_address: section_virtual_address.unwrap_or_default(),
362                        virtual_size: section_virtual_size.unwrap_or_default(),
363                        entropy: contents[section_offset..(section_offset + section_size)]
364                            .to_vec()
365                            .entropy(),
366                        data: Some(&contents[section_offset..(section_offset + section_size)]),
367                    });
368                } else {
369                    debug!("PE32: Skipping section {sec_num} '{section_name}': offset {section_offset} + size {section_size} >= file length {}", contents.len());
370                }
371
372                if section_size + section_offset > sections_offset {
373                    sections_offset = section_size + section_offset;
374                }
375                offset += 40; // Section entries are 40 bytes
376            }
377            has_overlay = sections_offset > 0
378                && sections_offset < contents.len()
379                && contents.len() - sections_offset > 0;
380        }
381
382        let mut imports = None;
383        if let Some(data_directories) = optional_header.data_directories() {
384            let import_section = sections
385                .iter()
386                .find(|x| x.virtual_address == data_directories.import_table.virtual_address);
387
388            if let Some(import_section) = import_section {
389                if import_section.size > 8 && import_section.offset > 0 {
390                    if let Some(num_imports) = match optional_header {
391                        fields::OptionalHeader::OptionalHeader32(h32) => h32.num_rva_and_sizes,
392                        fields::OptionalHeader::OptionalHeader64(h64) => h64.num_rva_and_sizes,
393                    } {
394                        imports =
395                            Some(data_directories.imports(num_imports as usize, import_section));
396                    }
397                }
398            }
399        }
400
401        Ok(Self {
402            is64bit,
403            has_overlay: Some(has_overlay),
404            arch: coff.architecture(),
405            sub_type,
406            os: OperatingSystem::Windows,
407            subsystem,
408            sections: Some(sections),
409            exec_type,
410            coff_header: Some(*coff),
411            optional_header: Some(optional_header),
412            imports,
413            contents,
414        })
415    }
416}
417
418impl ExecutableFile for EXE<'_> {
419    fn architecture(&self) -> Option<Architecture> {
420        Some(self.arch)
421    }
422
423    fn pointer_size(&self) -> usize {
424        match self.sub_type {
425            SubType::PE32_64 => 64,
426            SubType::LE | SubType::NE => 16,
427            _ => 32,
428        }
429    }
430
431    fn operating_system(&self) -> OperatingSystem {
432        self.os
433    }
434
435    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
436        self.coff_header.map(|c| c.compiled_date())
437    }
438
439    #[allow(clippy::cast_possible_truncation)]
440    fn num_sections(&self) -> u32 {
441        self.sections.as_ref().unwrap_or(&Sections::default()).len() as u32
442    }
443
444    fn sections(&self) -> Option<&Sections<'_>> {
445        self.sections.as_ref()
446    }
447
448    fn import_hash(&self) -> Option<Uuid> {
449        self.imports.as_ref().map(Imports::hash)
450    }
451
452    fn fuzzy_imports(&self) -> Option<String> {
453        self.imports.as_ref().map(Imports::fuzzy_hash)
454    }
455}
456
457impl SpecimenFile for EXE<'_> {
458    const MAGIC: &'static [&'static [u8]] = &[b"MZ", b"ZM"];
459
460    fn type_name(&self) -> &'static str {
461        self.sub_type.as_str()
462    }
463}
464
465impl Display for EXE<'_> {
466    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
467        writeln!(f, "EXE file:")?;
468        writeln!(f, "\tSubtype: {}", self.sub_type)?;
469        writeln!(f, "\tExecutable type: {}", self.exec_type)?;
470        writeln!(f, "\tOS: {}", self.os)?;
471        writeln!(f, "\tArch: {}", self.arch)?;
472        if let Some(coff) = self.coff_header {
473            writeln!(
474                f,
475                "\tCompiled: {:?}",
476                coff.compiled_date().format("%Y-%m-%d %H:%M:%S").to_string()
477            )?;
478        }
479        if let Some(subsys) = &self.subsystem {
480            writeln!(f, "\tSubsystem: {subsys}")?;
481        }
482        if let Some(sections) = &self.sections {
483            writeln!(f, "\t{} sections:", sections.len())?;
484            for section in sections {
485                writeln!(f, "\t\t{section}")?;
486            }
487        }
488        if let Some(imports) = &self.imports {
489            if imports.imports.len() == imports.expected_imports as usize {
490                writeln!(f, "\t{} imports:", imports.imports.len())?;
491            } else {
492                writeln!(
493                    f,
494                    "\t{} out of {} imports:",
495                    imports.imports.len(),
496                    imports.expected_imports
497                )?;
498            }
499            for import in &imports.imports {
500                writeln!(f, "\t\t{import}")?;
501            }
502            writeln!(f, "\t\tImport hash: {}", hex::encode(imports.hash()))?;
503            writeln!(f, "\t\tFuzzy Import hash: {}", imports.fuzzy_hash())?;
504        }
505        if self.has_overlay == Some(true) {
506            writeln!(f, "\tHas extra bytes at the end (overlay).")?;
507        }
508        writeln!(f, "\tSize: {}", self.contents.len())?;
509        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())
510    }
511}
512
#[cfg(test)]
mod tests {
    use super::*;

    use rstest::rstest;

    // The aclock binaries come from https://github.com/tenox7/aclock; they serve as unusual
    // sample files so that no real malware has to be stored in the repository.
    #[rstest]
    #[case::alpha(include_bytes!("../../../testdata/exe/pe32_aclock_axp.exe"), false, Architecture::Alpha, ExecutableType::Program, SubType::PE32, Some(SubSystem::WindowsCLI))]
    #[case::alpha64(include_bytes!("../../../testdata/exe/pe64_aclock_axp64.exe"), true, Architecture::Alpha64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::arm64(include_bytes!("../../../testdata/exe/pe64_win32_console_arm64_gnu.exe"), true, Architecture::ARM64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::dot_net(include_bytes!("../../../testdata/exe/pe32_dotnet.exe"), false, Architecture::X86, ExecutableType::Program, SubType::DotNet, Some(SubSystem::WindowsCLI))]
    #[case::efi(include_bytes!("../../../testdata/exe/efi/hello.efi"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::EFI))]
    #[case::mips(include_bytes!("../../../testdata/exe/pe32_aclock_mips.exe"), false, Architecture::MIPSEL, ExecutableType::Program, SubType::PE32, Some(SubSystem::WindowsCLI))]
    #[case::ppc(include_bytes!("../../../testdata/exe/pe32_aclock_ppc_winnt.exe"), false, Architecture::PowerPCLE, ExecutableType::Program, SubType::PE32, Some(SubSystem::WindowsCLI))]
    #[case::x86_64_console(include_bytes!("../../../testdata/exe/pe64_win32_console_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::x86_64_gui(include_bytes!("../../../testdata/exe/pe64_win32_gui_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsGUI))]
    #[case::x86_64_posix(include_bytes!("../../../testdata/exe/pe64_posix_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::POSIX))]
    #[case::x86_64_xbox(include_bytes!("../../../testdata/exe/pe64_xbox_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::Xbox))]
    #[case::x86_64_lib_console(include_bytes!("../../../testdata/exe/pe64_lib_console_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Library, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::x86_64_lib_gui(include_bytes!("../../../testdata/exe/pe64_lib_gui_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Library, SubType::PE32_64, Some(SubSystem::WindowsGUI))]
    #[case::x86_exe(include_bytes!("../../../testdata/exe/aclock-i386-win16.exe"), false, Architecture::X86, ExecutableType::Program, SubType::DosExe, None)]
    #[case::i8086_exe(include_bytes!("../../../testdata/exe/aclock-i8086-win1x.exe"), false, Architecture::X86, ExecutableType::Program, SubType::DosExe, None)]
    #[test]
    fn binaries(
        #[case] bytes: &[u8],
        #[case] is_64bit: bool,
        #[case] arch: Architecture,
        #[case] exec_type: ExecutableType,
        #[case] sub_type: SubType,
        #[case] subsystem: Option<SubSystem>,
    ) {
        let exe = EXE::from(bytes).unwrap();
        eprintln!("{exe}");
        assert_eq!(exe.is64bit, is_64bit);
        assert_eq!(exe.arch, arch);
        assert_eq!(exe.subsystem, subsystem);
        assert_eq!(exe.exec_type, exec_type);
        assert_eq!(exe.sub_type, sub_type);

        // Import expectations are only pinned for the x86_64 samples.
        if arch != Architecture::X86_64 {
            return;
        }

        // The EFI sample is expected to have no imports at all.
        if subsystem == Some(SubSystem::EFI) {
            assert!(exe.imports.is_none());
            return;
        }

        let imports = exe.imports.unwrap();
        let expected_hash = if exec_type == ExecutableType::Library {
            Some("466e0075eba65b201b4cc8d4d3f85cbb")
        } else if exec_type == ExecutableType::Program {
            Some("610b6b6aa37c8e01c9855a05dcf00565")
        } else {
            None
        };
        if let Some(expected_hash) = expected_hash {
            assert_eq!(expected_hash, hex::encode(imports.hash()));
        }
    }
}