mod fields;
use crate::exec::{
Architecture, ExecutableFile, ExecutableType, Imports, OperatingSystem, Section, Sections,
};
use crate::utils::{bytes_offset_match, u32_from_offset, EntropyCalc};
use crate::{Ordering, SpecimenFile};
use std::fmt::{Display, Formatter};
use anyhow::{bail, Result};
use chrono::{DateTime, Utc};
use flagset::FlagSet;
use tracing::{debug, instrument};
use uuid::Uuid;
/// Magic bytes `MZ` expected at the start of the file; `EXE::from` also accepts them in
/// swapped order (`ZM`).
const EXE_MAGIC: [u8; 2] = [b'M', b'Z'];
/// Signature `PE\0\0` looked for at the file offset stored (little-endian `u32`) at 0x3C.
const PE_MAGIC: [u8; 4] = [0x50, 0x45, 0x00, 0x00];
/// Kind of executable this file was identified as.
///
/// The human-readable description of each variant is available through `SubType::as_str`
/// and the `Display` implementation.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SubType {
/// MZ DOS executable; also used as the fallback when no usable PE headers are found.
DosExe,
/// .Net executable: a PE file whose CLR runtime header data directory has a non-zero
/// virtual address.
DotNet,
/// Portable Executable (PE32).
PE32,
/// 64-bit Portable Executable (PE32+).
PE32_64,
/// New Executable.
NE,
/// Linear Executable.
LE,
}
impl SubType {
#[must_use]
pub fn as_str(&self) -> &'static str {
match self {
SubType::DosExe => "MZ DOS executable",
SubType::DotNet => ".Net executable",
SubType::PE32 => "Portable Executable (PE32)",
SubType::PE32_64 => "64-bit Portable Executable (PE32+)",
SubType::NE => "New Executable",
SubType::LE => "Linear Executable",
}
}
}
impl Display for SubType {
    /// Writes the same text that [`SubType::as_str`] returns.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let description = self.as_str();
        f.write_str(description)
    }
}
/// Subsystem a PE image targets, decoded from the optional header's `u16` subsystem value.
///
/// The numeric value noted on each variant is the one mapped by `From<u16> for SubSystem`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SubSystem {
/// Any value without a dedicated variant.
Unknown,
/// Windows GUI (value 2).
WindowsGUI,
/// Windows CLI (value 3).
WindowsCLI,
/// OS/2 CLI (value 5).
OS2CLI,
/// Posix (value 7).
POSIX,
/// Native code (value 1).
NativeCode,
/// Native driver (value 8).
NativeDriver,
/// Windows CE (value 9).
WindowsCE,
/// EFI (value 10).
EFI,
/// EFI boot (value 11).
EFIBoot,
/// EFI ROM (value 13).
EFIRom,
/// EFI driver with runtime (value 12).
EFIRuntimeDriver,
/// Xbox (value 14).
Xbox,
/// Windows Boot (value 16).
WindowsBoot,
}
impl SubSystem {
#[must_use]
pub fn as_str(&self) -> &'static str {
match self {
SubSystem::Unknown => "Unknown subsystem",
SubSystem::WindowsGUI => "Windows GUI",
SubSystem::WindowsCLI => "Windows CLI",
SubSystem::OS2CLI => "OS/2 CLI",
SubSystem::POSIX => "Posix",
SubSystem::NativeCode => "Native Code",
SubSystem::NativeDriver => "Native Driver",
SubSystem::WindowsCE => "Windows CE",
SubSystem::EFI => "EFI",
SubSystem::EFIBoot => "EFI boot",
SubSystem::EFIRom => "EFI ROM",
SubSystem::EFIRuntimeDriver => "EFI driver with runtime",
SubSystem::Xbox => "Xbox",
SubSystem::WindowsBoot => "Windows Boot",
}
}
}
impl Display for SubSystem {
    /// Writes the same text that [`SubSystem::as_str`] returns.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let label = self.as_str();
        f.write_str(label)
    }
}
impl From<u16> for SubSystem {
fn from(value: u16) -> Self {
match value {
1 => SubSystem::NativeCode,
2 => SubSystem::WindowsGUI,
3 => SubSystem::WindowsCLI,
5 => SubSystem::OS2CLI,
7 => SubSystem::POSIX,
8 => SubSystem::NativeDriver,
9 => SubSystem::WindowsCE,
10 => SubSystem::EFI,
11 => SubSystem::EFIBoot,
12 => SubSystem::EFIRuntimeDriver,
13 => SubSystem::EFIRom,
14 => SubSystem::Xbox,
16 => SubSystem::WindowsBoot,
_ => SubSystem::Unknown,
}
}
}
/// Parsed view of an MZ / PE executable, borrowing the raw file bytes for `'a`.
///
/// Built by `EXE::from`. When no usable PE headers are found the value describes a plain
/// DOS executable and every PE-specific field is `None`.
#[derive(Clone, Debug)]
pub struct EXE<'a> {
/// Whether the optional header reports a 64-bit image; `false` for the DOS fallback.
pub is64bit: bool,
/// `Some(true)` when bytes remain in the file past the furthest section end seen while
/// walking the section headers; `None` for the DOS fallback.
pub has_overlay: Option<bool>,
/// Architecture taken from the COFF header; `Architecture::X86` for the DOS fallback.
pub arch: Architecture,
/// Which kind of executable the file was identified as.
pub sub_type: SubType,
/// `OperatingSystem::Windows` for PE files, `OperatingSystem::DOS` for the DOS fallback.
pub os: OperatingSystem,
/// Copy of the parsed COFF header; `None` for the DOS fallback.
pub coff_header: Option<fields::COFFHeader>,
/// Parsed optional header; `None` for the DOS fallback.
pub optional_header: Option<fields::OptionalHeader>,
/// Library or program, derived from the COFF characteristics flags.
pub exec_type: ExecutableType,
/// Subsystem from the optional header; only filled in when the COFF characteristics
/// mark the file as an executable image.
pub subsystem: Option<SubSystem>,
/// Sections collected while walking the section headers; `None` for the DOS fallback.
pub sections: Option<Sections<'a>>,
/// Imports read from the section whose virtual address equals the import table's, if any.
pub imports: Option<Imports>,
/// The complete, unmodified file contents this value was parsed from.
pub contents: &'a [u8],
}
impl<'a> EXE<'a> {
#[allow(clippy::too_many_lines)]
#[instrument(name = "PE32 parser", skip(contents))]
pub fn from(contents: &'a [u8]) -> Result<Self> {
if !((contents[0] == EXE_MAGIC[0] && contents[1] == EXE_MAGIC[1])
|| (contents[0] == EXE_MAGIC[1] && contents[1] == EXE_MAGIC[0]))
{
bail!("Not a MZ, MS-DOS, or PE32 file");
}
if contents.len() <= 0x40 {
bail!("Not enough bytes in PE32 file");
}
let pe_magic_offset =
u32_from_offset(contents, 0x3C, Ordering::LittleEndian).unwrap_or_default() as usize;
let coff_header_offset = pe_magic_offset + 4;
if pe_magic_offset > contents.len()
|| pe_magic_offset + PE_MAGIC.len() > contents.len()
|| !bytes_offset_match(contents, pe_magic_offset, &PE_MAGIC)
{
return Ok(Self {
is64bit: false,
has_overlay: None,
arch: Architecture::X86,
sub_type: SubType::DosExe,
os: OperatingSystem::DOS,
subsystem: None,
sections: None,
coff_header: None,
optional_header: None,
exec_type: ExecutableType::Program,
imports: None,
contents,
});
}
let mut sections = Sections::default();
let coff = fields::COFFHeader::from(&contents[coff_header_offset..])?;
let Some(optional_header) =
fields::OptionalHeader::from(&contents[coff_header_offset + 20..])
else {
return Ok(Self {
is64bit: false,
has_overlay: None,
arch: Architecture::X86,
sub_type: SubType::DosExe,
os: OperatingSystem::DOS,
subsystem: None,
sections: None,
coff_header: None,
optional_header: None,
exec_type: ExecutableType::Program,
imports: None,
contents,
});
};
let is64bit = optional_header.is_64bit();
let sub_type = {
let mut sub_type = SubType::PE32;
if is64bit {
sub_type = SubType::PE32_64;
}
if let Some(data_directories) = optional_header.data_directories() {
if data_directories.clr_runtime_header.virtual_address > 0 {
sub_type = SubType::DotNet;
}
}
sub_type
};
let exec_type = {
if coff
.characteristics
.contains(fields::CoffCharacteristics::FileDLL)
{
ExecutableType::Library
} else if coff
.characteristics
.contains(fields::CoffCharacteristics::ExecutableImage)
{
ExecutableType::Program
} else {
ExecutableType::Unknown(0)
}
};
let mut subsystem = None;
let mut has_overlay = false;
if coff
.characteristics
.contains(fields::CoffCharacteristics::ExecutableImage)
{
let mut offset =
u32_from_offset(contents, 0x3C, Ordering::LittleEndian).unwrap_or_default() + 24;
if let Some(subsys) = optional_header.subsystem() {
subsystem = Some(subsys.into());
}
if is64bit {
offset += 240;
} else {
offset += 224;
}
let mut sections_offset = 0;
for sec_num in 0..coff.num_sections {
if (offset + 40) as usize > contents.len() {
break;
}
let section_name =
String::from_utf8(contents[offset as usize..(offset + 8) as usize].to_vec())
.unwrap_or_default();
let section_virtual_size =
u32_from_offset(contents, (offset + 8) as usize, Ordering::LittleEndian);
let section_virtual_address =
u32_from_offset(contents, (offset + 12) as usize, Ordering::LittleEndian);
let section_size =
u32_from_offset(contents, (offset + 16) as usize, Ordering::LittleEndian)
.unwrap_or_default() as usize;
let section_offset =
u32_from_offset(contents, (offset + 20) as usize, Ordering::LittleEndian)
.unwrap_or_default() as usize;
let section_characteristics = FlagSet::<fields::SectionFlags>::new(
u32_from_offset(contents, (offset + 36) as usize, Ordering::LittleEndian)
.unwrap_or_default(),
)
.unwrap_or_default();
let section_executable = section_characteristics.contains(
fields::SectionFlags::Executable | fields::SectionFlags::ContainsCode,
);
if section_offset + section_size < contents.len() {
sections.push(Section {
name: section_name,
is_executable: section_executable,
size: section_size,
offset: section_offset,
virtual_address: section_virtual_address.unwrap_or_default(),
virtual_size: section_virtual_size.unwrap_or_default(),
entropy: contents[section_offset..(section_offset + section_size)]
.to_vec()
.entropy(),
data: Some(&contents[section_offset..(section_offset + section_size)]),
});
} else {
debug!("PE32: Skipping section {sec_num} '{section_name}': offset {section_offset} + size {section_size} >= file length {}", contents.len());
}
if section_size + section_offset > sections_offset {
sections_offset = section_size + section_offset;
}
offset += 40; }
has_overlay = sections_offset > 0
&& sections_offset < contents.len()
&& contents.len() - sections_offset > 0;
}
let mut imports = None;
if let Some(data_directories) = optional_header.data_directories() {
let import_section = sections
.iter()
.find(|x| x.virtual_address == data_directories.import_table.virtual_address);
if let Some(import_section) = import_section {
if import_section.size > 8 && import_section.offset > 0 {
if let Some(num_imports) = match optional_header {
fields::OptionalHeader::OptionalHeader32(h32) => h32.num_rva_and_sizes,
fields::OptionalHeader::OptionalHeader64(h64) => h64.num_rva_and_sizes,
} {
imports =
Some(data_directories.imports(num_imports as usize, import_section));
}
}
}
}
Ok(Self {
is64bit,
has_overlay: Some(has_overlay),
arch: coff.architecture(),
sub_type,
os: OperatingSystem::Windows,
subsystem,
sections: Some(sections),
exec_type,
coff_header: Some(*coff),
optional_header: Some(optional_header),
imports,
contents,
})
}
}
impl ExecutableFile for EXE<'_> {
    /// Always reports the architecture stored in `arch`.
    fn architecture(&self) -> Option<Architecture> {
        Some(self.arch)
    }

    /// Returns 64 for PE32+, 16 for NE/LE and 32 for every other sub-type.
    fn pointer_size(&self) -> usize {
        match self.sub_type {
            SubType::NE | SubType::LE => 16,
            SubType::PE32_64 => 64,
            SubType::DosExe | SubType::DotNet | SubType::PE32 => 32,
        }
    }

    /// Returns the operating system stored in `os`.
    fn operating_system(&self) -> OperatingSystem {
        self.os
    }

    /// Returns the compile date from the COFF header, or `None` when no header was parsed.
    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
        let header = self.coff_header?;
        Some(header.compiled_date())
    }

    /// Returns the number of recorded sections, using a default section list when none
    /// were parsed.
    #[allow(clippy::cast_possible_truncation)]
    fn num_sections(&self) -> u32 {
        let fallback = Sections::default();
        let sections = self.sections.as_ref().unwrap_or(&fallback);
        sections.len() as u32
    }

    /// Borrows the parsed sections, if any.
    fn sections(&self) -> Option<&Sections<'_>> {
        self.sections.as_ref()
    }

    /// Returns the hash of the import list, if imports were parsed.
    fn import_hash(&self) -> Option<Uuid> {
        self.imports.as_ref().map(|imports| imports.hash())
    }

    /// Returns the fuzzy hash of the import list, if imports were parsed.
    fn fuzzy_imports(&self) -> Option<String> {
        self.imports.as_ref().map(|imports| imports.fuzzy_hash())
    }
}
impl SpecimenFile for EXE<'_> {
const MAGIC: &'static [&'static [u8]] = &[b"MZ", b"ZM"];
fn type_name(&self) -> &'static str {
self.sub_type.as_str()
}
}
impl Display for EXE<'_> {
    /// Writes a multi-line, tab-indented summary: sub-type, executable type, OS and
    /// architecture, then whichever of compile date, subsystem, sections and imports are
    /// available, an overlay note when applicable, and finally file size and entropy.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "EXE file:")?;
        writeln!(f, "\tSubtype: {}", self.sub_type)?;
        writeln!(f, "\tExecutable type: {}", self.exec_type)?;
        writeln!(f, "\tOS: {}", self.os)?;
        writeln!(f, "\tArch: {}", self.arch)?;

        if let Some(header) = self.coff_header {
            let compiled = header
                .compiled_date()
                .format("%Y-%m-%d %H:%M:%S")
                .to_string();
            writeln!(f, "\tCompiled: {:?}", compiled)?;
        }

        if let Some(subsys) = self.subsystem.as_ref() {
            writeln!(f, "\tSubsystem: {subsys}")?;
        }

        if let Some(sections) = self.sections.as_ref() {
            writeln!(f, "\t{} sections:", sections.len())?;
            for section in sections {
                writeln!(f, "\t\t{section}")?;
            }
        }

        if let Some(imports) = self.imports.as_ref() {
            let found = imports.imports.len();
            // Only mention the expected count when it differs from what was found.
            if found == imports.expected_imports as usize {
                writeln!(f, "\t{} imports:", found)?;
            } else {
                writeln!(
                    f,
                    "\t{} out of {} imports:",
                    found, imports.expected_imports
                )?;
            }
            for import in &imports.imports {
                writeln!(f, "\t\t{import}")?;
            }
            writeln!(f, "\t\tImport hash: {}", hex::encode(imports.hash()))?;
            writeln!(f, "\t\tFuzzy Import hash: {}", imports.fuzzy_hash())?;
        }

        if matches!(self.has_overlay, Some(true)) {
            writeln!(f, "\tHas extra bytes at the end (overlay).")?;
        }

        writeln!(f, "\tSize: {}", self.contents.len())?;
        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Parses each sample binary and checks the detected bitness, architecture, subsystem,
    /// executable type and sub-type. For x86-64 samples it also checks the imports: EFI has
    /// none, while libraries and programs each have a known import hash.
    #[rstest]
    #[case::alpha(include_bytes!("../../../testdata/exe/pe32_aclock_axp.exe"), false, Architecture::Alpha, ExecutableType::Program, SubType::PE32, Some(SubSystem::WindowsCLI))]
    #[case::alpha64(include_bytes!("../../../testdata/exe/pe64_aclock_axp64.exe"), true, Architecture::Alpha64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::arm64(include_bytes!("../../../testdata/exe/pe64_win32_console_arm64_gnu.exe"), true, Architecture::ARM64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::dot_net(include_bytes!("../../../testdata/exe/pe32_dotnet.exe"), false, Architecture::X86, ExecutableType::Program, SubType::DotNet, Some(SubSystem::WindowsCLI))]
    #[case::efi(include_bytes!("../../../testdata/exe/efi/hello.efi"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::EFI))]
    #[case::mips(include_bytes!("../../../testdata/exe/pe32_aclock_mips.exe"), false, Architecture::MIPSEL, ExecutableType::Program, SubType::PE32, Some(SubSystem::WindowsCLI))]
    #[case::ppc(include_bytes!("../../../testdata/exe/pe32_aclock_ppc_winnt.exe"), false, Architecture::PowerPCLE, ExecutableType::Program, SubType::PE32, Some(SubSystem::WindowsCLI))]
    #[case::x86_64_console(include_bytes!("../../../testdata/exe/pe64_win32_console_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::x86_64_gui(include_bytes!("../../../testdata/exe/pe64_win32_gui_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::WindowsGUI))]
    #[case::x86_64_posix(include_bytes!("../../../testdata/exe/pe64_posix_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::POSIX))]
    #[case::x86_64_xbox(include_bytes!("../../../testdata/exe/pe64_xbox_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Program, SubType::PE32_64, Some(SubSystem::Xbox))]
    #[case::x86_64_lib_console(include_bytes!("../../../testdata/exe/pe64_lib_console_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Library, SubType::PE32_64, Some(SubSystem::WindowsCLI))]
    #[case::x86_64_lib_gui(include_bytes!("../../../testdata/exe/pe64_lib_gui_x86_64_gnu.exe"), true, Architecture::X86_64, ExecutableType::Library, SubType::PE32_64, Some(SubSystem::WindowsGUI))]
    #[case::x86_exe(include_bytes!("../../../testdata/exe/aclock-i386-win16.exe"), false, Architecture::X86, ExecutableType::Program, SubType::DosExe, None)]
    #[case::i8086_exe(include_bytes!("../../../testdata/exe/aclock-i8086-win1x.exe"), false, Architecture::X86, ExecutableType::Program, SubType::DosExe, None)]
    #[test]
    fn binaries(
        #[case] bytes: &[u8],
        #[case] is_64bit: bool,
        #[case] arch: Architecture,
        #[case] exec_type: ExecutableType,
        #[case] sub_type: SubType,
        #[case] subsystem: Option<SubSystem>,
    ) {
        let exe = EXE::from(bytes).unwrap();
        eprintln!("{exe}");

        assert_eq!(exe.is64bit, is_64bit);
        assert_eq!(exe.arch, arch);
        assert_eq!(exe.subsystem, subsystem);
        assert_eq!(exe.exec_type, exec_type);
        assert_eq!(exe.sub_type, sub_type);

        // Import expectations are only defined for the x86-64 samples.
        if arch != Architecture::X86_64 {
            return;
        }
        if subsystem == Some(SubSystem::EFI) {
            assert!(exe.imports.is_none());
            return;
        }

        let imports = exe.imports.unwrap();
        let expected_hash = match exec_type {
            ExecutableType::Library => "466e0075eba65b201b4cc8d4d3f85cbb",
            ExecutableType::Program => "610b6b6aa37c8e01c9855a05dcf00565",
            _ => return,
        };
        assert_eq!(expected_hash, hex::encode(imports.hash()));
    }
}