use std::collections::BTreeSet;
/// Summary of a PE image as produced by [`parse_pe`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PeMetadata {
    /// `true` when bit `0x2000` of the COFF `Characteristics` field is set.
    pub is_dll: bool,
    /// `true` when the optional-header magic is `0x20b`.
    pub is_64bit: bool,
    /// Lower-case hexadecimal MD5 digest computed over the resolved imports; the MD5 of
    /// the empty string when no import could be resolved.
    pub imphash: String,
    /// Section count taken from the COFF file header.
    pub num_sections: u16,
    /// Number of import thunks that were resolved (repeated imports are counted each time).
    pub num_imports: u32,
    /// Entry-point RVA read from the optional header; `0` when no recognised optional
    /// header is present.
    pub entry_point_rva: u32,
    /// `true` when the certificate data-directory entry has a non-zero location and size.
    /// Only presence is recorded; nothing about the signature is verified.
    pub has_signature: bool,
    /// Section names in section-table order, cut at the first NUL byte and decoded as
    /// lossy UTF-8.
    pub section_names: Vec<String>,
    /// Names of imported DLLs in import-descriptor order, in their original letter case.
    pub import_dlls: Vec<String>,
}
/// Parses the headers of an in-memory PE image and summarises them as [`PeMetadata`].
///
/// The DOS header, PE signature, COFF file header, optional header and section table are
/// decoded, then the import directory is walked by `compute_imphash`.
///
/// Returns `None` when the buffer is shorter than 64 bytes, lacks the `MZ` or `PE\0\0`
/// signature, or is too short to hold the optional header and section table announced by
/// the COFF header. Optional-header fields that do not fit inside `SizeOfOptionalHeader`
/// are left at `0` instead of being read from whatever bytes follow the header, so a
/// truncated or hostile file can never cause an out-of-bounds panic.
#[must_use]
pub fn parse_pe(bytes: &[u8]) -> Option<PeMetadata> {
    const SECTION_HEADER_SIZE: usize = 40;

    // Bounds-checked little-endian u16 read.
    let read_u16 = |offset: usize| -> Option<u16> {
        let end = offset.checked_add(2)?;
        let pair = bytes.get(offset..end)?;
        Some(u16::from_le_bytes([pair[0], pair[1]]))
    };

    // DOS header: "MZ" magic, with e_lfanew (offset of the PE signature) at 0x3C.
    if bytes.len() < 64 || &bytes[..2] != b"MZ" {
        return None;
    }
    let e_lfanew = read_u32(bytes, 0x3C)? as usize;

    // PE signature (4 bytes) followed by the COFF file header (20 bytes).
    let coff_offset = e_lfanew.checked_add(4)?;
    let optional_header_offset = coff_offset.checked_add(20)?;
    if bytes.len() < optional_header_offset || &bytes[e_lfanew..coff_offset] != b"PE\0\0" {
        return None;
    }
    let num_sections = read_u16(coff_offset + 2)?;
    let size_optional_header = read_u16(coff_offset + 16)?;
    let characteristics = read_u16(coff_offset + 18)?;

    let optional_header_size = usize::from(size_optional_header);
    let section_table_offset = optional_header_offset.checked_add(optional_header_size)?;
    if bytes.len() < section_table_offset {
        return None;
    }

    let is_dll = (characteristics & 0x2000) != 0;
    let mut is_64bit = false;
    let mut entry_point_rva = 0u32;
    let mut data_dir_import_rva = 0u32;
    let mut data_dir_import_size = 0u32;
    let mut data_dir_cert_rva = 0u32;
    let mut data_dir_cert_size = 0u32;

    // Every optional-header field is read only if the declared header size covers it.
    if optional_header_size >= 2 {
        let magic = read_u16(optional_header_offset)?;
        is_64bit = magic == 0x20b;
        if is_64bit || magic == 0x10b {
            // AddressOfEntryPoint occupies bytes 16..20 in both PE32 and PE32+.
            if optional_header_size >= 20 {
                entry_point_rva = read_u32(bytes, optional_header_offset + 16)?;
            }
            // The data-directory table starts later in PE32+ because of its wider fields.
            let data_dir_rel: usize = if is_64bit { 112 } else { 96 };
            let data_dir_offset = optional_header_offset + data_dir_rel;
            // Directory entry 1 (imports) spans bytes 8..16 of the table.
            if optional_header_size >= data_dir_rel + 16 {
                data_dir_import_rva = read_u32(bytes, data_dir_offset + 8)?;
                data_dir_import_size = read_u32(bytes, data_dir_offset + 12)?;
            }
            // Directory entry 4 (certificate table) spans bytes 32..40 of the table.
            if optional_header_size >= data_dir_rel + 40 {
                data_dir_cert_rva = read_u32(bytes, data_dir_offset + 32)?;
                data_dir_cert_size = read_u32(bytes, data_dir_offset + 36)?;
            }
        }
    }

    // Section table: `num_sections` fixed-size headers right after the optional header.
    let section_count = usize::from(num_sections);
    let section_table_end =
        section_table_offset.checked_add(section_count * SECTION_HEADER_SIZE)?;
    let section_table = bytes.get(section_table_offset..section_table_end)?;

    let mut section_names = Vec::with_capacity(section_count);
    let mut sections = Vec::with_capacity(section_count);
    for header in section_table.chunks_exact(SECTION_HEADER_SIZE) {
        // The name is an 8-byte field, NUL-padded when shorter.
        let raw_name = &header[..8];
        let name_len = raw_name
            .iter()
            .position(|&byte| byte == 0)
            .unwrap_or(raw_name.len());
        section_names.push(String::from_utf8_lossy(&raw_name[..name_len]).into_owned());
        sections.push(Section {
            virtual_size: read_u32(header, 8)?,
            virtual_address: read_u32(header, 12)?,
            size_of_raw_data: read_u32(header, 16)?,
            pointer_to_raw_data: read_u32(header, 20)?,
        });
    }

    // Presence check only: the certificate data is not parsed or validated.
    let has_signature = data_dir_cert_rva != 0 && data_dir_cert_size != 0;
    let (imphash, num_imports, import_dlls) = compute_imphash(
        bytes,
        &sections,
        data_dir_import_rva,
        data_dir_import_size,
        is_64bit,
    );
    Some(PeMetadata {
        is_dll,
        is_64bit,
        imphash,
        num_sections,
        num_imports,
        entry_point_rva,
        has_signature,
        section_names,
        import_dlls,
    })
}
/// The subset of a section-table entry needed to translate RVAs into file offsets.
/// Fields are listed in the order in which `parse_pe` reads them from the section header.
struct Section {
    /// Size of the section once loaded (header bytes 8..12).
    virtual_size: u32,
    /// RVA at which the section starts (header bytes 12..16).
    virtual_address: u32,
    /// Number of bytes the section occupies in the file (header bytes 16..20).
    size_of_raw_data: u32,
    /// File offset of the section's first byte (header bytes 20..24).
    pointer_to_raw_data: u32,
}
/// Reads a little-endian `u32` from `bytes` starting at `offset`.
///
/// Returns `None` when fewer than four bytes are available at `offset`, including when
/// `offset + 4` would not fit in a `usize` (so a hostile offset can neither panic nor wrap
/// around to a small index).
fn read_u32(bytes: &[u8], offset: usize) -> Option<u32> {
    let end = offset.checked_add(4)?;
    let window = bytes.get(offset..end)?;
    let mut raw = [0u8; 4];
    raw.copy_from_slice(window);
    Some(u32::from_le_bytes(raw))
}
/// Reads a little-endian `u64` from `bytes` starting at `offset`.
///
/// Returns `None` when fewer than eight bytes are available at `offset`, including when
/// `offset + 8` would not fit in a `usize` (so a hostile offset can neither panic nor wrap
/// around to a small index).
fn read_u64(bytes: &[u8], offset: usize) -> Option<u64> {
    let end = offset.checked_add(8)?;
    let window = bytes.get(offset..end)?;
    let mut raw = [0u8; 8];
    raw.copy_from_slice(window);
    Some(u64::from_le_bytes(raw))
}
/// Translates a relative virtual address into an offset within the file.
///
/// The first section whose range contains `rva` is used, and the RVA must fall inside the
/// part of that section that is actually backed by file data (`size_of_raw_data`).
/// Returns `None` when no section qualifies or when the resulting offset does not fit in a
/// `usize`.
///
/// Section headers come straight from the (untrusted) file, so the range checks are
/// phrased as subtractions: `virtual_address + size` could overflow `u32`, whereas
/// `rva - virtual_address` cannot once `rva >= virtual_address` is established.
fn rva_to_file_offset(rva: u32, sections: &[Section]) -> Option<usize> {
    for section in sections {
        // `None` here means the RVA lies below this section's start.
        let Some(delta) = rva.checked_sub(section.virtual_address) else {
            continue;
        };
        let span = section.virtual_size.max(section.size_of_raw_data);
        if delta < span && delta < section.size_of_raw_data {
            // `u32 -> usize` is lossless on 32- and 64-bit targets; only the sum can overflow.
            return (section.pointer_to_raw_data as usize).checked_add(delta as usize);
        }
    }
    None
}
/// Extracts the NUL-terminated string that begins at `offset`.
///
/// Bytes up to (not including) the first zero byte are decoded as UTF-8, with invalid
/// sequences replaced by U+FFFD. Returns `None` when `offset` is at or past the end of
/// `bytes`, or when no zero byte follows it.
fn read_null_terminated_string(bytes: &[u8], offset: usize) -> Option<String> {
    // An offset equal to `bytes.len()` yields an empty tail, which has no terminator and
    // therefore also produces `None` below.
    let tail = bytes.get(offset..)?;
    let terminator = tail.iter().position(|&candidate| candidate == 0)?;
    let text = String::from_utf8_lossy(&tail[..terminator]);
    Some(text.into_owned())
}
/// Walks the import directory and derives an import hash from it.
///
/// * `bytes` - the whole file.
/// * `sections` - section headers used to translate RVAs into file offsets.
/// * `import_rva` / `import_size` - the import data-directory entry.
/// * `is_64bit` - selects 8-byte (PE32+) or 4-byte (PE32) thunk entries.
///
/// Returns `(imphash, num_imports, import_dlls)`:
/// * `imphash` - lower-case hex MD5 of the sorted, de-duplicated `dll.function` /
///   `dll.ordN` strings (all lower-cased) concatenated together, or the MD5 of the empty
///   string when nothing could be resolved;
/// * `num_imports` - number of thunks resolved, counting repeats;
/// * `import_dlls` - DLL names in descriptor order, original letter case.
///
/// Malformed input is handled on a best-effort basis: unreadable descriptors, names or
/// thunks are skipped rather than reported.
///
/// NOTE(review): the hash input keeps the DLL extension, is sorted/de-duplicated and has no
/// separator between entries, so the digest is probably not interchangeable with the
/// imphash produced by other tools (e.g. pefile) - confirm whether that is intended before
/// comparing against external feeds.
fn compute_imphash(
    bytes: &[u8],
    sections: &[Section],
    import_rva: u32,
    import_size: u32,
    is_64bit: bool,
) -> (String, u32, Vec<String>) {
    /// MD5 digest of the empty string.
    const EMPTY_MD5: &str = "d41d8cd98f00b204e9800998ecf8427e";
    /// Size in bytes of one import descriptor.
    const DESCRIPTOR_SIZE: usize = 20;
    /// Upper bound on descriptors walked, as a guard against hostile directory sizes.
    const MAX_DESCRIPTORS: u32 = 1024;
    /// Highest thunk index inspected per DLL, as a guard against unterminated tables.
    const MAX_THUNK_INDEX: usize = 8192;

    let nothing_imported = || (EMPTY_MD5.to_string(), 0, Vec::new());

    // A directory smaller than one descriptor cannot describe any import.
    if import_rva == 0 || import_size < DESCRIPTOR_SIZE as u32 {
        return nothing_imported();
    }
    let Some(import_dir_offset) = rva_to_file_offset(import_rva, sections) else {
        return nothing_imported();
    };

    // Thunk width and import-by-ordinal flag (top bit) depend only on the image bitness.
    let (entry_size, ordinal_flag) = if is_64bit {
        (8usize, 1u64 << 63)
    } else {
        (4usize, 1u64 << 31)
    };

    let mut import_entries = BTreeSet::new();
    let mut import_dlls = Vec::new();
    let mut num_imports = 0u32;

    let max_descriptors = (import_size / DESCRIPTOR_SIZE as u32).min(MAX_DESCRIPTORS) as usize;
    for index in 0..max_descriptors {
        // Stop at the first descriptor that is not fully inside the file.
        let Some(descriptor) = import_dir_offset
            .checked_add(index * DESCRIPTOR_SIZE)
            .and_then(|start| Some(start..start.checked_add(DESCRIPTOR_SIZE)?))
            .and_then(|range| bytes.get(range))
        else {
            break;
        };
        let (Some(original_first_thunk), Some(name_rva), Some(first_thunk)) = (
            read_u32(descriptor, 0),
            read_u32(descriptor, 12),
            read_u32(descriptor, 16),
        ) else {
            break;
        };
        // An all-zero descriptor terminates the table.
        if original_first_thunk == 0 && name_rva == 0 && first_thunk == 0 {
            break;
        }

        // Descriptors whose DLL name cannot be read (or is empty) are skipped.
        let Some(dll_name) = rva_to_file_offset(name_rva, sections)
            .and_then(|name_offset| read_null_terminated_string(bytes, name_offset))
            .filter(|name| !name.is_empty())
        else {
            continue;
        };
        let dll_lower = dll_name.to_lowercase();
        import_dlls.push(dll_name);

        // Prefer the import lookup table; fall back to the address table when it is absent.
        let thunk_rva = if original_first_thunk != 0 {
            original_first_thunk
        } else {
            first_thunk
        };
        let Some(thunk_offset) = rva_to_file_offset(thunk_rva, sections) else {
            continue;
        };

        for thunk_index in 0..=MAX_THUNK_INDEX {
            let Some(entry_offset) = thunk_offset.checked_add(thunk_index * entry_size) else {
                break;
            };
            let raw_entry = if is_64bit {
                read_u64(bytes, entry_offset)
            } else {
                read_u32(bytes, entry_offset).map(u64::from)
            };
            // A truncated table or a zero entry ends this DLL's thunk list.
            let entry = match raw_entry {
                Some(value) if value != 0 => value,
                _ => break,
            };

            let symbol = if entry & ordinal_flag != 0 {
                // Only the low 16 bits carry the ordinal; the remaining bits are reserved.
                Some(format!("ord{}", entry & 0xFFFF))
            } else {
                // Hint/name entry: a 2-byte hint followed by the NUL-terminated name.
                let hint_name_rva = (entry & 0xFFFF_FFFF) as u32;
                rva_to_file_offset(hint_name_rva, sections)
                    .and_then(|hint_offset| hint_offset.checked_add(2))
                    .and_then(|name_offset| read_null_terminated_string(bytes, name_offset))
                    .map(|function_name| function_name.to_lowercase())
            };
            if let Some(symbol) = symbol {
                import_entries.insert(format!("{}.{}", dll_lower, symbol));
                num_imports += 1;
            }
        }
    }

    let imphash_input: String = import_entries.into_iter().collect();
    let imphash = if imphash_input.is_empty() {
        EMPTY_MD5.to_string()
    } else {
        format!("{:x}", md5::compute(imphash_input.as_bytes()))
    };
    (imphash, num_imports, import_dlls)
}
#[cfg(test)]
mod tests {
    use super::parse_pe;

    /// Copies `data` into `image` starting at byte `at`.
    fn put(image: &mut [u8], at: usize, data: &[u8]) {
        image[at..at + data.len()].copy_from_slice(data);
    }

    /// Builds a zero-filled PE32 image with a 0xE0-byte optional header and a single
    /// `.text` section header; only the fields the tests look at are populated.
    fn build_minimal_pe() -> Vec<u8> {
        const PE_SIGNATURE_AT: usize = 0x80;
        const COFF_AT: usize = PE_SIGNATURE_AT + 4;
        const OPTIONAL_AT: usize = COFF_AT + 20;
        const OPTIONAL_SIZE: usize = 0xE0;
        const SECTION_AT: usize = OPTIONAL_AT + OPTIONAL_SIZE;
        const SECTION_HEADER_SIZE: usize = 40;

        let mut image = vec![0u8; SECTION_AT + SECTION_HEADER_SIZE];

        // DOS header: magic plus the pointer to the PE signature.
        put(&mut image, 0, b"MZ");
        put(&mut image, 0x3C, &(PE_SIGNATURE_AT as u32).to_le_bytes());
        put(&mut image, PE_SIGNATURE_AT, b"PE\0\0");

        // COFF header: one section, optional header of OPTIONAL_SIZE bytes.
        put(&mut image, COFF_AT + 2, &1u16.to_le_bytes());
        put(&mut image, COFF_AT + 16, &(OPTIONAL_SIZE as u16).to_le_bytes());

        // Optional header: PE32 magic and the entry-point RVA.
        put(&mut image, OPTIONAL_AT, &0x10Bu16.to_le_bytes());
        put(&mut image, OPTIONAL_AT + 16, &0x1000u32.to_le_bytes());

        // Section header: name, virtual size, virtual address, raw size, raw pointer.
        put(&mut image, SECTION_AT, b".text");
        put(&mut image, SECTION_AT + 8, &0x1000u32.to_le_bytes());
        put(&mut image, SECTION_AT + 12, &0x1000u32.to_le_bytes());
        put(&mut image, SECTION_AT + 16, &0x200u32.to_le_bytes());
        put(&mut image, SECTION_AT + 20, &0x200u32.to_le_bytes());

        image
    }

    #[test]
    fn rejects_invalid_pe() {
        for input in [&[][..], &b"not a pe"[..]] {
            assert!(parse_pe(input).is_none());
        }
    }

    #[test]
    fn parses_minimal_pe_headers() {
        let image = build_minimal_pe();
        let parsed = parse_pe(&image).expect("minimal PE should parse");
        assert_eq!(parsed.num_sections, 1);
        assert_eq!(parsed.entry_point_rva, 0x1000);
        assert!(!parsed.is_64bit);
        assert!(!parsed.is_dll);
    }
}