pub mod parser;
pub mod pe;
pub mod repair;
use std::path::Path;
use cowfile::CowFile;
use crate::{
file::{
pe::constants::COR20_HEADER_SIZE,
repair::{repair_pe_cow, RepairAction},
},
utils::align_to,
Error::{self, Goblin, LayoutFailed, Other},
ParseFailure, ParseStage, Result,
};
use goblin::pe::PE;
use pe::{DataDirectory, DataDirectoryType, Pe};
/// A loaded PE image: the backing bytes together with the structures parsed from them.
///
/// Instances are created through [`File::from_path`], [`File::from_mem`],
/// [`File::from_std_file`] or [`File::from_reader`], all of which funnel into the private
/// `load` routine.
pub struct File {
// Backing byte storage for the image; every raw read and write goes through it.
data: CowFile,
// PE structures built from the goblin parse of `data` at load time.
pe: Pe,
// Repair actions reported by `repair_pe_cow` while loading (empty when none were reported).
repairs: Vec<RepairAction>,
}
impl File {
/// Opens the file at `path` and loads it as a PE image.
///
/// # Errors
///
/// Returns an error if `CowFile::open` fails or if the subsequent load
/// (repair, parse, conversion) fails.
pub fn from_path(path: impl AsRef<Path>) -> Result<File> {
    Self::load(CowFile::open(path)?)
}
/// Loads a PE image from an owned in-memory buffer.
///
/// # Errors
///
/// Returns an error if the buffer is empty or cannot be parsed as a PE file.
pub fn from_mem(data: Vec<u8>) -> Result<File> {
    Self::load(CowFile::from_vec(data))
}
/// Loads a PE image from an already opened [`std::fs::File`].
///
/// # Errors
///
/// Returns an error if `CowFile::from_file` fails or if the subsequent load fails.
pub fn from_std_file(file: std::fs::File) -> Result<File> {
    Self::load(CowFile::from_file(file)?)
}
/// Reads `reader` to its end and loads the collected bytes as a PE image.
///
/// # Errors
///
/// Returns `Error::Other` if reading fails, otherwise whatever [`File::from_mem`] returns
/// for the collected bytes.
pub fn from_reader<R: std::io::Read>(mut reader: R) -> Result<File> {
    let mut buffer = Vec::new();
    if let Err(e) = reader.read_to_end(&mut buffer) {
        return Err(Other(format!("Failed to read from reader: {e}")));
    }
    Self::from_mem(buffer)
}
/// Shared loading path behind every public constructor.
///
/// Steps, in order: reject empty input, run `repair_pe_cow`, commit the backing file when
/// at least one repair was reported, parse the bytes with goblin, and convert the goblin
/// result into the crate's own `Pe` representation.
///
/// # Errors
///
/// * `Error::NotSupported` when the input is empty.
/// * The commit error when committing fails.
/// * `Error::Goblin` when goblin cannot parse the bytes.
/// * Whatever `Pe::from_goblin_pe` reports when the conversion fails.
fn load(mut cowfile: CowFile) -> Result<File> {
    if cowfile.is_empty() {
        return Err(Error::NotSupported);
    }

    let outcome = repair_pe_cow(&cowfile);
    let repaired = !outcome.repairs.is_empty();
    if repaired {
        // Commit before parsing so the parser reads the committed bytes.
        cowfile.commit()?;
    }

    let pe = {
        let parsed = PE::parse(cowfile.data()).map_err(Goblin)?;
        Pe::from_goblin_pe(&parsed)?
    };

    Ok(File {
        data: cowfile,
        pe,
        repairs: outcome.repairs,
    })
}
/// Returns the length reported by the backing `CowFile`.
#[must_use]
pub fn len(&self) -> usize {
    let backing = &self.data;
    backing.len()
}
/// Returns `true` when [`File::len`] is zero.
#[must_use]
pub fn is_empty(&self) -> bool {
    let length = self.len();
    length == 0
}
/// Returns the source path reported by the backing `CowFile`, if it has one.
// NOTE(review): presumably `None` for images built from memory or a reader - confirm
// against the `cowfile` crate.
#[must_use]
pub fn source_path(&self) -> Option<&Path> {
    let backing = &self.data;
    backing.source_path()
}
pub fn fork_cowfile(&self) -> Result<CowFile> {
self.data.fork().map_err(|e| Error::Other(e.to_string()))
}
/// Returns the image base recorded in the parsed PE data.
#[must_use]
pub fn imagebase(&self) -> u64 {
    let pe = &self.pe;
    pe.image_base
}
/// Returns the parsed COFF header.
#[must_use]
pub fn header(&self) -> &pe::CoffHeader {
    let pe = &self.pe;
    &pe.coff_header
}
/// Returns the parsed DOS header.
#[must_use]
pub fn header_dos(&self) -> &pe::DosHeader {
    let pe = &self.pe;
    &pe.dos_header
}
/// Returns the parsed optional header, or a reference to `None` when the image has none.
#[must_use]
pub fn header_optional(&self) -> &Option<pe::OptionalHeader> {
    let pe = &self.pe;
    &pe.optional_header
}
/// Returns the CLR runtime header directory as `(virtual_address, size)`.
///
/// Yields `None` when `Pe::get_clr_runtime_header` has no entry. No zero-value filtering
/// is applied in this method itself.
#[must_use]
pub fn clr(&self) -> Option<(usize, usize)> {
    let clr_dir = self.pe.get_clr_runtime_header()?;
    let address = clr_dir.virtual_address as usize;
    let size = clr_dir.size as usize;
    Some((address, size))
}
/// Returns `true` when the image has a CLR runtime header directory, i.e. when
/// [`File::clr`] would return `Some`.
#[must_use]
pub fn is_clr(&self) -> bool {
    // `clr()` only maps this lookup, so presence can be tested on it directly.
    self.pe.get_clr_runtime_header().is_some()
}
/// Returns the parsed section table entries.
#[must_use]
pub fn sections(&self) -> &[pe::SectionTable] {
    let pe = &self.pe;
    &pe.sections
}
/// Returns a copy of every parsed data directory as `(type, entry)` pairs.
///
/// The order of the pairs is whatever the underlying collection iterates in.
#[must_use]
pub fn directories(&self) -> Vec<(DataDirectoryType, DataDirectory)> {
    let entries = self.pe.data_directories.iter();
    entries.map(|(kind, entry)| (*kind, *entry)).collect()
}
/// Looks up one data directory and returns it as `(virtual_address, size)`.
///
/// Returns `None` when the directory is absent, or when either its virtual address or its
/// size is zero.
#[must_use]
pub fn get_data_directory(&self, dir_type: DataDirectoryType) -> Option<(u32, u32)> {
    let directory = self.pe.get_data_directory(dir_type)?;
    if directory.virtual_address == 0 || directory.size == 0 {
        return None;
    }
    Some((directory.virtual_address, directory.size))
}
/// Returns the parsed imports, or `None` when the list is empty.
#[must_use]
pub fn imports(&self) -> Option<&Vec<pe::Import>> {
    Some(&self.pe.imports).filter(|list| !list.is_empty())
}
/// Returns the parsed exports, or `None` when the list is empty.
#[must_use]
pub fn exports(&self) -> Option<&Vec<pe::Export>> {
    Some(&self.pe.exports).filter(|list| !list.is_empty())
}
/// Returns the image bytes as exposed by the backing `CowFile`.
#[must_use]
pub fn data(&self) -> &[u8] {
    let backing = &self.data;
    backing.data()
}
#[must_use]
pub fn into_cowfile(self) -> CowFile {
self.data
}
/// Writes `data` at byte `offset` through the backing `CowFile`.
///
/// The parsed PE structures held by this `File` are not updated by this call.
///
/// # Errors
///
/// Returns the backing store's error, converted into this crate's error type.
// NOTE(review): takes `&self`; whether the bytes become visible through `data()` before
// `commit_pending` depends on `cowfile` - confirm.
pub fn write(&self, offset: usize, data: &[u8]) -> Result<()> {
    match self.data.write(offset, data) {
        Ok(()) => Ok(()),
        Err(e) => Err(e.into()),
    }
}
/// Writes the primitive `value` at byte `offset` via the backing store's `write_le`
/// (little-endian, going by the method name).
///
/// # Errors
///
/// Returns the backing store's error, converted into this crate's error type.
pub fn write_le<T: cowfile::Primitive>(&self, offset: usize, value: T) -> Result<()> {
    match self.data.write_le(offset, value) {
        Ok(()) => Ok(()),
        Err(e) => Err(e.into()),
    }
}
/// Reads a primitive of type `T` at byte `offset` via the backing store's `read_le`
/// (little-endian, going by the method name).
///
/// # Errors
///
/// Returns the backing store's error, converted into this crate's error type.
pub fn read_le<T: cowfile::Primitive>(&self, offset: usize) -> Result<T> {
    match self.data.read_le(offset) {
        Ok(value) => Ok(value),
        Err(e) => Err(e.into()),
    }
}
/// Returns whether the backing `CowFile` reports pending (not yet committed) changes.
#[must_use]
pub fn has_pending(&self) -> bool {
    let backing = &self.data;
    backing.has_pending()
}
/// Commits the backing `CowFile`.
///
/// The parsed PE structures are not re-parsed afterwards.
///
/// # Errors
///
/// Returns the commit error, converted into this crate's error type.
pub fn commit_pending(&mut self) -> Result<()> {
    match self.data.commit() {
        Ok(()) => Ok(()),
        Err(e) => Err(e.into()),
    }
}
pub fn data_slice(&self, offset: usize, len: usize) -> Result<&[u8]> {
let base = self.data.data();
let oob = || ParseFailure::OutOfBounds {
stage: ParseStage::DataDirectory,
};
let end = offset.checked_add(len).ok_or_else(|| Error::from(oob()))?;
base.get(offset..end).ok_or_else(|| Error::from(oob()))
}
pub fn va_to_offset(&self, va: usize) -> Result<usize> {
let ib = self.imagebase();
let rva_u64 = (va as u64).checked_sub(ib).ok_or_else(|| {
Error::from(ParseFailure::OutOfBounds {
stage: ParseStage::DataDirectory,
})
})?;
let rva = usize::try_from(rva_u64).map_err(|_| ParseFailure::InvalidField {
stage: ParseStage::DataDirectory,
field: "rva",
reason: format!("RVA too large to fit in usize: {rva_u64}"),
})?;
self.rva_to_offset(rva)
}
pub fn rva_to_offset(&self, rva: usize) -> Result<usize> {
let invalid = |field: &'static str, reason: String| ParseFailure::InvalidField {
stage: ParseStage::SectionTable,
field,
reason,
};
for section in &self.pe.sections {
let Some(section_max) = section.virtual_address.checked_add(section.virtual_size)
else {
return Err(invalid(
"section_extent",
format!(
"section malformed, causing integer overflow - {} + {}",
section.virtual_address, section.virtual_size
),
)
.into());
};
let rva_u32 = u32::try_from(rva)
.map_err(|_| invalid("rva", format!("RVA too large to fit in u32: {rva}")))?;
if section.virtual_address <= rva_u32 && section_max > rva_u32 {
let delta = rva
.checked_sub(section.virtual_address as usize)
.ok_or_else(|| invalid("rva", "RVA underflow vs section base".into()))?;
return delta
.checked_add(section.pointer_to_raw_data as usize)
.ok_or_else(|| Error::from(invalid("rva", "RVA-to-offset overflow".into())));
}
}
Err(invalid(
"rva",
format!("RVA could not be converted to offset - {rva}"),
)
.into())
}
pub fn offset_to_rva(&self, offset: usize) -> Result<usize> {
let invalid = |field: &'static str, reason: String| ParseFailure::InvalidField {
stage: ParseStage::SectionTable,
field,
reason,
};
for section in &self.pe.sections {
let Some(section_max) = section
.pointer_to_raw_data
.checked_add(section.size_of_raw_data)
else {
return Err(invalid(
"section_extent",
format!(
"section malformed, causing integer overflow - {} + {}",
section.pointer_to_raw_data, section.size_of_raw_data
),
)
.into());
};
let offset_u32 = u32::try_from(offset).map_err(|_| {
invalid(
"offset",
format!("offset too large to fit in u32: {offset}"),
)
})?;
if section.pointer_to_raw_data <= offset_u32 && section_max > offset_u32 {
let delta = offset
.checked_sub(section.pointer_to_raw_data as usize)
.ok_or_else(|| invalid("offset", "offset underflow vs section base".into()))?;
return delta
.checked_add(section.virtual_address as usize)
.ok_or_else(|| {
Error::from(invalid("offset", "offset-to-RVA overflow".into()))
});
}
}
Err(invalid(
"offset",
format!("offset could not be converted to RVA - {offset}"),
)
.into())
}
/// Reports whether the metadata RVA stored in the CLR header lies inside the section
/// named `section_name`.
///
/// The CLR header is located through [`File::clr`], and the metadata RVA is read as a
/// little-endian `u32` from bytes 8..12 of that header. Only the first section whose name
/// equals `section_name` is considered.
///
/// Returns `false` when:
/// * there is no usable CLR header (zero RVA, or size below `COR20_HEADER_SIZE`),
/// * the header cannot be read,
/// * the metadata RVA is zero,
/// * no section has that name, or
/// * the section's virtual range overflows.
#[must_use]
pub fn section_contains_metadata(&self, section_name: &str) -> bool {
    let Some((clr_rva, clr_size)) = self.clr() else {
        return false;
    };
    if clr_rva == 0 || clr_size < COR20_HEADER_SIZE as usize {
        return false;
    }
    // The header RVA must be representable as a 32-bit RVA.
    if u32::try_from(clr_rva).is_err() {
        return false;
    }

    let Ok(header_offset) = self.rva_to_offset(clr_rva) else {
        return false;
    };
    let Ok(header) = self.data_slice(header_offset, COR20_HEADER_SIZE as usize) else {
        return false;
    };
    let Some(raw_rva) = header
        .get(8..12)
        .and_then(|bytes| <[u8; 4]>::try_from(bytes).ok())
    else {
        return false;
    };

    let metadata_rva = u32::from_le_bytes(raw_rva);
    if metadata_rva == 0 {
        return false;
    }

    self.sections()
        .iter()
        .find(|section| section.name.as_str() == section_name)
        .and_then(|section| {
            let start = section.virtual_address;
            let end = start.checked_add(section.virtual_size)?;
            Some(start <= metadata_rva && metadata_rva < end)
        })
        .unwrap_or(false)
}
/// Returns the `file_alignment` value from the optional header's Windows fields.
///
/// # Errors
///
/// Returns `LayoutFailed` when the image has no optional header.
pub fn file_alignment(&self) -> Result<u32> {
    self.header_optional()
        .as_ref()
        .map(|header| header.windows_fields.file_alignment)
        .ok_or_else(|| LayoutFailed("Missing optional header for file alignment".to_string()))
}
/// Returns the `section_alignment` value from the optional header's Windows fields.
///
/// # Errors
///
/// Returns `LayoutFailed` when the image has no optional header.
pub fn section_alignment(&self) -> Result<u32> {
    self.header_optional()
        .as_ref()
        .map(|header| header.windows_fields.section_alignment)
        .ok_or_else(|| {
            LayoutFailed("Missing optional header for section alignment".to_string())
        })
}
/// Reports whether the optional header declares the PE32+ format.
///
/// The optional header magic is `0x10b` for PE32 and `0x20b` for PE32+. Only the PE32+
/// value yields `true`.
///
/// # Errors
///
/// Returns `LayoutFailed` when the image has no optional header.
pub fn is_pe32_plus_format(&self) -> Result<bool> {
    let optional_header = self.header_optional().as_ref().ok_or_else(|| {
        LayoutFailed("Missing optional header for PE format detection".to_string())
    })?;
    // Compare against the PE32+ magic itself. Testing for "not PE32" would also classify
    // ROM images (0x107) or a corrupted magic value as PE32+.
    Ok(optional_header.standard_fields.magic == 0x20b)
}
/// Locates the section used as the image's `.text` section.
///
/// A section named exactly `.text` is preferred. Only when none exists does the first
/// section whose name merely starts with `.text` get used. This stops a prefix-only
/// section that appears earlier in the table (for example `.textbss`) from shadowing the
/// real `.text` section.
///
/// # Errors
///
/// Returns `LayoutFailed` when no section name is, or starts with, `.text`.
fn find_text_section(&self) -> Result<&pe::SectionTable> {
    let sections = self.sections();
    sections
        .iter()
        .find(|s| s.name.as_str() == ".text")
        .or_else(|| sections.iter().find(|s| s.name.starts_with(".text")))
        .ok_or_else(|| LayoutFailed("Could not find .text section".to_string()))
}
/// Returns the virtual address (RVA) of the `.text` section.
///
/// # Errors
///
/// Returns `LayoutFailed` when no `.text` section is found.
pub fn text_section_rva(&self) -> Result<u32> {
    self.find_text_section()
        .map(|section| section.virtual_address)
}
/// Returns the file offset (`pointer_to_raw_data`) of the `.text` section, widened to
/// `u64`.
///
/// # Errors
///
/// Returns `LayoutFailed` when no `.text` section is found.
pub fn text_section_file_offset(&self) -> Result<u64> {
    self.find_text_section()
        .map(|section| u64::from(section.pointer_to_raw_data))
}
/// Returns the raw on-disk size (`size_of_raw_data`) of the `.text` section.
///
/// # Errors
///
/// Returns `LayoutFailed` when no `.text` section is found.
pub fn text_section_raw_size(&self) -> Result<u32> {
    self.find_text_section()
        .map(|section| section.size_of_raw_data)
}
/// Returns the number of image bytes as a `u64`, saturating at `u64::MAX` if the length
/// cannot be represented.
#[must_use]
pub fn file_size(&self) -> u64 {
    let byte_count = self.data().len();
    match u64::try_from(byte_count) {
        Ok(size) => size,
        Err(_) => u64::MAX,
    }
}
/// Reads the PE signature offset from the DOS header.
///
/// The value is the little-endian `u32` stored at bytes 60..64 of the image, returned as
/// a `u64`.
///
/// # Errors
///
/// Returns `LayoutFailed` when the image is shorter than 64 bytes.
pub fn pe_signature_offset(&self) -> Result<u64> {
    let image = self.data();
    if image.len() < 64 {
        return Err(LayoutFailed(
            "File too small to contain DOS header".to_string(),
        ));
    }

    let Some(raw) = image
        .get(60..64)
        .and_then(|bytes| <[u8; 4]>::try_from(bytes).ok())
    else {
        return Err(LayoutFailed("DOS header truncated".to_string()));
    };

    Ok(u64::from(u32::from_le_bytes(raw)))
}
/// Computes the combined size of the PE signature, COFF header and optional header.
///
/// The result is `24 + optional_header_size`: 4 bytes of signature plus 20 bytes of COFF
/// header, plus the 16-bit optional-header size read from offset 16 of the COFF header.
/// Bytes before the PE signature and the section table are not included.
///
/// # Errors
///
/// Returns `LayoutFailed` when the PE signature offset cannot be read, when any offset
/// computation overflows, or when the image is too short to hold the COFF header.
pub fn pe_headers_size(&self) -> Result<u64> {
    let layout_err = |message: &str| LayoutFailed(message.to_string());

    let signature_at = self.pe_signature_offset()?;
    let image = self.data();

    // The COFF header starts right after the 4-byte PE signature and is 20 bytes long.
    let coff_start = signature_at
        .checked_add(4)
        .ok_or_else(|| layout_err("COFF header offset overflow"))?;
    let coff_stop = coff_start
        .checked_add(20)
        .ok_or_else(|| layout_err("COFF header end overflow"))?;
    let coff_stop = usize::try_from(coff_stop)
        .map_err(|_| layout_err("COFF header end exceeds usize"))?;
    if image.len() < coff_stop {
        return Err(layout_err("File too small to contain COFF header"));
    }

    // The optional-header size is a 2-byte field at offset 16 of the COFF header.
    let size_field = coff_start
        .checked_add(16)
        .ok_or_else(|| layout_err("Optional header size offset overflow"))?;
    let size_field_start = usize::try_from(size_field)
        .map_err(|_| layout_err("Optional header offset exceeds usize"))?;
    let size_field_stop = size_field_start
        .checked_add(2)
        .ok_or_else(|| layout_err("Optional header offset+2 overflow"))?;

    let Some(raw_size) = image
        .get(size_field_start..size_field_stop)
        .and_then(|bytes| <[u8; 2]>::try_from(bytes).ok())
    else {
        return Err(layout_err("Optional header size truncated"));
    };
    let optional_size = u64::from(u16::from_le_bytes(raw_size));

    24u64
        .checked_add(optional_size)
        .ok_or_else(|| layout_err("PE headers size overflow"))
}
/// Aligns `offset` to the image's file alignment using `align_to`.
///
/// Per this file's tests, the result is the smallest multiple of the alignment that is
/// not below `offset` (0 stays 0, 1 becomes one alignment unit).
///
/// # Errors
///
/// Returns `LayoutFailed` when the image has no optional header.
pub fn align_to_file_alignment(&self, offset: u64) -> Result<u64> {
    self.file_alignment()
        .map(|alignment| align_to(offset, u64::from(alignment)))
}
#[must_use]
pub fn pe(&self) -> &Pe {
&self.pe
}
pub fn pe_mut(&mut self) -> &mut Pe {
&mut self.pe
}
/// Consumes the `File` and converts its backing store into an owned byte vector.
///
/// # Errors
///
/// Returns the backing store's `into_vec` error, converted into this crate's error type.
pub fn into_data(self) -> Result<Vec<u8>> {
    match self.data.into_vec() {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(e.into()),
    }
}
/// Returns the repair actions recorded while the file was loaded; empty when none were
/// reported.
#[must_use]
pub fn repairs(&self) -> &[RepairAction] {
    self.repairs.as_slice()
}
}
#[cfg(test)]
mod tests {
    use std::{env, fs, path::PathBuf};

    use crate::{file::pe::DataDirectoryType, test::factories::general::file::verify_file, File};

    /// Resolves `relative` against the crate's manifest directory.
    fn manifest_path(relative: &str) -> PathBuf {
        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(relative)
    }

    /// Loads the crafted sample shared by the layout-oriented tests.
    fn crafted_sample() -> File {
        File::from_path(manifest_path("tests/samples/crafted_2.exe"))
            .expect("Failed to load test assembly")
    }

    /// Loading from a path yields a file that passes the shared verification.
    #[test]
    fn load_file() {
        let file = File::from_path(manifest_path("tests/samples/WindowsBase.dll")).unwrap();
        verify_file(&file);
    }

    /// Loading the same bytes from memory yields a file that passes the shared verification.
    #[test]
    fn load_buffer() {
        let bytes = fs::read(manifest_path("tests/samples/WindowsBase.dll")).unwrap();
        let file = File::from_mem(bytes).unwrap();
        verify_file(&file);
    }

    /// A blob that is not a PE image must be rejected.
    #[test]
    fn load_invalid() {
        let data = include_bytes!("../../tests/samples/WB_ROOT.bin");
        assert!(
            File::from_mem(data.to_vec()).is_err(),
            "This should not load!"
        );
    }

    /// `get_data_directory` agrees with `clr()` and never reports zero-valued entries.
    #[test]
    fn test_get_data_directory() {
        let file = File::from_path(manifest_path("tests/samples/WindowsBase.dll")).unwrap();

        let (clr_rva, clr_size) = file
            .get_data_directory(DataDirectoryType::ClrRuntimeHeader)
            .expect("CLR runtime header should exist");
        assert!(clr_rva > 0, "CLR RVA should be non-zero");
        assert!(clr_size > 0, "CLR size should be non-zero");

        let (expected_rva, expected_size) = file.clr().expect("Should have CLR header");
        assert_eq!(
            clr_rva as usize, expected_rva,
            "CLR RVA should match clr() method"
        );
        assert_eq!(
            clr_size as usize, expected_size,
            "CLR size should match clr() method"
        );

        // Only checks that the lookup itself does not panic.
        let _base_reloc_dir = file.get_data_directory(DataDirectoryType::BaseRelocationTable);

        if let Some((tls_rva, tls_size)) = file.get_data_directory(DataDirectoryType::TlsTable) {
            assert!(
                tls_rva > 0 && tls_size > 0,
                "If TLS directory exists, it should have valid values"
            );
        }
    }

    /// The PE signature offset of the crafted sample is positive and small.
    #[test]
    fn test_pe_signature_offset() {
        let file = crafted_sample();
        let pe_offset = file
            .pe_signature_offset()
            .expect("Should get PE signature offset");
        assert!(pe_offset > 0, "PE signature offset should be positive");
        assert!(pe_offset < 1024, "PE signature offset should be reasonable");
    }

    /// The computed header size of the crafted sample lies within sane bounds.
    #[test]
    fn test_pe_headers_size() {
        let file = crafted_sample();
        let headers_size = file
            .pe_headers_size()
            .expect("Should calculate headers size");
        assert!(headers_size >= 24, "Headers should be at least 24 bytes");
        assert!(headers_size <= 1024, "Headers size should be reasonable");
    }

    /// Offsets are rounded up to the next multiple of the file alignment.
    #[test]
    fn test_align_to_file_alignment() {
        let file = crafted_sample();
        let alignment = file.file_alignment().expect("Should get file alignment");
        let unit = u64::from(alignment);

        assert_eq!(file.align_to_file_alignment(0).unwrap(), 0);
        assert_eq!(file.align_to_file_alignment(1).unwrap(), unit);
        assert_eq!(file.align_to_file_alignment(unit).unwrap(), unit);
        assert_eq!(
            file.align_to_file_alignment(unit + 1).unwrap(),
            u64::from(alignment * 2)
        );
    }
}