/// Fixed-layout summary of a scanned file, made of twelve `u32` fields.
///
/// The struct is `#[repr(C)]`, and with the `gpu` feature enabled it also
/// derives `bytemuck::Pod` / `bytemuck::Zeroable`. Boolean properties are
/// stored as `0` / `1` in `u32` fields so every member has the same width.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "gpu", derive(bytemuck::Pod, bytemuck::Zeroable))]
pub struct FileContext {
    /// Input length in bytes, saturated to `u32::MAX` for larger inputs.
    pub file_size: u32,
    /// Byte entropy of the input scaled to the range `0..=255`.
    pub entropy_bucket: u32,
    /// First four bytes of the input read as little-endian, or `0` when the
    /// input is shorter than four bytes.
    pub magic_u32: u32,
    /// `1` when the input parsed as a PE image, otherwise `0`.
    pub is_pe: u32,
    /// `1` when the PE characteristics carry the DLL flag, otherwise `0`.
    pub is_dll: u32,
    /// `1` when the PE optional header is the 64-bit (PE32+) variant.
    pub is_64bit: u32,
    /// `1` when the PE certificate data directory is populated.
    pub has_signature: u32,
    /// Section count taken from the PE file header (`0` for non-PE input).
    pub num_sections: u32,
    /// Number of counted PE imports (`0` for non-PE input).
    pub num_imports: u32,
    /// Entry point RVA from the PE optional header (`0` for non-PE input).
    pub entry_point_rva: u32,
    /// Left at `0` by `from_bytes`.
    /// NOTE(review): presumably filled in by a later matching stage; confirm.
    pub unique_pattern_count: u32,
    /// Left at `0` by `from_bytes`.
    /// NOTE(review): presumably filled in by a later matching stage; confirm.
    pub total_match_count: u32,
}
/// Header-level facts extracted from a PE image by `parse_pe_summary`.
#[derive(Clone, Copy, Debug, Default)]
struct PeSummary {
    /// The file header characteristics have the DLL bit (`0x2000`) set.
    is_dll: bool,
    /// The optional header magic is `0x20b` (PE32+).
    is_64bit: bool,
    /// The certificate data directory has a non-zero address and size.
    has_signature: bool,
    /// Section count as stored in the file header.
    num_sections: u16,
    /// Number of imports counted while walking the import directory.
    num_imports: u32,
    /// Entry point RVA read from the optional header.
    entry_point_rva: u32,
}
impl FileContext {
    /// Builds a context from the raw bytes of a file.
    ///
    /// Size, entropy bucket and the leading magic value are always filled in.
    /// The PE-related fields are taken from `parse_pe_summary` when it
    /// succeeds and are zero otherwise. The two match counters keep their
    /// default value of zero.
    pub fn from_bytes(data: &[u8]) -> Self {
        let parsed = parse_pe_summary(data);
        // A defaulted summary is all-false / all-zero, which is exactly the
        // fallback wanted for every PE-derived field on non-PE input.
        let summary = parsed.unwrap_or_default();

        let magic_u32 = match data {
            [b0, b1, b2, b3, ..] => u32::from_le_bytes([*b0, *b1, *b2, *b3]),
            _ => 0,
        };

        Self {
            // Inputs larger than 4 GiB saturate instead of wrapping.
            file_size: u32::try_from(data.len()).unwrap_or(u32::MAX),
            entropy_bucket: u32::from(entropy_bucket(data)),
            magic_u32,
            is_pe: u32::from(parsed.is_some()),
            is_dll: u32::from(summary.is_dll),
            is_64bit: u32::from(summary.is_64bit),
            has_signature: u32::from(summary.has_signature),
            num_sections: u32::from(summary.num_sections),
            num_imports: summary.num_imports,
            entry_point_rva: summary.entry_point_rva,
            ..Self::default()
        }
    }
}
/// Maps the Shannon entropy of `data` onto a single byte.
///
/// The entropy is measured in bits per byte (`0.0..=8.0`) and scaled linearly
/// so that `0` means no entropy and `255` means the maximum of eight bits.
/// Empty input yields `0`.
fn entropy_bucket(data: &[u8]) -> u8 {
    if data.is_empty() {
        return 0;
    }

    // Histogram of byte values.
    let mut histogram = [0usize; 256];
    for &value in data {
        histogram[usize::from(value)] += 1;
    }

    let total = data.len() as f64;
    // Accumulate -sum(p * log2(p)) over the byte values that actually occur;
    // absent values contribute nothing and would otherwise produce NaN.
    let bits = histogram
        .iter()
        .filter(|&&occurrences| occurrences != 0)
        .fold(0.0f64, |acc, &occurrences| {
            let p = occurrences as f64 / total;
            acc - p * p.log2()
        });

    let scaled = (bits / 8.0 * 255.0).round();
    scaled.clamp(0.0, 255.0) as u8
}
/// Extracts a [`PeSummary`] from the headers of a PE image.
///
/// Returns `None` when `data` does not look like a PE file this parser
/// understands:
/// - it is shorter than `0x40` bytes or does not start with `MZ`;
/// - the `PE\0\0` signature and file header do not fit at the offset stored
///   at `0x3c`, or the signature does not match;
/// - the optional header is truncated, or its magic is neither `0x10b`
///   (PE32) nor `0x20b` (PE32+);
/// - the optional header is too small to hold the first five data directory
///   entries;
/// - the section table does not fit inside `data`.
///
/// A failure while counting imports is not fatal; the count is reported as
/// `0` in that case.
fn parse_pe_summary(data: &[u8]) -> Option<PeSummary> {
    if data.len() < 0x40 || &data[..2] != b"MZ" {
        return None;
    }

    // The DOS header stores the file offset of the PE signature at 0x3c.
    let pe_offset = read_u32(data, 0x3c)? as usize;
    // Signature (4 bytes) plus the 20-byte file header must be in bounds.
    if pe_offset.checked_add(0x18)? > data.len()
        || data.get(pe_offset..pe_offset + 4)? != b"PE\0\0"
    {
        return None;
    }

    // File header fields, relative to the byte after the signature.
    let coff_offset = pe_offset + 4;
    let characteristics = read_u16(data, coff_offset + 18)?;
    let num_sections = read_u16(data, coff_offset + 2)?;
    let optional_size = read_u16(data, coff_offset + 16)? as usize;

    let optional_offset = coff_offset + 20;
    if optional_offset.checked_add(optional_size)? > data.len() || optional_size < 2 {
        return None;
    }

    // The data directory array starts at a different offset in PE32 and
    // PE32+ optional headers; any other magic is rejected.
    let optional_magic = read_u16(data, optional_offset)?;
    let is_64bit = optional_magic == 0x20b;
    let data_dir_offset = if is_64bit {
        optional_offset + 112
    } else if optional_magic == 0x10b {
        optional_offset + 96
    } else {
        return None;
    };
    // Entries 0..=4 (8 bytes each) are needed: index 1 is the import table
    // and index 4 is the certificate table.
    if data_dir_offset.checked_add(8 * 5)? > optional_offset + optional_size {
        return None;
    }

    let entry_point_rva = read_u32(data, optional_offset + 16).unwrap_or(0);
    let cert_rva = read_u32(data, data_dir_offset + 8 * 4).unwrap_or(0);
    let cert_size = read_u32(data, data_dir_offset + 8 * 4 + 4).unwrap_or(0);
    let import_rva = read_u32(data, data_dir_offset + 8).unwrap_or(0);
    let import_size = read_u32(data, data_dir_offset + 12).unwrap_or(0);

    // The section table immediately follows the optional header.
    let section_table = optional_offset + optional_size;
    let sections = parse_sections(data, section_table, num_sections as usize)?;
    // Import counting is best effort: a malformed import directory must not
    // stop the file from being recognised as PE.
    let num_imports =
        count_pe_imports(data, &sections, import_rva, import_size, is_64bit).unwrap_or(0);

    Some(PeSummary {
        is_dll: (characteristics & 0x2000) != 0,
        is_64bit,
        has_signature: cert_rva != 0 && cert_size != 0,
        num_sections,
        num_imports,
        entry_point_rva,
    })
}
/// Reads `count` 40-byte section headers starting at `offset`.
///
/// Each returned tuple is `(virtual_address, size, raw_pointer)`, where
/// `size` is the larger of the header's virtual size and raw data size.
/// Returns `None` when the table does not fit inside `data` or its extent
/// overflows `usize`.
fn parse_sections(data: &[u8], offset: usize, count: usize) -> Option<Vec<(u32, u32, u32)>> {
    const HEADER_LEN: usize = 40;

    let table_len = count.checked_mul(HEADER_LEN)?;
    let table_end = offset.checked_add(table_len)?;
    // Slicing once validates the whole table; every header below is then a
    // full 40-byte chunk.
    let table = data.get(offset..table_end)?;

    table
        .chunks_exact(HEADER_LEN)
        .map(|header| {
            let virtual_size = read_u32(header, 8)?;
            let virtual_address = read_u32(header, 12)?;
            let raw_size = read_u32(header, 16)?;
            let raw_ptr = read_u32(header, 20)?;
            Some((virtual_address, virtual_size.max(raw_size), raw_ptr))
        })
        .collect()
}
/// Counts the by-name imports referenced from the PE import directory.
///
/// `sections` holds `(virtual_address, size, raw_pointer)` tuples used to
/// translate RVAs to file offsets. The import descriptors (20 bytes each)
/// are walked until an all-zero terminator or the end of the directory. For
/// each descriptor the thunk array is scanned up to its zero terminator, and
/// every entry without the ordinal flag (the top bit) is counted.
/// Descriptors whose thunk RVA maps to no section are skipped.
///
/// Returns `Some(0)` when the directory is absent. Returns `None` when the
/// directory RVA cannot be mapped, a read falls outside `data`, or an offset
/// computation overflows.
fn count_pe_imports(
    data: &[u8],
    sections: &[(u32, u32, u32)],
    import_rva: u32,
    import_size: u32,
    is_64bit: bool,
) -> Option<u32> {
    const DESCRIPTOR_LEN: usize = 20;

    if import_rva == 0 || import_size == 0 {
        return Some(0);
    }

    let table_start = rva_to_offset(sections, import_rva)?;
    let table_end = table_start
        .checked_add(import_size as usize)?
        .min(data.len());

    // Thunk entries are pointer sized; the top bit marks import by ordinal.
    let (entry_len, ordinal_flag) = if is_64bit {
        (8usize, 1u64 << 63)
    } else {
        (4usize, 1u64 << 31)
    };

    let mut named = 0u32;
    let mut cursor = table_start;
    loop {
        if cursor.checked_add(DESCRIPTOR_LEN)? > table_end {
            break;
        }

        let lookup_rva = read_u32(data, cursor)?;
        let name_rva = read_u32(data, cursor + 12)?;
        let address_rva = read_u32(data, cursor + 16)?;
        // An all-zero descriptor terminates the table.
        if lookup_rva == 0 && name_rva == 0 && address_rva == 0 {
            break;
        }

        // Prefer the lookup table; fall back to the address table when the
        // descriptor has no lookup table.
        let chosen_rva = if lookup_rva == 0 { address_rva } else { lookup_rva };

        if let Some(mut position) = rva_to_offset(sections, chosen_rva) {
            while position.checked_add(entry_len)? <= data.len() {
                let entry = if is_64bit {
                    read_u64(data, position)?
                } else {
                    u64::from(read_u32(data, position)?)
                };
                if entry == 0 {
                    break;
                }
                if entry & ordinal_flag == 0 {
                    named = named.saturating_add(1);
                }
                position += entry_len;
            }
        }

        cursor += DESCRIPTOR_LEN;
    }

    Some(named)
}
/// Translates `rva` into a file offset using the section table.
///
/// `sections` holds `(virtual_address, size, raw_pointer)` tuples. The first
/// section whose range `[virtual_address, virtual_address + size)` contains
/// `rva` and whose resulting offset fits in `u32` decides the result.
/// Sections whose virtual range or resulting offset overflows `u32` are
/// skipped. Returns `None` when no section qualifies.
fn rva_to_offset(sections: &[(u32, u32, u32)], rva: u32) -> Option<usize> {
    for &(virt_start, span, raw_start) in sections {
        let virt_end = match virt_start.checked_add(span) {
            Some(end) => end,
            None => continue,
        };
        if rva < virt_start || rva >= virt_end {
            continue;
        }
        if let Some(file_pos) = raw_start.checked_add(rva - virt_start) {
            return Some(file_pos as usize);
        }
    }
    None
}
/// Reads a little-endian `u16` from `data` at `offset`.
///
/// Returns `None` when the two bytes are not fully inside `data`, including
/// when `offset + 2` would overflow `usize`.
fn read_u16(data: &[u8], offset: usize) -> Option<u16> {
    // checked_add keeps offsets near usize::MAX from panicking in
    // overflow-checked builds.
    let end = offset.checked_add(2)?;
    let bytes: [u8; 2] = data.get(offset..end)?.try_into().ok()?;
    Some(u16::from_le_bytes(bytes))
}
/// Reads a little-endian `u32` from `data` at `offset`.
///
/// Returns `None` when the four bytes are not fully inside `data`, including
/// when `offset + 4` would overflow `usize`.
fn read_u32(data: &[u8], offset: usize) -> Option<u32> {
    // checked_add keeps offsets near usize::MAX from panicking in
    // overflow-checked builds.
    let end = offset.checked_add(4)?;
    let bytes: [u8; 4] = data.get(offset..end)?.try_into().ok()?;
    Some(u32::from_le_bytes(bytes))
}
/// Reads a little-endian `u64` from `data` at `offset`.
///
/// Returns `None` when the eight bytes are not fully inside `data`, including
/// when `offset + 8` would overflow `usize`.
fn read_u64(data: &[u8], offset: usize) -> Option<u64> {
    // checked_add keeps offsets near usize::MAX from panicking in
    // overflow-checked builds.
    let end = offset.checked_add(8)?;
    let bytes: [u8; 8] = data.get(offset..end)?.try_into().ok()?;
    Some(u64::from_le_bytes(bytes))
}