use std::io::{Read, Seek};
use std::{collections, fmt, io, mem, ops, path, slice, vec};
use crate::map::{Cursor, MappedFile};
use crate::structs::*;
// Memory-protection values as found in the `protect` / `allocation_protect`
// fields of a `MemBlock`. The first eight are matched as exclusive base values
// by `MemBlock::protect_as_str`; the last three are modifier bits that it
// strips before matching the base value.

/// Base protection value `PAGE_NOACCESS`.
pub const PAGE_NOACCESS: u32 = 0x01;
/// Base protection value `PAGE_READONLY`.
pub const PAGE_READONLY: u32 = 0x02;
/// Base protection value `PAGE_READWRITE`.
pub const PAGE_READWRITE: u32 = 0x04;
/// Base protection value `PAGE_WRITECOPY`.
pub const PAGE_WRITECOPY: u32 = 0x08;
/// Base protection value `PAGE_EXECUTE`.
pub const PAGE_EXECUTE: u32 = 0x10;
/// Base protection value `PAGE_EXECUTE_READ`.
pub const PAGE_EXECUTE_READ: u32 = 0x20;
/// Base protection value `PAGE_EXECUTE_READWRITE`.
pub const PAGE_EXECUTE_READWRITE: u32 = 0x40;
/// Base protection value `PAGE_EXECUTE_WRITECOPY`.
pub const PAGE_EXECUTE_WRITECOPY: u32 = 0x80;
/// Modifier bit `PAGE_GUARD`.
pub const PAGE_GUARD: u32 = 0x100;
/// Modifier bit `PAGE_NOCACHE`.
pub const PAGE_NOCACHE: u32 = 0x200;
/// Modifier bit `PAGE_WRITECOMBINE`.
pub const PAGE_WRITECOMBINE: u32 = 0x400;

/// Every protection value that `MemBlock::is_readable` treats as readable.
const READABLE: u32 = PAGE_READONLY
    | PAGE_WRITECOPY
    | PAGE_READWRITE
    | PAGE_EXECUTE_READ
    | PAGE_EXECUTE_WRITECOPY
    | PAGE_EXECUTE_READWRITE;

/// Every protection value that `MemBlock::is_writable` treats as writable.
// NOTE(review): `PAGE_EXECUTE_WRITECOPY` is part of `READABLE` and `EXECUTABLE`
// but not of this mask, although `PAGE_WRITECOPY` is - confirm this is intended.
const WRITABLE: u32 = PAGE_WRITECOPY | PAGE_READWRITE | PAGE_EXECUTE_READWRITE;

/// Every protection value that `MemBlock::is_executable` treats as executable.
const EXECUTABLE: u32 =
    PAGE_EXECUTE_WRITECOPY | PAGE_EXECUTE_READWRITE | PAGE_EXECUTE_READ | PAGE_EXECUTE;
/// A module recorded in the dump's module list.
// `len` reports a size in bytes; `Module::new` rejects empty ranges, so an
// `is_empty` companion would be meaningless.
#[allow(clippy::len_without_is_empty)]
#[derive(Default, Debug)]
pub struct Module<'a> {
    /// Half-open address range `[base, base + size)` covered by the image.
    pub range: ops::Range<u64>,
    /// `checksum` copied from the module entry.
    pub checksum: u32,
    /// `time_date_stamp` copied from the module entry.
    pub time_date_stamp: u32,
    /// Module name as stored in the dump, kept as a path.
    pub path: path::PathBuf,
    /// `version_info` copied from the module entry.
    pub version_info: FixedFileInfo,
    /// Raw bytes referenced by the entry's `cv_record` location.
    pub cv_record: &'a [u8],
    /// Raw bytes referenced by the entry's `misc_record` location.
    pub misc_record: &'a [u8],
}

impl<'a> Module<'a> {
    /// Builds a module from its dump entry, its decoded name and its two raw
    /// records.
    ///
    /// # Panics
    ///
    /// Panics with "range is malformed" when the image size is zero or when
    /// `base_of_image + size_of_image` does not fit in a `u64`.
    fn new(
        entry: ModuleEntry,
        module_name: String,
        cv_record: &'a [u8],
        misc_record: &'a [u8],
    ) -> Self {
        let start = entry.base_of_image;
        // `checked_add` turns an overflow into the same panic as an empty
        // range, instead of a debug-only overflow panic or a silent wrap.
        let Some(end) = start
            .checked_add(entry.size_of_image as u64)
            .filter(|&end| end > start)
        else {
            panic!("range is malformed");
        };

        Self {
            range: start..end,
            checksum: entry.checksum,
            time_date_stamp: entry.time_date_stamp,
            path: module_name.into(),
            version_info: entry.version_info,
            cv_record,
            misc_record,
        }
    }

    /// Returns the final component of `path` as UTF-8.
    ///
    /// Returns `None` when the path has no final component (empty path, or a
    /// path ending in `..`) or when that component is not valid UTF-8.
    pub fn file_name(&self) -> Option<&str> {
        self.path.file_name()?.to_str()
    }

    /// Returns the first address of the module.
    pub fn start_addr(&self) -> u64 {
        self.range.start
    }

    /// Returns the address of the last byte of the module (inclusive).
    ///
    /// Saturates at zero for a default-constructed module instead of
    /// underflowing.
    pub fn end_addr(&self) -> u64 {
        self.range.end.saturating_sub(1)
    }

    /// Returns the size of the module in bytes.
    pub fn len(&self) -> u64 {
        self.range.end - self.range.start
    }
}
/// Per-thread context, with one variant per architecture in [`Arch`]
/// (presumably the saved CPU register state - confirm against `structs`).
#[derive(Debug)]
pub enum ThreadContext {
    /// Context of a thread in an x86 dump.
    X86(Box<ThreadContextX86>),
    /// Context of a thread in an x64 dump.
    X64(Box<ThreadContextX64>),
}

/// Formatting is forwarded to the `fmt` method of the boxed,
/// architecture-specific context.
impl fmt::Display for ThreadContext {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            ThreadContext::X86(ref x86) => x86.fmt(f),
            ThreadContext::X64(ref x64) => x64.fmt(f),
        }
    }
}
/// A thread recorded in the dump's thread list.
#[derive(Debug)]
pub struct Thread {
    /// Thread id (`thread_id` of the dump entry).
    pub id: u32,
    /// `suspend_count` copied from the dump entry.
    pub suspend_count: u32,
    /// `priority_class` copied from the dump entry.
    pub priority_class: u32,
    /// `priority` copied from the dump entry.
    pub priority: u32,
    /// `teb` copied from the dump entry.
    pub teb: u64,
    /// Architecture-specific context; read through [`Thread::context`].
    context: ThreadContext,
}

impl Thread {
    /// Combines a thread list entry with its already-decoded context.
    fn new(entry: ThreadEntry, context: ThreadContext) -> Self {
        let id = entry.thread_id;
        let suspend_count = entry.suspend_count;
        let priority_class = entry.priority_class;
        let priority = entry.priority;
        let teb = entry.teb;

        Thread {
            id,
            suspend_count,
            priority_class,
            priority,
            teb,
            context,
        }
    }

    /// Borrows the thread's context.
    pub fn context(&self) -> &ThreadContext {
        let Thread { context, .. } = self;
        context
    }
}
#[derive(Default, Debug)]
#[allow(clippy::len_without_is_empty)]
pub struct MemBlock<'a> {
pub range: ops::Range<u64>,
pub allocation_base: u64,
pub allocation_protect: u32,
pub state: u32,
pub protect: u32,
pub type_: u32,
pub data: &'a [u8],
}
impl<'a> MemBlock<'a> {
pub fn is_readable(&self) -> bool {
(self.protect & READABLE) != 0
}
pub fn is_writable(&self) -> bool {
(self.protect & WRITABLE) != 0
}
pub fn is_executable(&self) -> bool {
(self.protect & EXECUTABLE) != 0
}
pub fn state_as_str(&self) -> &str {
match self.state {
0x10_00 => "MEM_COMMIT",
0x20_00 => "MEM_RESERVE",
0x1_00_00 => "MEM_FREE",
_ => "UNKNOWN",
}
}
pub fn type_as_str(&self) -> &str {
if self.state == 0x1_00_00 {
return "";
}
match self.type_ {
0x2_00_00 => "MEM_PRIVATE",
0x4_00_00 => "MEM_MAPPED",
0x1_00_00_00 => "MEM_IMAGE",
_ => "UNKNOWN",
}
}
pub fn protect_as_str(&self) -> String {
if self.protect == 0 {
return "".into();
}
let bits = collections::HashMap::from([
(PAGE_GUARD, "PAGE_GUARD"),
(PAGE_NOCACHE, "PAGE_NOCACHE"),
(PAGE_WRITECOMBINE, "PAGE_WRITECOMBINE"),
]);
let mut parts = vec::Vec::new();
let mut protect = self.protect;
for (mask, str) in bits.iter() {
if (protect & mask) == 0 {
continue;
}
protect &= !mask;
parts.push(*str);
}
parts.push(match protect {
PAGE_NOACCESS => "PAGE_NOACCESS",
PAGE_READONLY => "PAGE_READONLY",
PAGE_READWRITE => "PAGE_READWRITE",
PAGE_WRITECOPY => "PAGE_WRITECOPY",
PAGE_EXECUTE => "PAGE_EXECUTE",
PAGE_EXECUTE_READ => "PAGE_EXECUTE_READ",
PAGE_EXECUTE_READWRITE => "PAGE_EXECUTE_READWRITE",
PAGE_EXECUTE_WRITECOPY => "PAGE_EXECUTE_WRITECOPY",
_ => "UNKNOWN",
});
parts.join(" | ")
}
pub fn data_from(&self, addr: u64) -> Option<&[u8]> {
if self.data.is_empty() || !self.range.contains(&addr) {
return None;
}
let offset = addr - self.range.start;
Some(&self.data[offset.try_into().unwrap()..])
}
pub fn start_addr(&self) -> u64 {
self.range.start
}
pub fn end_addr(&self) -> u64 {
self.range.end
}
pub fn len(&self) -> u64 {
self.range.end - self.range.start
}
}
/// Builds a data-less block from a memory info entry.
///
/// `data` is left empty. The end of the range saturates at `u64::MAX`, so a
/// malformed `base_address + region_size` neither panics nor wraps into an
/// inverted range.
impl<'a> From<MemoryInfo> for MemBlock<'a> {
    fn from(value: MemoryInfo) -> Self {
        let start = value.base_address;
        let end = start.saturating_add(value.region_size);

        Self {
            range: start..end,
            allocation_base: value.allocation_base,
            allocation_protect: value.allocation_protect,
            state: value.state,
            protect: value.protect,
            type_: value.type_,
            ..Default::default()
        }
    }
}
/// Memory blocks keyed by the base address of each block.
pub type MemBlocks<'a> = collections::BTreeMap<u64, MemBlock<'a>>;
/// Threads keyed by thread id.
pub type Threads = collections::BTreeMap<u32, Thread>;
/// Modules keyed by the start address of each module's range.
pub type Modules<'a> = collections::BTreeMap<u64, Module<'a>>;
/// Processor architecture of the dumped process, as decoded from the system
/// info stream.
#[derive(Debug, Clone, Copy)]
pub enum Arch {
/// Selected when the stream's `processor_arch` is `ARCH_X86`.
X86,
/// Selected when the stream's `processor_arch` is `ARCH_X64`.
X64,
}
/// Parsed view of a user-mode dump: architecture, memory blocks, modules and
/// threads.
#[derive(Debug)]
pub struct UserDumpParser<'a> {
/// Thread id read from the exception stream; `None` when the dump has no
/// such stream.
pub foreground_tid: Option<u32>,
// Architecture decoded from the system info stream.
arch: Arch,
// Memory blocks keyed by base address.
mem_blocks: MemBlocks<'a>,
// Modules keyed by start address.
modules: Modules<'a>,
// Threads keyed by thread id.
threads: Threads,
// Stored but never read back after parsing; presumably kept so the bytes the
// slices above point into stay alive - confirm against `MappedFile`.
_mapped_file: MappedFile<'a>,
}
impl<'a> UserDumpParser<'a> {
/// Opens `path` through `MappedFile::new` and parses the result as a user dump.
///
/// # Errors
///
/// Forwards the error of `MappedFile::new` or of [`UserDumpParser::with_file`].
pub fn new<S: AsRef<path::Path>>(path: S) -> io::Result<UserDumpParser<'a>> {
    Self::with_file(MappedFile::new(path)?)
}
/// Parses a user dump held in memory, through anything that dereferences to a
/// byte slice.
///
/// # Errors
///
/// Forwards the error of [`UserDumpParser::with_file`].
pub fn with_slice(
    slice: &'a impl std::ops::Deref<Target = [u8]>,
) -> io::Result<UserDumpParser<'a>> {
    // Deref coercion yields the underlying bytes with the `'a` lifetime.
    let bytes: &'a [u8] = slice;
    Self::with_file(MappedFile::from(bytes))
}
/// Returns `true` when the dump's architecture is [`Arch::X64`].
pub fn is_arch_x64(&self) -> bool {
    match self.arch {
        Arch::X64 => true,
        Arch::X86 => false,
    }
}
/// Returns `true` when the dump's architecture is [`Arch::X86`].
pub fn is_arch_x86(&self) -> bool {
    match self.arch {
        Arch::X86 => true,
        Arch::X64 => false,
    }
}
pub fn modules(&self) -> &Modules {
&self.modules
}
/// Returns the first module (lowest start address) whose range contains
/// `address`, if any.
pub fn get_module(&self, address: u64) -> Option<&Module> {
    // The map is keyed by each module's start address, so a module whose key
    // is above `address` can never contain it; only the others are visited.
    self.modules
        .range(..=address)
        .map(|(_, candidate)| candidate)
        .find(|candidate| candidate.range.contains(&address))
}
pub fn threads(&self) -> &Threads {
&self.threads
}
/// Returns the thread whose id is `id`, if the dump recorded one.
pub fn get_thread(&self, id: u32) -> Option<&Thread> {
    // The map is keyed by thread id (the parser inserts each thread under its
    // own `thread_id`), so a keyed lookup replaces the former linear scan.
    self.threads.get(&id)
}
pub fn mem_blocks(&self) -> &MemBlocks {
&self.mem_blocks
}
/// Returns the first memory block (lowest base address) whose range contains
/// `address`, if any.
pub fn get_mem_block(&self, address: u64) -> Option<&MemBlock> {
    // Blocks are keyed by the start of their range, so a block whose key is
    // above `address` can never contain it; only the others are visited.
    self.mem_blocks
        .range(..=address)
        .map(|(_, candidate)| candidate)
        .find(|candidate| candidate.range.contains(&address))
}
/// Returns the `location.data_size` bytes found at offset `location.rva` of
/// the buffer behind `reader`. The cursor position is neither read nor moved.
///
/// # Errors
///
/// Returns `UnexpectedEof` when the requested window does not lie entirely
/// inside the buffer, including when the offset itself is past the end.
fn slice_from_location_descriptor(
    reader: &Cursor,
    location: LocationDescriptor32,
) -> io::Result<&'a [u8]> {
    let too_short = || {
        io::Error::new(
            io::ErrorKind::UnexpectedEof,
            "not enough data for slicing",
        )
    };

    let offset: usize = location.rva.try_into().map_err(|_| too_short())?;
    let len: usize = location.data_size.try_into().map_err(|_| too_short())?;

    // A checked range lookup rejects out-of-bounds offsets with an error
    // rather than a panic; offsets come straight from the (untrusted) dump.
    let data = reader.get_ref();
    let window = offset
        .checked_add(len)
        .and_then(|end| data.get(offset..end))
        .ok_or_else(too_short)?;

    // SAFETY: the pointer and length describe `window`, a bounds-checked
    // sub-slice of the cursor's buffer, so the memory is valid and the length
    // cannot exceed `isize::MAX`. The only thing this block adds is the `'a`
    // lifetime. NOTE(review): that is only sound if the buffer behind `reader`
    // lives for `'a`; the cursors used here are created from the
    // `MappedFile<'a>` in `with_file` - confirm that holds for every caller.
    Ok(unsafe { slice::from_raw_parts(window.as_ptr(), window.len()) })
}
/// Reads the system info stream at the cursor and maps its `processor_arch`
/// to an [`Arch`].
///
/// # Errors
///
/// Forwards read errors, and returns `InvalidData` for an architecture other
/// than `ARCH_X86` / `ARCH_X64` (the dump is untrusted input, so this is
/// reported rather than panicking).
fn parse_system_info(cursor: &mut Cursor) -> io::Result<Arch> {
    let system_info = read_struct::<SystemInfoStream>(cursor)?;

    match system_info.processor_arch {
        ARCH_X86 => Ok(Arch::X86),
        ARCH_X64 => Ok(Arch::X64),
        unsupported => Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("Unsupported architecture {:x}", unsupported),
        )),
    }
}
/// Reads the exception stream at the cursor and returns its `thread_id`.
fn parse_exception(cursor: &mut Cursor) -> io::Result<u32> {
    read_struct::<ExceptionStream>(cursor).map(|stream| stream.thread_id)
}
/// Reads the memory info list at the cursor and returns one data-less
/// [`MemBlock`] per entry, keyed by base address.
///
/// # Errors
///
/// Returns `InvalidData` when the dump's entry size is smaller than
/// `MemoryInfo`, or when two entries share a base address; read and seek
/// errors are forwarded.
fn parse_mem_info_list(cursor: &mut Cursor) -> io::Result<MemBlocks<'a>> {
    let header = read_struct::<MemoryInfoListStream>(cursor)?;

    // Entries are walked with the stride announced by the dump, which may be
    // larger than the structure we know about but must not be smaller.
    let known_size = mem::size_of::<MemoryInfo>() as u32;
    let stride = header.size_of_entry;
    if stride < known_size {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!(
                "MemoryInfo's size ({}) doesn't match the dump ({})",
                known_size, stride
            ),
        ));
    }

    let mut blocks = MemBlocks::new();
    for _ in 0..header.number_of_entries {
        // Peek so that the cursor can then be advanced by `stride` rather than
        // by the size of our own structure.
        let info = peek_struct::<MemoryInfo>(cursor)?;
        let base = info.base_address;
        if blocks.insert(base, info.into()).is_some() {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!("Address {} already in the mem map", base),
            ));
        }

        cursor.seek(io::SeekFrom::Current(stride.into()))?;
    }

    Ok(blocks)
}
/// Reads the memory64 list at the cursor and attaches the captured bytes to
/// the matching entries of `mem_blocks`.
///
/// The bytes of all ranges are stored back to back starting at the stream's
/// `base_rva`, in descriptor order.
///
/// # Errors
///
/// Returns `InvalidData` when a descriptor has no matching block, when a data
/// offset or size does not fit the 32-bit location descriptor used for
/// slicing, or when the running offset overflows. Read and slicing errors are
/// forwarded. None of these conditions panic any more.
fn parse_mem64_list(cursor: &mut Cursor, mem_blocks: &mut MemBlocks<'a>) -> io::Result<()> {
    let invalid = |message: String| io::Error::new(io::ErrorKind::InvalidData, message);

    let mem_list = read_struct::<Memory64ListStream>(cursor)?;
    let mut data_offset = mem_list.base_rva;
    for _ in 0..mem_list.number_of_memory_ranges {
        let descriptor = read_struct::<MemoryDescriptor64>(cursor)?;
        let start = descriptor.start_of_memory_range;
        let size = descriptor.data_size;

        // Built lazily: the message is only formatted on the error path.
        let entry = mem_blocks.get_mut(&start).ok_or_else(|| {
            invalid(format!(
                "Address {} in Memory64ListStream but not in MemoryInfoListStream",
                start
            ))
        })?;

        let location = LocationDescriptor32 {
            rva: data_offset.try_into().map_err(|_| {
                invalid(format!(
                    "Memory64ListStream data offset {:#x} does not fit a 32-bit RVA",
                    data_offset
                ))
            })?,
            data_size: size.try_into().map_err(|_| {
                invalid(format!(
                    "Memory range at {:#x} is too large ({:#x} bytes) for a 32-bit size",
                    start, size
                ))
            })?,
        };
        entry.data = Self::slice_from_location_descriptor(cursor, location)?;

        data_offset = data_offset.checked_add(size).ok_or_else(|| {
            invalid(format!(
                "Memory64ListStream data offset overflows after the range at {:#x}",
                start
            ))
        })?;
    }

    Ok(())
}
/// Reads the thread list at the cursor and returns the threads keyed by id,
/// each with its context decoded according to `arch`.
///
/// # Errors
///
/// Returns `InvalidData` when a thread's context is smaller than the
/// structure for `arch`, or when two entries share a thread id; read, seek
/// and slicing errors are forwarded.
fn parse_thread_list(cursor: &mut Cursor, arch: Arch) -> io::Result<Threads> {
    let thread_list = read_struct::<ThreadList>(cursor)?;
    let mut threads = Threads::new();

    for _ in 0..thread_list.number_of_threads {
        let entry = read_struct::<ThreadEntry>(cursor)?;
        // Remember where the next entry starts; the position is restored at
        // the end of the iteration.
        let next_entry = cursor.stream_position()?;
        let tid = entry.thread_id;
        let raw = Self::slice_from_location_descriptor(cursor, entry.thread_context)?;

        let bad_size = |arch_name: &str| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                format!(
                    "The {} thread context for TID {} has an unexpected size",
                    arch_name, tid
                ),
            )
        };

        let context = match arch {
            Arch::X86 if raw.len() < mem::size_of::<ThreadContextX86>() => {
                return Err(bad_size("X86"));
            }
            Arch::X64 if raw.len() < mem::size_of::<ThreadContextX64>() => {
                return Err(bad_size("X64"));
            }
            // SAFETY (both arms): the guards above ensure `raw` holds at least
            // `size_of` bytes of the target type, and `read_unaligned` has no
            // alignment requirement. NOTE(review): assumes every bit pattern
            // is a valid context structure - confirm against `structs`.
            Arch::X86 => ThreadContext::X86(Box::new(unsafe {
                raw.as_ptr().cast::<ThreadContextX86>().read_unaligned()
            })),
            Arch::X64 => ThreadContext::X64(Box::new(unsafe {
                raw.as_ptr().cast::<ThreadContextX64>().read_unaligned()
            })),
        };

        if threads.insert(tid, Thread::new(entry, context)).is_some() {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!("Thread {} already in the map", tid),
            ));
        }

        cursor.seek(io::SeekFrom::Start(next_entry))?;
    }

    Ok(threads)
}
/// Reads the module list at the cursor and returns the modules keyed by the
/// start of their address range.
///
/// For each entry the two raw records are sliced out of the dump and the
/// name (a `u32` byte length followed by UTF-16LE data at `module_name_rva`)
/// is decoded.
///
/// # Errors
///
/// Returns `InvalidData` when an image range is empty or overflows, when a
/// name is not valid UTF-16, or when two modules share a start address, and
/// `UnexpectedEof` when a name claims to be longer than the dump. Read, seek
/// and slicing errors are forwarded.
fn parse_module_list(cursor: &mut Cursor) -> io::Result<Modules<'a>> {
    let invalid = |message: String| io::Error::new(io::ErrorKind::InvalidData, message);

    let mut modules = Modules::new();
    let module_list = read_struct::<ModuleList>(cursor)?;
    for _ in 0..module_list.number_of_modules {
        let module = read_struct::<ModuleEntry>(cursor)?;
        // The name lives elsewhere in the file; remember where the next entry
        // starts so the position can be restored afterwards.
        let pos = cursor.stream_position()?;

        // `Module::new` panics on an empty or overflowing range; the dump is
        // untrusted, so report those as errors before getting there.
        let base = module.base_of_image;
        let size = module.size_of_image as u64;
        if size == 0 || base.checked_add(size).is_none() {
            return Err(invalid(format!(
                "Module at {:#x} has a malformed size ({:#x})",
                base, size
            )));
        }

        let cv_record = Self::slice_from_location_descriptor(cursor, module.cv_record)?;
        let misc_record = Self::slice_from_location_descriptor(cursor, module.misc_record)?;

        cursor.seek(io::SeekFrom::Start(module.module_name_rva.into()))?;
        let module_name_length: usize = read_struct::<u32>(cursor)?
            .try_into()
            .map_err(|_| invalid("Module name length does not fit in memory".into()))?;

        // A name cannot be longer than the dump itself; checking first avoids
        // allocating up to 4 GiB for a bogus length.
        if module_name_length > cursor.get_ref().len() {
            return Err(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                "Module name is longer than the dump",
            ));
        }

        let mut module_name = vec![0; module_name_length];
        cursor.read_exact(module_name.as_mut_slice())?;
        let module_name = utf16_string_from_slice(&module_name)
            .map_err(|e| invalid(format!("Module name is not valid UTF-16: {e}")))?;

        let module = Module::new(module, module_name, cv_record, misc_record);
        let previous_val = modules.insert(module.range.start, module);
        if let Some(previous_val) = previous_val {
            return Err(invalid(format!(
                "Module {} already in the map",
                previous_val.path.display()
            )));
        }

        cursor.seek(io::SeekFrom::Start(pos))?;
    }

    Ok(modules)
}
/// Parses a user dump out of an already-mapped file.
///
/// The header is validated, the stream directory is indexed, and the known
/// streams are then parsed in a fixed order.
///
/// # Errors
///
/// Returns `InvalidData` when the header signature is not the expected one,
/// when the header flags contain bits outside `VALID_DUMP_FLAGS`, or when a
/// required stream (system info, memory info list, memory64 list) is
/// missing. Errors from the individual stream parsers are forwarded.
pub fn with_file(_mapped_file: MappedFile<'a>) -> io::Result<UserDumpParser<'a>> {
    let mut cursor = _mapped_file.cursor();

    let hdr = read_struct::<Header>(&mut cursor)?;
    if hdr.signature != EXPECTED_DUMP_SIGNATURE {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("Header signature {:x} is unexpected", hdr.signature),
        ));
    }

    // Only bits *outside* the valid mask make the header suspicious. Testing
    // `flags & VALID_DUMP_FLAGS` would reject every dump that sets a valid
    // flag. NOTE(review): assumes `VALID_DUMP_FLAGS` is the mask of accepted
    // bits, as its name says - confirm against `structs`.
    if (hdr.flags & !VALID_DUMP_FLAGS) != 0 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("Header flags {:x} are unexpected", hdr.flags),
        ));
    }

    // Index the stream directory by stream type; an unused entry ends the walk.
    cursor.seek(io::SeekFrom::Start(hdr.stream_directory_rva.into()))?;
    let mut directory_locations = collections::HashMap::new();
    for _ in 0..hdr.number_of_streams {
        let directory = read_struct::<Directory>(&mut cursor)?;
        if directory.stream_type == STREAM_TYPE_UNUSED {
            break;
        }

        directory_locations.insert(directory.stream_type, directory.location);
    }

    // The order matters: the architecture is needed to decode thread
    // contexts, and the memory64 list attaches data to the blocks created
    // from the memory info list.
    let required = true;
    let not_required = false;
    let directory_parsing_order = [
        (STREAM_TYPE_SYSTEM_INFO, required),
        (STREAM_TYPE_EXCEPTION, not_required),
        (STREAM_TYPE_MEMORY_INFO_LIST, required),
        (STREAM_TYPE_MEMORY64_LIST, required),
        (STREAM_TYPE_THREAD_LIST, not_required),
        (STREAM_TYPE_MODULE_LIST, not_required),
    ];

    let mut arch = None;
    let mut foreground_tid = None;
    let mut mem_blocks = MemBlocks::new();
    let mut modules = Modules::new();
    let mut threads = Threads::new();
    for (directory_type, required) in directory_parsing_order {
        let directory_location = directory_locations.get(&directory_type);
        let Some(directory_location) = directory_location else {
            if required {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!("The directory {directory_type} is required but not present"),
                ));
            }

            continue;
        };

        cursor.seek(io::SeekFrom::Start(directory_location.rva.into()))?;
        match directory_type {
            STREAM_TYPE_SYSTEM_INFO => arch = Some(Self::parse_system_info(&mut cursor)?),
            STREAM_TYPE_EXCEPTION => foreground_tid = Some(Self::parse_exception(&mut cursor)?),
            STREAM_TYPE_MEMORY_INFO_LIST => {
                mem_blocks = Self::parse_mem_info_list(&mut cursor)?
            }
            STREAM_TYPE_MEMORY64_LIST => Self::parse_mem64_list(&mut cursor, &mut mem_blocks)?,
            STREAM_TYPE_THREAD_LIST => {
                let arch = arch.expect("the system info stream is required and parsed first");
                threads = Self::parse_thread_list(&mut cursor, arch)?
            }
            STREAM_TYPE_MODULE_LIST => modules = Self::parse_module_list(&mut cursor)?,
            _ => unreachable!("Only parsing stream types we know about"),
        };
    }

    let arch = arch.expect("the system info stream is required and parsed first");

    Ok(UserDumpParser {
        _mapped_file,
        arch,
        foreground_tid,
        mem_blocks,
        modules,
        threads,
    })
}
}
/// Reads a `T` from the cursor's current position without advancing it.
///
/// The value is produced by copying `size_of::<T>()` raw bytes, so `T` must be
/// a plain-data type for which every bit pattern is valid.
///
/// # Errors
///
/// Forwards the read error when fewer than `size_of::<T>()` bytes remain; in
/// that case the cursor position is *not* restored.
fn peek_struct<T>(cursor: &mut Cursor) -> io::Result<T> {
    // Zero-initialized rather than uninitialized: a `&mut [u8]` must only
    // ever cover initialized bytes.
    let mut s = mem::MaybeUninit::<T>::zeroed();
    let size_of_s = mem::size_of::<T>();

    // SAFETY: `s` owns exactly `size_of_s` writable, initialized bytes, `u8`
    // has no alignment requirement, and the slice is not used after `s` is
    // consumed below.
    let slice_over_s = unsafe { slice::from_raw_parts_mut(s.as_mut_ptr() as *mut u8, size_of_s) };

    let pos = cursor.position();
    cursor.read_exact(slice_over_s)?;
    cursor.seek(io::SeekFrom::Start(pos))?;

    // SAFETY: every byte of `s` was overwritten by `read_exact`. Validity of
    // the resulting `T` relies on the plain-data requirement stated above.
    Ok(unsafe { s.assume_init() })
}
/// Reads a `T` from the cursor's current position and moves the cursor past it.
///
/// # Errors
///
/// Forwards the errors of `peek_struct` and of the final seek.
fn read_struct<T>(cursor: &mut Cursor) -> io::Result<T> {
    let value = peek_struct::<T>(cursor)?;
    // `peek_struct` left the position untouched, so step over the value here.
    let advance: i64 = mem::size_of::<T>().try_into().unwrap();
    cursor.seek(io::SeekFrom::Current(advance))?;
    Ok(value)
}
/// Decodes `slice` as little-endian UTF-16.
///
/// # Errors
///
/// Returns `InvalidData` when the slice has an odd number of bytes or when it
/// contains an unpaired surrogate.
fn utf16_string_from_slice(slice: &[u8]) -> io::Result<String> {
    if slice.len() % 2 != 0 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "Slice length needs to be % 2",
        ));
    }

    // The length is even, so `chunks_exact` visits every byte.
    let units = slice
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]));

    // `map_err` builds the error only on failure; `.or(Err(..))` allocated it
    // on every call, including successful ones.
    char::decode_utf16(units)
        .collect::<Result<String, _>>()
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Module name is not UTF16"))
}
#[cfg(test)]
mod tests {
    use crate::UserDumpParser;
    use core::fmt::Debug;

    /// Compile-time check that the parser is `Send`, `Sync` and `Debug`.
    #[test]
    fn assert_traits() {
        fn require_send_sync_debug<T>()
        where
            T: Send + Sync + Debug,
        {
        }

        require_send_sync_debug::<UserDumpParser>();
    }
}