// udmp_parser/udmp_parser.rs
// Axel '0vercl0k' Souchet - July 29 2023
//! This module is where the parsing logic is implemented. The
//! [`UserDumpParser`] can memory map a file by default but users can also build
//! an instance from a slice they got from somewhere else.
use std::io::{Read, Seek};
use std::{collections, fmt, io, mem, ops, path, slice, vec};

use crate::map::{Cursor, MappedFile};
use crate::structs::*;

/// Disables all access to the committed region of pages. An attempt to read
/// from, write to, or execute the committed region results in an access
/// violation.
pub const PAGE_NOACCESS: u32 = 0x01;
/// Enables read-only access to the committed region of pages. An attempt to
/// write to the committed region results in an access violation. If Data
/// Execution Prevention is enabled, an attempt to execute code in the committed
/// region results in an access violation.
pub const PAGE_READONLY: u32 = 0x02;
/// Enables read-only or read/write access to the committed region of pages. If
/// Data Execution Prevention is enabled, attempting to execute code in the
/// committed region results in an access violation.
pub const PAGE_READWRITE: u32 = 0x04;
/// Enables read-only or copy-on-write access to a mapped view of a file mapping
/// object. An attempt to write to a committed copy-on-write page results in a
/// private copy of the page being made for the process. The private page is
/// marked as PAGE_READWRITE, and the change is written to the new page. If Data
/// Execution Prevention is enabled, attempting to execute code in the committed
/// region results in an access violation.
pub const PAGE_WRITECOPY: u32 = 0x08;
/// Enables execute access to the committed region of pages. An attempt to write
/// to the committed region results in an access violation.
pub const PAGE_EXECUTE: u32 = 0x10;
/// Enables execute or read-only access to the committed region of pages. An
/// attempt to write to the committed region results in an access violation.
pub const PAGE_EXECUTE_READ: u32 = 0x20;
/// Enables execute, read-only, or read/write access to the committed region of
/// pages.
pub const PAGE_EXECUTE_READWRITE: u32 = 0x40;
/// Enables execute, read-only, or copy-on-write access to a mapped view of a
/// file mapping object. An attempt to write to a committed copy-on-write page
/// results in a private copy of the page being made for the process. The
/// private page is marked as PAGE_EXECUTE_READWRITE, and the change is written
/// to the new page.
pub const PAGE_EXECUTE_WRITECOPY: u32 = 0x80;
/// Pages in the region become guard pages. Any attempt to access a guard page
/// causes the system to raise a STATUS_GUARD_PAGE_VIOLATION exception and turn
/// off the guard page status. Guard pages thus act as a one-time access alarm.
pub const PAGE_GUARD: u32 = 0x100;
/// Sets all pages to be non-cachable. Applications should not use this
/// attribute except when explicitly required for a device. Using the
/// interlocked functions with memory that is mapped with SEC_NOCACHE can result
/// in an EXCEPTION_ILLEGAL_INSTRUCTION exception.
pub const PAGE_NOCACHE: u32 = 0x200;
/// Sets all pages to be write-combined. Applications should not use this
/// attribute except when explicitly required for a device. Using the
/// interlocked functions with memory that is mapped as write-combined can
/// result in an EXCEPTION_ILLEGAL_INSTRUCTION exception.
pub const PAGE_WRITECOMBINE: u32 = 0x400;

/// The memory rights constants on Windows make it annoying to know if the page
/// is readable / writable / executable, so we have to create our own masks.
/// A page is readable if it is protected with any of the below rights.
const READABLE: u32 = PAGE_READONLY
    | PAGE_WRITECOPY
    | PAGE_READWRITE
    | PAGE_EXECUTE_READ
    | PAGE_EXECUTE_READWRITE
    | PAGE_EXECUTE_WRITECOPY;

/// A page is writable if it is protected with any of the below rights.
const WRITABLE: u32 = PAGE_READWRITE | PAGE_WRITECOPY | PAGE_EXECUTE_READWRITE;
/// A page is executable if it is protected with any of the below rights.
const EXECUTABLE: u32 =
    PAGE_EXECUTE | PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE | PAGE_EXECUTE_WRITECOPY;
76
/// A DLL loaded in the virtual address space.
#[allow(clippy::len_without_is_empty)]
#[derive(Default, Debug)]
pub struct Module<'a> {
    /// The range of where the module is loaded in memory at.
    pub range: ops::Range<u64>,
    /// PE checksum of the module.
    pub checksum: u32,
    /// Timestamp.
    pub time_date_stamp: u32,
    /// The module path on the file system.
    pub path: path::PathBuf,
    /// The module's file version information.
    pub version_info: FixedFileInfo,
    /// The raw CodeView record bytes of the module, as stored in the dump.
    pub cv_record: &'a [u8],
    /// The raw misc record bytes of the module, as stored in the dump.
    pub misc_record: &'a [u8],
}
93
94impl<'a> Module<'a> {
95 /// Build a new [`Module`] instance.
96 fn new(
97 entry: ModuleEntry,
98 module_name: String,
99 cv_record: &'a [u8],
100 misc_record: &'a [u8],
101 ) -> Self {
102 let start = entry.base_of_image;
103 let end = entry.base_of_image + entry.size_of_image as u64;
104 let range = ops::Range { start, end };
105 if range.is_empty() {
106 panic!("range is malformed");
107 }
108
109 Self {
110 range,
111 checksum: entry.checksum,
112 time_date_stamp: entry.time_date_stamp,
113 path: module_name.into(),
114 version_info: entry.version_info,
115 cv_record,
116 misc_record,
117 }
118 }
119
120 /// Get the file name of the module. This returns [`None`] if the file name
121 /// can't be converted to a Rust string.
122 pub fn file_name(&self) -> Option<&str> {
123 self.path.file_name().unwrap().to_str()
124 }
125
126 /// Get the address of where the module was loaded at.
127 pub fn start_addr(&self) -> u64 {
128 self.range.start
129 }
130
131 /// Get the address of where the last byte of the module was loaded at.
132 pub fn end_addr(&self) -> u64 {
133 self.range.end - 1
134 }
135
136 /// Get the length of the range of memory the module was loaded at.
137 pub fn len(&self) -> u64 {
138 self.range.end - self.range.start
139 }
140}
141
/// A [`ThreadContext`] stores the thread contexts for the architecture that are
/// supported by the library.
///
/// The payloads are boxed, which keeps the enum itself small regardless of the
/// size of the raw context structures.
#[derive(Debug)]
pub enum ThreadContext {
    /// The Intel x86 thread context.
    X86(Box<ThreadContextX86>),
    /// The Intel x64 thread context.
    X64(Box<ThreadContextX64>),
}
151
152/// Display the [`ThreadContext`] like WinDbg would.
153impl fmt::Display for ThreadContext {
154 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155 match self {
156 Self::X86(ctx) => ctx.fmt(f),
157 Self::X64(ctx) => ctx.fmt(f),
158 }
159 }
160}
161
/// A thread that was running when the dump was generated.
#[derive(Debug)]
pub struct Thread {
    /// The thread ID.
    pub id: u32,
    /// The suspend count counter cf [Freezing and Suspending Threads](https://learn.microsoft.com/en-us/windows-hardware/drivers/debugger/controlling-processes-and-threads).
    pub suspend_count: u32,
    /// The priority class cf [Priority Class](https://learn.microsoft.com/en-us/windows/win32/procthread/scheduling-priorities).
    pub priority_class: u32,
    /// Thread priority cf [Priority level](https://learn.microsoft.com/en-us/windows/win32/procthread/scheduling-priorities).
    pub priority: u32,
    /// The thread environment block address.
    pub teb: u64,
    /// The thread context. Kept private; read it through [`Thread::context`].
    context: ThreadContext,
}
178
179impl Thread {
180 /// Build a new [`Thread`] instance.
181 fn new(entry: ThreadEntry, context: ThreadContext) -> Self {
182 Self {
183 id: entry.thread_id,
184 suspend_count: entry.suspend_count,
185 priority_class: entry.priority_class,
186 priority: entry.priority,
187 teb: entry.teb,
188 context,
189 }
190 }
191
192 /// Get a reference to the [`ThreadContext`].
193 pub fn context(&self) -> &ThreadContext {
194 &self.context
195 }
196}
197
/// A block of memory in the address space that isn't a [`Module`]. [`MemBlock`]
/// can have `data` associated with it but isn't a guarantee (think about a
/// memory region that is mapped as `PAGE_NOACCESS`).
#[derive(Default, Debug)]
#[allow(clippy::len_without_is_empty)]
pub struct MemBlock<'a> {
    /// Range over the start/end address of the memory region.
    pub range: ops::Range<u64>,
    /// The base of the allocation that gave life to this memory region.
    pub allocation_base: u64,
    /// The page protection used at allocation time.
    pub allocation_protect: u32,
    /// The state of the memory region. See [State](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-memory_basic_information).
    pub state: u32,
    /// The page protection currently applied to the memory region.
    pub protect: u32,
    /// The type of memory region. See [Type](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-memory_basic_information).
    pub type_: u32,
    /// The [`MemBlock`]'s data. Can be empty (no content was captured for the
    /// region) and can be shorter than the region itself.
    pub data: &'a [u8],
}
219
220impl<'a> MemBlock<'a> {
221 /// Is the memory region readable?
222 pub fn is_readable(&self) -> bool {
223 (self.protect & READABLE) != 0
224 }
225
226 /// Is the memory region writable?
227 pub fn is_writable(&self) -> bool {
228 (self.protect & WRITABLE) != 0
229 }
230
231 /// Is the memory region executable?
232 pub fn is_executable(&self) -> bool {
233 (self.protect & EXECUTABLE) != 0
234 }
235
236 /// Stringify the memory region state.
237 pub fn state_as_str(&self) -> &str {
238 match self.state {
239 0x10_00 => "MEM_COMMIT",
240 0x20_00 => "MEM_RESERVE",
241 0x1_00_00 => "MEM_FREE",
242 _ => "UNKNOWN",
243 }
244 }
245
246 /// Stringify the memory region type.
247 pub fn type_as_str(&self) -> &str {
248 if self.state == 0x1_00_00 {
249 return "";
250 }
251
252 match self.type_ {
253 0x2_00_00 => "MEM_PRIVATE",
254 0x4_00_00 => "MEM_MAPPED",
255 0x1_00_00_00 => "MEM_IMAGE",
256 _ => "UNKNOWN",
257 }
258 }
259
260 /// Stringify the memory region protection.
261 pub fn protect_as_str(&self) -> String {
262 if self.protect == 0 {
263 return "".into();
264 }
265
266 // Those bits are the only ones that can be combined with the page
267 // protections from below. So strip those first off `protect`.
268 let bits = collections::HashMap::from([
269 (PAGE_GUARD, "PAGE_GUARD"),
270 (PAGE_NOCACHE, "PAGE_NOCACHE"),
271 (PAGE_WRITECOMBINE, "PAGE_WRITECOMBINE"),
272 ]);
273
274 // This is where the parts of the stringified mask are stored in.
275 let mut parts = vec::Vec::new();
276 let mut protect = self.protect;
277
278 // Walk through the bits to check if turned on.
279 for (mask, str) in bits.iter() {
280 // If the current bit isn't set, skip.
281 if (protect & mask) == 0 {
282 continue;
283 }
284
285 // If it is set, strip it off from `protect` and push its
286 // stringified value in the vector.
287 protect &= !mask;
288 parts.push(*str);
289 }
290
291 // Now we can handle the 'normal' page properties.
292 parts.push(match protect {
293 PAGE_NOACCESS => "PAGE_NOACCESS",
294 PAGE_READONLY => "PAGE_READONLY",
295 PAGE_READWRITE => "PAGE_READWRITE",
296 PAGE_WRITECOPY => "PAGE_WRITECOPY",
297 PAGE_EXECUTE => "PAGE_EXECUTE",
298 PAGE_EXECUTE_READ => "PAGE_EXECUTE_READ",
299 PAGE_EXECUTE_READWRITE => "PAGE_EXECUTE_READWRITE",
300 PAGE_EXECUTE_WRITECOPY => "PAGE_EXECUTE_WRITECOPY",
301 _ => "UNKNOWN",
302 });
303
304 parts.join(" | ")
305 }
306
307 /// Get a slice over the [`MemBlock`]'s data from its absolute address.
308 ///
309 /// If the dump had a memory block of size 4 bytes starting at address
310 /// 0xdead then calling `data_from(0xdead+1)` returns a slice over the
311 /// last 3 bytes of the memory block. This is useful when you don't need
312 /// to reason about offsets.
313 pub fn data_from(&self, addr: u64) -> Option<&[u8]> {
314 // If the memory block is empty return `None`. Also bail if this
315 // `MemBlock` doesn't contain the address.
316 if self.data.is_empty() || !self.range.contains(&addr) {
317 return None;
318 }
319
320 // `addr` is contained in the range, so this is safe.
321 let offset = addr - self.range.start;
322
323 // Return the slice to the user.
324 Some(&self.data[offset.try_into().unwrap()..])
325 }
326
327 /// Get the address of where this [`MemBlock`] was at in memory.
328 pub fn start_addr(&self) -> u64 {
329 self.range.start
330 }
331
332 /// Get the end address of where this [`MemBlock`] was at in memory.
333 ///
334 /// Note that the underlying range is not inclusive, so this address is
335 /// pointing right after the last byte's address.
336 pub fn end_addr(&self) -> u64 {
337 self.range.end
338 }
339
340 /// Get the size of the [`MemBlock`].
341 ///
342 /// Note that a region of memory can exists without having any `data`
343 /// associated with it. This method returns the range len, not `data`'s len.
344 ///
345 /// An example is a memory region mapped as `PAGE_NOACCESS`; it exists in
346 /// the address space but has no content.
347 pub fn len(&self) -> u64 {
348 self.range.end - self.range.start
349 }
350}
351
/// Convert a [`MemoryInfo`] into a [`MemBlock`].
impl<'a> From<MemoryInfo> for MemBlock<'a> {
    fn from(value: MemoryInfo) -> Self {
        Self {
            // NOTE(review): `base_address + region_size` can overflow on a
            // corrupted dump (panic in debug builds, wrap-around yielding an
            // empty range in release builds) — confirm inputs are sanitized
            // upstream.
            range: value.base_address..(value.base_address + value.region_size),
            allocation_base: value.allocation_base,
            allocation_protect: value.allocation_protect,
            state: value.state,
            protect: value.protect,
            type_: value.type_,
            // No `data` is associated at this point; it gets attached later
            // while parsing the Memory64ListStream.
            ..Default::default()
        }
    }
}

/// Map a base address to a [`MemBlock`].
pub type MemBlocks<'a> = collections::BTreeMap<u64, MemBlock<'a>>;

/// Map a thread id to a [`Thread`].
pub type Threads = collections::BTreeMap<u32, Thread>;

/// Map a base address to a [`Module`].
pub type Modules<'a> = collections::BTreeMap<u64, Module<'a>>;
375
/// Architectures supported by the library. This drives how thread contexts are
/// decoded.
#[derive(Debug, Clone, Copy)]
pub enum Arch {
    /// Intel x86.
    X86,
    /// Intel x64.
    X64,
}
384
/// This stores useful information fished out of of Windows minidump file:
/// thread contexts and memory blocks.
#[derive(Debug)]
pub struct UserDumpParser<'a> {
    /// The thread id of the foreground thread. Only available if the dump had
    /// an exception stream.
    pub foreground_tid: Option<u32>,
    /// The architecture of the dumped process.
    arch: Arch,
    /// A map of [`MemBlock`]s.
    mem_blocks: MemBlocks<'a>,
    /// A map of [`Module`].
    modules: Modules<'a>,
    /// A map of [`Thread`].
    threads: Threads,
    /// This is where we hold the backing data. Either it is a memory mapped
    /// file, or a slice that needs to live as long as this.
    _mapped_file: MappedFile<'a>,
}
403
404impl<'a> UserDumpParser<'a> {
405 /// Create an instance from a filepath. This memory maps the file and parses
406 /// it.
407 pub fn new<S: AsRef<path::Path>>(path: S) -> io::Result<UserDumpParser<'a>> {
408 let mapped_file = MappedFile::new(path)?;
409 Self::with_file(mapped_file)
410 }
411
412 /// Create an instance from something that dereference to a slice of bytes.
413 pub fn with_slice(
414 slice: &'a impl std::ops::Deref<Target = [u8]>,
415 ) -> io::Result<UserDumpParser<'a>> {
416 Self::with_file(MappedFile::from(slice.deref()))
417 }
418
419 /// Is the architeture X64?
420 pub fn is_arch_x64(&self) -> bool {
421 matches!(self.arch, Arch::X64)
422 }
423
424 /// Is the architecture X86?
425 pub fn is_arch_x86(&self) -> bool {
426 matches!(self.arch, Arch::X86)
427 }
428
429 /// Get a reference to the base address -> [`Module`] map.
430 pub fn modules(&self) -> &Modules {
431 &self.modules
432 }
433
434 /// Find a [`Module`] that includes `address` in its range.
435 pub fn get_module(&self, address: u64) -> Option<&Module> {
436 self.modules
437 .values()
438 .find(|module| module.range.contains(&address))
439 }
440
441 /// Get a reference to the TID -> [`Thread`] map.
442 pub fn threads(&self) -> &Threads {
443 &self.threads
444 }
445
446 /// Find a [`Thread`] with a specific TID.
447 pub fn get_thread(&self, id: u32) -> Option<&Thread> {
448 self.threads.values().find(|thread| thread.id == id)
449 }
450
451 /// Get a reference to the base address -> [`MemBlock`] map.
452 pub fn mem_blocks(&self) -> &MemBlocks {
453 &self.mem_blocks
454 }
455
456 /// Find a [`MemBlock`] that includes `address` in its range.
457 pub fn get_mem_block(&self, address: u64) -> Option<&MemBlock> {
458 self.mem_blocks
459 .values()
460 .find(|block| block.range.contains(&address))
461 }
462
463 /// Utility to get a slice from a [`LocationDescriptor32`] safely.
464 fn slice_from_location_descriptor(
465 reader: &Cursor,
466 location: LocationDescriptor32,
467 ) -> io::Result<&'a [u8]> {
468 // Grab the offset and the wanted len.
469 let offset = location.rva.try_into().unwrap();
470 let len = location.data_size.try_into().unwrap();
471
472 // Grab a reference on the underlying slice.
473 let slice_ref = reader.get_ref();
474
475 // Split the slice in two. We only care about the tail.
476 let (_, tail) = slice_ref.split_at(offset);
477
478 // Make sure the tail slice is big enough.
479 if tail.len() < len {
480 return Err(io::Error::new(
481 io::ErrorKind::UnexpectedEof,
482 "not enough data for slicing",
483 ));
484 }
485
486 // Make sure we hold `from_raw_parts`'s contract.
487 if len > isize::MAX.try_into().unwrap() {
488 panic!("len > isize::MAX");
489 }
490
491 // Build the slice!
492 Ok(unsafe { slice::from_raw_parts(tail.as_ptr(), len) })
493 }
494
495 /// Parse the system info stream to know which architecture is used.
496 fn parse_system_info(cursor: &mut Cursor) -> io::Result<Arch> {
497 // Read the stream info.
498 let system_info = read_struct::<SystemInfoStream>(cursor)?;
499
500 // Build the value of the enum safely.
501 Ok(match system_info.processor_arch {
502 ARCH_X86 => Arch::X86,
503 ARCH_X64 => Arch::X64,
504 _ => panic!("Unsupported architecture {:x}", system_info.processor_arch),
505 })
506 }
507
508 /// Parse the exception stream to know figure out if there's a foreground
509 /// TID.
510 fn parse_exception(cursor: &mut Cursor) -> io::Result<u32> {
511 // Read the exception stream.
512 let exception = read_struct::<ExceptionStream>(cursor)?;
513
514 // Return the TID.
515 Ok(exception.thread_id)
516 }
517
518 /// Parse the memory info list stream to build the [`MemBlocks`] map.
519 fn parse_mem_info_list(cursor: &mut Cursor) -> io::Result<MemBlocks<'a>> {
520 // Create storage for the memory blocks.
521 let mut mem_blocks = MemBlocks::new();
522
523 // Read the memory info list stream.
524 let mem_info_list = read_struct::<MemoryInfoListStream>(cursor)?;
525
526 // Ensure that each entry is at least as big as what we expected.
527 let mem_info_size = mem::size_of::<MemoryInfo>() as u32;
528 let size_of_entry = mem_info_list.size_of_entry;
529 if size_of_entry < mem_info_size {
530 return Err(io::Error::new(
531 io::ErrorKind::InvalidData,
532 format!(
533 "MemoryInfo's size ({}) doesn't match the dump ({})",
534 mem_info_size, mem_info_list.size_of_entry
535 ),
536 ));
537 }
538
539 // Iterate through every entries.
540 for _ in 0..mem_info_list.number_of_entries {
541 // Read the memory info structure.
542 let mem_info = peek_struct::<MemoryInfo>(cursor)?;
543
544 // The key in the map is the base address.
545 let key = mem_info.base_address;
546
547 // If we already inserted this address, there's something wrong so
548 // bail.
549 let previous_val = mem_blocks.insert(key, mem_info.into());
550 if previous_val.is_some() {
551 return Err(io::Error::new(
552 io::ErrorKind::InvalidData,
553 format!("Address {} already in the mem map", key),
554 ));
555 }
556
557 // Move on to the next entry.
558 cursor.seek(io::SeekFrom::Current(size_of_entry.into()))?;
559 }
560
561 // We're done.
562 Ok(mem_blocks)
563 }
564
565 /// Parse the memory64 list stream to associate data to the MemBlock we
566 /// parsed from the memory info list stream. That's why we parse the memory
567 /// info list stream first.
568 fn parse_mem64_list(cursor: &mut Cursor, mem_blocks: &mut MemBlocks<'a>) -> io::Result<()> {
569 // Read the memory64 list stream.
570 let mem_list = read_struct::<Memory64ListStream>(cursor)?;
571
572 // Grab the starting offset.
573 let mut data_offset = mem_list.base_rva;
574
575 // Iterate through every entries.
576 for _ in 0..mem_list.number_of_memory_ranges {
577 // Read a descriptor.
578 let descriptor = read_struct::<MemoryDescriptor64>(cursor)?;
579
580 // Get a reference to the associated MemBlock off `mem_blocks`.
581 let entry = mem_blocks
582 .get_mut(&descriptor.start_of_memory_range)
583 .ok_or(io::Error::new(
584 io::ErrorKind::InvalidData,
585 format!(
586 "Address {} in Memory64ListStream but not in MemoryInfoListStream",
587 descriptor.start_of_memory_range
588 ),
589 ))?;
590
591 // Read the slice of bytes and associate it to the MemBlock instance.
592 entry.data = Self::slice_from_location_descriptor(cursor, LocationDescriptor32 {
593 rva: data_offset.try_into().unwrap(),
594 data_size: descriptor.data_size.try_into().unwrap(),
595 })?;
596
597 // Bump the offset by the size of this region to find where the next
598 // data slice is at.
599 data_offset = data_offset.checked_add(descriptor.data_size).unwrap();
600 }
601
602 // We're done!
603 Ok(())
604 }
605
606 /// Parse the tread list and extract their contexts.
607 fn parse_thread_list(cursor: &mut Cursor, arch: Arch) -> io::Result<Threads> {
608 // Create the map of threads.
609 let mut threads = Threads::new();
610
611 // Read the thread list.
612 let thread_list = read_struct::<ThreadList>(cursor)?;
613
614 // Iterate through every entries.
615 for _ in 0..thread_list.number_of_threads {
616 // Read the entry.
617 let thread = read_struct::<ThreadEntry>(cursor)?;
618
619 // Save the current position.
620 let pos = cursor.stream_position()?;
621
622 // Grab the slice of its context.
623 let thread_context_slice =
624 Self::slice_from_location_descriptor(cursor, thread.thread_context)?;
625
626 // Let's make sense of this slice based on what architectcure it is.
627 let thread_context = match arch {
628 // Read a ThreadContextX86 context if the slice is big enough.
629 Arch::X86 => {
630 if thread_context_slice.len() < mem::size_of::<ThreadContextX86>() {
631 return Err(io::Error::new(
632 io::ErrorKind::InvalidData,
633 format!(
634 "The X86 thread context for TID {} has an unexpected size",
635 thread.thread_id
636 ),
637 ));
638 }
639
640 // Build a reference to a ThreadContextX86 at this address.
641 let ptr = thread_context_slice.as_ptr() as *const ThreadContextX86;
642 ThreadContext::X86(Box::new(unsafe { std::ptr::read_unaligned(ptr) }))
643 }
644 // Read a ThreadContextX86 context if the slice is big enough.
645 Arch::X64 => {
646 if thread_context_slice.len() < mem::size_of::<ThreadContextX64>() {
647 return Err(io::Error::new(
648 io::ErrorKind::InvalidData,
649 format!(
650 "The X64 thread context for TID {} has an unexpected size",
651 thread.thread_id
652 ),
653 ));
654 }
655
656 // Build a reference to a ThreadContextX64 at this address.
657 let ptr = thread_context_slice.as_ptr() as *const ThreadContextX64;
658 ThreadContext::X64(Box::new(unsafe { std::ptr::read_unaligned(ptr) }))
659 }
660 };
661
662 // The key in the map is the thread id.
663 let key = thread.thread_id;
664
665 // Create a Thread from its context and the descriptor.
666 let thread = Thread::new(thread, thread_context);
667
668 // If we've already encountered a thread with this id, then let's
669 // bail.
670 let previous_val = threads.insert(key, thread);
671 if previous_val.is_some() {
672 return Err(io::Error::new(
673 io::ErrorKind::InvalidData,
674 format!("Thread {} already in the map", key),
675 ));
676 }
677
678 // Restore the position to get ready to parse the next entry.
679 cursor.seek(io::SeekFrom::Start(pos))?;
680 }
681
682 Ok(threads)
683 }
684
685 /// Parse the module list.
686 fn parse_module_list(cursor: &mut Cursor) -> io::Result<Modules<'a>> {
687 // Build the map of modules.
688 let mut modules = Modules::new();
689
690 // Read the module list.
691 let module_list = read_struct::<ModuleList>(cursor)?;
692
693 // Iterate through every entries.
694 for _ in 0..module_list.number_of_modules {
695 // Read the module entry.
696 let module = read_struct::<ModuleEntry>(cursor)?;
697
698 // Save the position.
699 let pos = cursor.stream_position()?;
700
701 // Grab the CV / misc record slices.
702 let cv_record = Self::slice_from_location_descriptor(cursor, module.cv_record)?;
703 let misc_record = Self::slice_from_location_descriptor(cursor, module.misc_record)?;
704
705 // Travel to where the module name is stored at.
706 cursor.seek(io::SeekFrom::Start(module.module_name_rva.into()))?;
707
708 // Read its length.
709 let module_name_length = read_struct::<u32>(cursor)?.try_into().unwrap();
710
711 // Allocate a backing buffer.
712 let mut module_name = vec![0; module_name_length];
713
714 // Read the module name off the slice into the buffer.
715 cursor.read_exact(module_name.as_mut_slice())?;
716
717 // Convert the module name into a Rust string.
718 let module_name = utf16_string_from_slice(&module_name).map_err(|e| {
719 io::Error::new(
720 io::ErrorKind::InvalidData,
721 format!("Module name is incorrect utf8: {e}"),
722 )
723 })?;
724
725 // Create a module from its descriptor / name / records.
726 let module = Module::new(module, module_name, cv_record, misc_record);
727
728 // If there's already a module at this address, something is wrong
729 // so we bail.
730 let previous_val = modules.insert(module.range.start, module);
731 if let Some(previous_val) = previous_val {
732 return Err(io::Error::new(
733 io::ErrorKind::InvalidData,
734 format!("Module {} already in the map", previous_val.path.display()),
735 ));
736 }
737
738 // Restore the saved cursor.
739 cursor.seek(io::SeekFrom::Start(pos))?;
740 }
741
742 // We're done!
743 Ok(modules)
744 }
745
746 pub fn with_file(_mapped_file: MappedFile<'a>) -> io::Result<UserDumpParser<'a>> {
747 // Grab a cursor to start parsing the bits.
748 let mut cursor = _mapped_file.cursor();
749
750 // Read the header.
751 let hdr = read_struct::<Header>(&mut cursor)?;
752
753 // If we don't see the expected signature, bail.
754 if hdr.signature != EXPECTED_DUMP_SIGNATURE {
755 return Err(io::Error::new(
756 io::ErrorKind::InvalidData,
757 format!("Header signature {:x} is unexpected", hdr.signature),
758 ));
759 }
760
761 // Check if the flags make sense.
762 if (hdr.flags & VALID_DUMP_FLAGS) != 0 {
763 return Err(io::Error::new(
764 io::ErrorKind::InvalidData,
765 format!("Header signature {:x} is unexpected", hdr.signature),
766 ));
767 }
768
769 // Move to the stream directory.
770 cursor.seek(io::SeekFrom::Start(hdr.stream_directory_rva.into()))?;
771
772 // Create a map to store where directories are stored at.
773 let mut directory_locations = collections::HashMap::new();
774
775 // Iterate through every entries.
776 for _ in 0..hdr.number_of_streams {
777 // Read the directory..
778 let directory = read_struct::<Directory>(&mut cursor)?;
779
780 // ..if we hit the `STREAM_TYPE_UNUSED`, we'll stop there.
781 if directory.stream_type == STREAM_TYPE_UNUSED {
782 break;
783 }
784
785 // Keep track of this directory.
786 directory_locations.insert(directory.stream_type, directory.location);
787 }
788
789 // Parsing directories in a certain orders make things easier, and below
790 // is the order we want.
791 let required = true;
792 let not_required = false;
793 let directory_parsing_order = [
794 // We need the architecture to decode threads.
795 (STREAM_TYPE_SYSTEM_INFO, required),
796 (STREAM_TYPE_EXCEPTION, not_required),
797 // We parse this stream to build MemBlock w/o any data.
798 (STREAM_TYPE_MEMORY_INFO_LIST, required),
799 // We associate the data when parsing that stream.
800 (STREAM_TYPE_MEMORY64_LIST, required),
801 (STREAM_TYPE_THREAD_LIST, not_required),
802 (STREAM_TYPE_MODULE_LIST, not_required),
803 ];
804
805 // Declare a bunch of state.
806 let mut arch = None;
807 let mut foreground_tid = None;
808 let mut mem_blocks = MemBlocks::new();
809 let mut modules = Modules::new();
810 let mut threads = Threads::new();
811
812 // Iterate through the directories in order.
813 for (directory_type, required) in directory_parsing_order {
814 // Check if we've encountered this stream directory
815 let directory_location = directory_locations.get(&directory_type);
816
817 // If we haven't, and that this directory is required, we bail.
818 // Otherwise we just go to the next.
819 let Some(directory_location) = directory_location else {
820 if required {
821 return Err(io::Error::new(
822 io::ErrorKind::InvalidData,
823 format!("The directory {directory_type} is required but not present"),
824 ));
825 }
826
827 continue;
828 };
829
830 // Move to where the stream is at.
831 cursor.seek(io::SeekFrom::Start(directory_location.rva.into()))?;
832
833 // Parse the streams we support.
834 match directory_type {
835 STREAM_TYPE_SYSTEM_INFO => arch = Some(Self::parse_system_info(&mut cursor)?),
836 STREAM_TYPE_EXCEPTION => foreground_tid = Some(Self::parse_exception(&mut cursor)?),
837 STREAM_TYPE_MEMORY_INFO_LIST => {
838 mem_blocks = Self::parse_mem_info_list(&mut cursor)?
839 }
840 STREAM_TYPE_MEMORY64_LIST => Self::parse_mem64_list(&mut cursor, &mut mem_blocks)?,
841 STREAM_TYPE_THREAD_LIST => {
842 threads = Self::parse_thread_list(&mut cursor, arch.unwrap())?
843 }
844 STREAM_TYPE_MODULE_LIST => modules = Self::parse_module_list(&mut cursor)?,
845 _ => unreachable!("Only parsing stream types we know about"),
846 };
847 }
848
849 // The system info stream is required to be parsed so we know we have a
850 // value in arch.
851 let arch = arch.unwrap();
852
853 // Phew, we have everything needed to build an instance!
854 Ok(UserDumpParser {
855 _mapped_file,
856 arch,
857 foreground_tid,
858 mem_blocks,
859 modules,
860 threads,
861 })
862 }
863}
864
865/// Peek for a `T` from the cursor.
866fn peek_struct<T>(cursor: &mut Cursor) -> io::Result<T> {
867 let mut s = mem::MaybeUninit::uninit();
868 let size_of_s = mem::size_of_val(&s);
869 let slice_over_s = unsafe { slice::from_raw_parts_mut(s.as_mut_ptr() as *mut u8, size_of_s) };
870
871 let pos = cursor.position();
872 cursor.read_exact(slice_over_s)?;
873 cursor.seek(io::SeekFrom::Start(pos))?;
874
875 Ok(unsafe { s.assume_init() })
876}
877
878/// Read a `T` from the cursor.
879fn read_struct<T>(cursor: &mut Cursor) -> io::Result<T> {
880 let s = peek_struct(cursor)?;
881 let size_of_s = mem::size_of_val(&s);
882
883 cursor.seek(io::SeekFrom::Current(size_of_s.try_into().unwrap()))?;
884
885 Ok(s)
886}
887
/// Convert a slice of bytes into a Rust [`String`], decoding it as
/// little-endian UTF-16.
///
/// Fails if the slice has an odd length or if it isn't valid UTF-16 (for
/// example, it contains a lone surrogate).
fn utf16_string_from_slice(slice: &[u8]) -> io::Result<String> {
    // Every UTF-16 code unit is 2 bytes, so we expect the length to be a
    // multiple of 2.
    if (slice.len() % 2) != 0 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "Slice length needs to be % 2",
        ));
    }

    // Iterate over chunks of 2 bytes to yield u16 code units. The length is
    // even so `chunks_exact` leaves no remainder.
    let iter = slice
        .chunks_exact(2)
        .map(|c| u16::from_le_bytes([c[0], c[1]]));

    // Decode the u16's into a String. If one of the u16 can't be decoded into a
    // valid code point, then it fails. Otherwise they all get collected into a
    // String. `map_err` builds the error lazily, only on the failure path.
    char::decode_utf16(iter)
        .collect::<Result<_, _>>()
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Module name is not UTF16"))
}
912
#[cfg(test)]
mod tests {
    use core::fmt::Debug;

    use crate::UserDumpParser;

    /// Compile-time check that the parser can be shared across threads and
    /// debug-printed.
    #[test]
    fn assert_traits() {
        fn is_send_sync_debug<T: Send + Sync + Debug>() {}
        is_send_sync_debug::<UserDumpParser>();
    }
}