// udmp_parser/udmp_parser.rs
// Axel '0vercl0k' Souchet - July 29 2023
//! This module is where the parsing logic is implemented. The
//! [`UserDumpParser`] can memory map a file by default but users can also build
//! an instance from a slice they got from somewhere else.
use std::io::{Read, Seek};
use std::{collections, fmt, io, mem, ops, path, slice, vec};

use crate::map::{Cursor, MappedFile};
use crate::structs::*;

/// Disables all access to the committed region of pages. An attempt to read
/// from, write to, or execute the committed region results in an access
/// violation.
pub const PAGE_NOACCESS: u32 = 0x01;
/// Enables read-only access to the committed region of pages. An attempt to
/// write to the committed region results in an access violation. If Data
/// Execution Prevention is enabled, an attempt to execute code in the committed
/// region results in an access violation.
pub const PAGE_READONLY: u32 = 0x02;
/// Enables read-only or read/write access to the committed region of pages. If
/// Data Execution Prevention is enabled, attempting to execute code in the
/// committed region results in an access violation.
pub const PAGE_READWRITE: u32 = 0x04;
/// Enables read-only or copy-on-write access to a mapped view of a file mapping
/// object. An attempt to write to a committed copy-on-write page results in a
/// private copy of the page being made for the process. The private page is
/// marked as PAGE_READWRITE, and the change is written to the new page. If Data
/// Execution Prevention is enabled, attempting to execute code in the committed
/// region results in an access violation.
pub const PAGE_WRITECOPY: u32 = 0x08;
/// Enables execute access to the committed region of pages. An attempt to write
/// to the committed region results in an access violation.
pub const PAGE_EXECUTE: u32 = 0x10;
/// Enables execute or read-only access to the committed region of pages. An
/// attempt to write to the committed region results in an access violation.
pub const PAGE_EXECUTE_READ: u32 = 0x20;
/// Enables execute, read-only, or read/write access to the committed region of
/// pages.
pub const PAGE_EXECUTE_READWRITE: u32 = 0x40;
/// Enables execute, read-only, or copy-on-write access to a mapped view of a
/// file mapping object. An attempt to write to a committed copy-on-write page
/// results in a private copy of the page being made for the process. The
/// private page is marked as PAGE_EXECUTE_READWRITE, and the change is written
/// to the new page.
pub const PAGE_EXECUTE_WRITECOPY: u32 = 0x80;
/// Pages in the region become guard pages. Any attempt to access a guard page
/// causes the system to raise a STATUS_GUARD_PAGE_VIOLATION exception and turn
/// off the guard page status. Guard pages thus act as a one-time access alarm.
pub const PAGE_GUARD: u32 = 0x100;
/// Sets all pages to be non-cachable. Applications should not use this
/// attribute except when explicitly required for a device. Using the
/// interlocked functions with memory that is mapped with SEC_NOCACHE can result
/// in an EXCEPTION_ILLEGAL_INSTRUCTION exception.
pub const PAGE_NOCACHE: u32 = 0x200;
/// Sets all pages to be write-combined. Applications should not use this
/// attribute except when explicitly required for a device. Using the
/// interlocked functions with memory that is mapped as write-combined can
/// result in an EXCEPTION_ILLEGAL_INSTRUCTION exception.
pub const PAGE_WRITECOMBINE: u32 = 0x400;

/// The memory rights constants on Windows make it annoying to know if the page
/// is readable / writable / executable, so we have to create our own masks.
/// A page is readable if it is protected with any of the below rights.
const READABLE: u32 = PAGE_READONLY
    | PAGE_WRITECOPY
    | PAGE_READWRITE
    | PAGE_EXECUTE_READ
    | PAGE_EXECUTE_READWRITE
    | PAGE_EXECUTE_WRITECOPY;

/// A page is writable if it is protected with any of the below rights.
const WRITABLE: u32 = PAGE_READWRITE | PAGE_WRITECOPY | PAGE_EXECUTE_READWRITE;
/// A page is executable if it is protected with any of the below rights.
const EXECUTABLE: u32 =
    PAGE_EXECUTE | PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE | PAGE_EXECUTE_WRITECOPY;
76
/// A DLL loaded in the virtual address space.
#[allow(clippy::len_without_is_empty)]
#[derive(Default, Debug)]
pub struct Module<'a> {
    /// The range of where the module is loaded in memory at.
    pub range: ops::Range<u64>,
    /// PE checksum of the module.
    pub checksum: u32,
    /// Timestamp.
    pub time_date_stamp: u32,
    /// The module path on the file system.
    pub path: path::PathBuf,
    /// The module's file version information.
    pub version_info: FixedFileInfo,
    /// The raw CodeView record bytes of the module, as stored in the dump.
    pub cv_record: &'a [u8],
    /// The raw misc record bytes of the module, as stored in the dump.
    pub misc_record: &'a [u8],
}
93
94impl<'a> Module<'a> {
95 /// Build a new [`Module`] instance.
96 fn new(
97 entry: ModuleEntry,
98 module_name: String,
99 cv_record: &'a [u8],
100 misc_record: &'a [u8],
101 ) -> Self {
102 let start = entry.base_of_image;
103 let end = entry.base_of_image + entry.size_of_image as u64;
104 let range = ops::Range { start, end };
105 if range.is_empty() {
106 panic!("range is malformed");
107 }
108
109 Self {
110 range,
111 checksum: entry.checksum,
112 time_date_stamp: entry.time_date_stamp,
113 path: module_name.into(),
114 version_info: entry.version_info,
115 cv_record,
116 misc_record,
117 }
118 }
119
120 /// Get the file name of the module. This returns [`None`] if the file name
121 /// can't be converted to a Rust string.
122 pub fn file_name(&self) -> Option<&str> {
123 self.path.file_name().unwrap().to_str()
124 }
125
126 /// Get the address of where the module was loaded at.
127 pub fn start_addr(&self) -> u64 {
128 self.range.start
129 }
130
131 /// Get the address of where the last byte of the module was loaded at.
132 pub fn end_addr(&self) -> u64 {
133 self.range.end - 1
134 }
135
136 /// Get the length of the range of memory the module was loaded at.
137 pub fn len(&self) -> u64 {
138 self.range.end - self.range.start
139 }
140}
141
/// A [`ThreadContext`] stores the thread contexts for the architecture that are
/// supported by the library.
///
/// The payloads are boxed, which keeps the enum itself small regardless of the
/// size of the raw context structures.
#[derive(Debug)]
pub enum ThreadContext {
    /// The Intel x86 thread context.
    X86(Box<ThreadContextX86>),
    /// The Intel x64 thread context.
    X64(Box<ThreadContextX64>),
}
151
152/// Display the [`ThreadContext`] like WinDbg would.
153impl fmt::Display for ThreadContext {
154 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155 match self {
156 Self::X86(ctx) => ctx.fmt(f),
157 Self::X64(ctx) => ctx.fmt(f),
158 }
159 }
160}
161
/// A thread that was running when the dump was generated.
#[derive(Debug)]
pub struct Thread {
    /// The thread ID.
    pub id: u32,
    /// The suspend count counter cf [Freezing and Suspending Threads](https://learn.microsoft.com/en-us/windows-hardware/drivers/debugger/controlling-processes-and-threads).
    pub suspend_count: u32,
    /// The priority class cf [Priority Class](https://learn.microsoft.com/en-us/windows/win32/procthread/scheduling-priorities).
    pub priority_class: u32,
    /// Thread priority cf [Priority level](https://learn.microsoft.com/en-us/windows/win32/procthread/scheduling-priorities).
    pub priority: u32,
    /// The thread environment block address.
    pub teb: u64,
    /// The thread context. Kept private; read it through [`Thread::context`].
    context: ThreadContext,
}
178
179impl Thread {
180 /// Build a new [`Thread`] instance.
181 fn new(entry: ThreadEntry, context: ThreadContext) -> Self {
182 Self {
183 id: entry.thread_id,
184 suspend_count: entry.suspend_count,
185 priority_class: entry.priority_class,
186 priority: entry.priority,
187 teb: entry.teb,
188 context,
189 }
190 }
191
192 /// Get a reference to the [`ThreadContext`].
193 pub fn context(&self) -> &ThreadContext {
194 &self.context
195 }
196}
197
/// A block of memory in the address space that isn't a [`Module`]. [`MemBlock`]
/// can have `data` associated with it but isn't a guarantee (think about a
/// memory region that is mapped as `PAGE_NOACCESS`).
#[derive(Default, Debug)]
#[allow(clippy::len_without_is_empty)]
pub struct MemBlock<'a> {
    /// Range over the start/end address of the memory region.
    pub range: ops::Range<u64>,
    /// The base of the allocation that gave life to this memory region.
    pub allocation_base: u64,
    /// The page protection used at allocation time.
    pub allocation_protect: u32,
    /// The state of the memory region. See [State](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-memory_basic_information).
    pub state: u32,
    /// The page protection currently applied to the memory region.
    pub protect: u32,
    /// The type of memory region. See [Type](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-memory_basic_information).
    pub type_: u32,
    /// The [`MemBlock`]'s data. Can be empty (no content was captured for the
    /// region) and can be shorter than the region itself.
    pub data: &'a [u8],
}
219
220impl<'a> MemBlock<'a> {
221 /// Is the memory region readable?
222 pub fn is_readable(&self) -> bool {
223 (self.protect & READABLE) != 0
224 }
225
226 /// Is the memory region writable?
227 pub fn is_writable(&self) -> bool {
228 (self.protect & WRITABLE) != 0
229 }
230
231 /// Is the memory region executable?
232 pub fn is_executable(&self) -> bool {
233 (self.protect & EXECUTABLE) != 0
234 }
235
236 /// Stringify the memory region state.
237 pub fn state_as_str(&self) -> &str {
238 match self.state {
239 0x10_00 => "MEM_COMMIT",
240 0x20_00 => "MEM_RESERVE",
241 0x1_00_00 => "MEM_FREE",
242 _ => "UNKNOWN",
243 }
244 }
245
246 /// Stringify the memory region type.
247 pub fn type_as_str(&self) -> &str {
248 if self.state == 0x1_00_00 {
249 return "";
250 }
251
252 match self.type_ {
253 0x2_00_00 => "MEM_PRIVATE",
254 0x4_00_00 => "MEM_MAPPED",
255 0x1_00_00_00 => "MEM_IMAGE",
256 _ => "UNKNOWN",
257 }
258 }
259
260 /// Stringify the memory region protection.
261 pub fn protect_as_str(&self) -> String {
262 if self.protect == 0 {
263 return "".into();
264 }
265
266 // Those bits are the only ones that can be combined with the page
267 // protections from below. So strip those first off `protect`.
268 let bits = collections::HashMap::from([
269 (PAGE_GUARD, "PAGE_GUARD"),
270 (PAGE_NOCACHE, "PAGE_NOCACHE"),
271 (PAGE_WRITECOMBINE, "PAGE_WRITECOMBINE"),
272 ]);
273
274 // This is where the parts of the stringified mask are stored in.
275 let mut parts = vec::Vec::new();
276 let mut protect = self.protect;
277
278 // Walk through the bits to check if turned on.
279 for (mask, str) in bits.iter() {
280 // If the current bit isn't set, skip.
281 if (protect & mask) == 0 {
282 continue;
283 }
284
285 // If it is set, strip it off from `protect` and push its
286 // stringified value in the vector.
287 protect &= !mask;
288 parts.push(*str);
289 }
290
291 // Now we can handle the 'normal' page properties.
292 parts.push(match protect {
293 PAGE_NOACCESS => "PAGE_NOACCESS",
294 PAGE_READONLY => "PAGE_READONLY",
295 PAGE_READWRITE => "PAGE_READWRITE",
296 PAGE_WRITECOPY => "PAGE_WRITECOPY",
297 PAGE_EXECUTE => "PAGE_EXECUTE",
298 PAGE_EXECUTE_READ => "PAGE_EXECUTE_READ",
299 PAGE_EXECUTE_READWRITE => "PAGE_EXECUTE_READWRITE",
300 PAGE_EXECUTE_WRITECOPY => "PAGE_EXECUTE_WRITECOPY",
301 _ => "UNKNOWN",
302 });
303
304 parts.join(" | ")
305 }
306
307 /// Get a slice over the [`MemBlock`]'s data from its absolute address.
308 ///
309 /// If the dump had a memory block of size 4 bytes starting at address
310 /// 0xdead then calling `data_from(0xdead+1)` returns a slice over the
311 /// last 3 bytes of the memory block. This is useful when you don't need
312 /// to reason about offsets.
313 pub fn data_from(&self, addr: u64) -> Option<&[u8]> {
314 // If the memory block is empty return `None`. Also bail if this
315 // `MemBlock` doesn't contain the address.
316 if self.data.is_empty() || !self.range.contains(&addr) {
317 return None;
318 }
319
320 // `addr` is contained in the range, so this is safe.
321 let offset = addr - self.range.start;
322
323 // Return the slice to the user.
324 Some(&self.data[offset.try_into().unwrap()..])
325 }
326
327 /// Get the address of where this [`MemBlock`] was at in memory.
328 pub fn start_addr(&self) -> u64 {
329 self.range.start
330 }
331
332 /// Get the end address of where this [`MemBlock`] was at in memory.
333 ///
334 /// Note that the underlying range is not inclusive, so this address is
335 /// pointing right after the last byte's address.
336 pub fn end_addr(&self) -> u64 {
337 self.range.end
338 }
339
340 /// Get the size of the [`MemBlock`].
341 ///
342 /// Note that a region of memory can exists without having any `data`
343 /// associated with it. This method returns the range len, not `data`'s len.
344 ///
345 /// An example is a memory region mapped as `PAGE_NOACCESS`; it exists in
346 /// the address space but has no content.
347 pub fn len(&self) -> u64 {
348 self.range.end - self.range.start
349 }
350}
351
/// Convert a [`MemoryInfo`] into a [`MemBlock`].
impl<'a> From<MemoryInfo> for MemBlock<'a> {
    fn from(value: MemoryInfo) -> Self {
        Self {
            // NOTE(review): `base_address + region_size` can overflow on a
            // corrupted dump (panic in debug builds, wrap-around yielding an
            // empty range in release builds) — confirm inputs are sanitized
            // upstream.
            range: value.base_address..(value.base_address + value.region_size),
            allocation_base: value.allocation_base,
            allocation_protect: value.allocation_protect,
            state: value.state,
            protect: value.protect,
            type_: value.type_,
            // No `data` is associated at this point; it gets attached later
            // while parsing the Memory64ListStream.
            ..Default::default()
        }
    }
}

/// Map a base address to a [`MemBlock`].
pub type MemBlocks<'a> = collections::BTreeMap<u64, MemBlock<'a>>;

/// Map a thread id to a [`Thread`].
pub type Threads = collections::BTreeMap<u32, Thread>;

/// Map a base address to a [`Module`].
pub type Modules<'a> = collections::BTreeMap<u64, Module<'a>>;
375
/// Architectures supported by the library. This drives how thread contexts are
/// decoded.
#[derive(Debug, Clone, Copy)]
pub enum Arch {
    /// Intel x86.
    X86,
    /// Intel x64.
    X64,
}
384
/// This stores useful information fished out of of Windows minidump file:
/// thread contexts and memory blocks.
#[derive(Debug)]
pub struct UserDumpParser<'a> {
    /// The thread id of the foreground thread. Only available if the dump had
    /// an exception stream.
    pub foreground_tid: Option<u32>,
    /// The architecture of the dumped process.
    arch: Arch,
    /// A map of [`MemBlock`]s.
    mem_blocks: MemBlocks<'a>,
    /// A map of [`Module`].
    modules: Modules<'a>,
    /// A map of [`Thread`].
    threads: Threads,
    /// This is where we hold the backing data. Either it is a memory mapped
    /// file, or a slice that needs to live as long as this.
    _mapped_file: MappedFile<'a>,
}
403
404impl<'a> UserDumpParser<'a> {
405 /// Create an instance from a filepath. This memory maps the file and parses
406 /// it.
407 pub fn new<S: AsRef<path::Path>>(path: S) -> io::Result<UserDumpParser<'a>> {
408 let mapped_file = MappedFile::new(path)?;
409 Self::with_file(mapped_file)
410 }
411
412 /// Create an instance from something that dereference to a slice of bytes.
413 pub fn with_slice(
414 slice: &'a impl std::ops::Deref<Target = [u8]>,
415 ) -> io::Result<UserDumpParser<'a>> {
416 Self::with_file(MappedFile::from(slice.deref()))
417 }
418
419 /// Is the architeture X64?
420 pub fn is_arch_x64(&self) -> bool {
421 matches!(self.arch, Arch::X64)
422 }
423
424 /// Is the architecture X86?
425 pub fn is_arch_x86(&self) -> bool {
426 matches!(self.arch, Arch::X86)
427 }
428
429 /// Get a reference to the base address -> [`Module`] map.
430 pub fn modules(&self) -> &Modules {
431 &self.modules
432 }
433
434 /// Find a [`Module`] that includes `address` in its range.
435 pub fn get_module(&self, address: u64) -> Option<&Module> {
436 self.modules
437 .values()
438 .find(|module| module.range.contains(&address))
439 }
440
441 /// Get a reference to the TID -> [`Thread`] map.
442 pub fn threads(&self) -> &Threads {
443 &self.threads
444 }
445
446 /// Find a [`Thread`] with a specific TID.
447 pub fn get_thread(&self, id: u32) -> Option<&Thread> {
448 self.threads.values().find(|thread| thread.id == id)
449 }
450
451 /// Get a reference to the base address -> [`MemBlock`] map.
452 pub fn mem_blocks(&self) -> &MemBlocks {
453 &self.mem_blocks
454 }
455
456 /// Find a [`MemBlock`] that includes `address` in its range.
457 pub fn get_mem_block(&self, address: u64) -> Option<&MemBlock> {
458 self.mem_blocks
459 .values()
460 .find(|block| block.range.contains(&address))
461 }
462
463 /// Utility to get a slice from a [`LocationDescriptor32`] safely.
464 fn slice_from_location_descriptor(
465 reader: &Cursor,
466 location: LocationDescriptor32,
467 ) -> io::Result<&'a [u8]> {
468 // Grab the offset and the wanted len.
469 let offset = location.rva.try_into().unwrap();
470 let len = location.data_size.try_into().unwrap();
471
472 // Grab a reference on the underlying slice.
473 let slice_ref = reader.get_ref();
474
475 // Split the slice in two. We only care about the tail.
476 let (_, tail) = slice_ref.split_at(offset);
477
478 // Make sure the tail slice is big enough.
479 if tail.len() < len {
480 return Err(io::Error::new(
481 io::ErrorKind::UnexpectedEof,
482 "not enough data for slicing",
483 ));
484 }
485
486 // Make sure we hold `from_raw_parts`'s contract.
487 if len > isize::MAX.try_into().unwrap() {
488 panic!("len > isize::MAX");
489 }
490
491 // Build the slice!
492 Ok(unsafe { slice::from_raw_parts(tail.as_ptr(), len) })
493 }
494
495 /// Parse the system info stream to know which architecture is used.
496 fn parse_system_info(cursor: &mut Cursor) -> io::Result<Arch> {
497 // Read the stream info.
498 let system_info = read_struct::<SystemInfoStream>(cursor)?;
499
500 // Build the value of the enum safely.
501 Ok(match system_info.processor_arch {
502 ARCH_X86 => Arch::X86,
503 ARCH_X64 => Arch::X64,
504 _ => panic!("Unsupported architecture {:x}", system_info.processor_arch),
505 })
506 }
507
508 /// Parse the exception stream to know figure out if there's a foreground
509 /// TID.
510 fn parse_exception(cursor: &mut Cursor) -> io::Result<u32> {
511 // Read the exception stream.
512 let exception = read_struct::<ExceptionStream>(cursor)?;
513
514 // Return the TID.
515 Ok(exception.thread_id)
516 }
517
518 /// Parse the memory info list stream to build the [`MemBlocks`] map.
519 fn parse_mem_info_list(cursor: &mut Cursor) -> io::Result<MemBlocks<'a>> {
520 // Create storage for the memory blocks.
521 let mut mem_blocks = MemBlocks::new();
522
523 // Read the memory info list stream.
524 let mem_info_list = read_struct::<MemoryInfoListStream>(cursor)?;
525
526 // Ensure that each entry is at least as big as what we expected.
527 let mem_info_size = mem::size_of::<MemoryInfo>() as u32;
528 let size_of_entry = mem_info_list.size_of_entry;
529 if size_of_entry < mem_info_size {
530 return Err(io::Error::new(
531 io::ErrorKind::InvalidData,
532 format!(
533 "MemoryInfo's size ({}) doesn't match the dump ({})",
534 mem_info_size, mem_info_list.size_of_entry
535 ),
536 ));
537 }
538
539 // Iterate through every entries.
540 for _ in 0..mem_info_list.number_of_entries {
541 // Read the memory info structure.
542 let mem_info = peek_struct::<MemoryInfo>(cursor)?;
543
544 // The key in the map is the base address.
545 let key = mem_info.base_address;
546
547 // If we already inserted this address, there's something wrong so
548 // bail.
549 let previous_val = mem_blocks.insert(key, mem_info.into());
550 if previous_val.is_some() {
551 return Err(io::Error::new(
552 io::ErrorKind::InvalidData,
553 format!("Address {} already in the mem map", key),
554 ));
555 }
556
557 // Move on to the next entry.
558 cursor.seek(io::SeekFrom::Current(size_of_entry.into()))?;
559 }
560
561 // We're done.
562 Ok(mem_blocks)
563 }
564
565 /// Parse the memory64 list stream to associate data to the MemBlock we
566 /// parsed from the memory info list stream. That's why we parse the memory
567 /// info list stream first.
568 fn parse_mem64_list(cursor: &mut Cursor, mem_blocks: &mut MemBlocks<'a>) -> io::Result<()> {
569 // Read the memory64 list stream.
570 let mem_list = read_struct::<Memory64ListStream>(cursor)?;
571
572 // Grab the starting offset.
573 let mut data_offset = mem_list.base_rva;
574
575 // Iterate through every entries.
576 for _ in 0..mem_list.number_of_memory_ranges {
577 // Read a descriptor.
578 let descriptor = read_struct::<MemoryDescriptor64>(cursor)?;
579
580 // Get a reference to the associated MemBlock off `mem_blocks`.
581 let entry = mem_blocks
582 .get_mut(&descriptor.start_of_memory_range)
583 .ok_or(io::Error::new(
584 io::ErrorKind::InvalidData,
585 format!(
586 "Address {} in Memory64ListStream but not in MemoryInfoListStream",
587 descriptor.start_of_memory_range
588 ),
589 ))?;
590
591 // Read the slice of bytes and associate it to the MemBlock instance.
592 entry.data = Self::slice_from_location_descriptor(cursor, LocationDescriptor32 {
593 rva: data_offset.try_into().unwrap(),
594 data_size: descriptor.data_size.try_into().unwrap(),
595 })?;
596
597 // Bump the offset by the size of this region to find where the next
598 // data slice is at.
599 data_offset = data_offset.checked_add(descriptor.data_size).unwrap();
600 }
601
602 // We're done!
603 Ok(())
604 }
605
606 /// Parse the tread list and extract their contexts.
607 fn parse_thread_list(cursor: &mut Cursor, arch: Arch) -> io::Result<Threads> {
608 // Create the map of threads.
609 let mut threads = Threads::new();
610
611 // Read the thread list.
612 let thread_list = read_struct::<ThreadList>(cursor)?;
613
614 // Iterate through every entries.
615 for _ in 0..thread_list.number_of_threads {
616 // Read the entry.
617 let thread = read_struct::<ThreadEntry>(cursor)?;
618
619 // Save the current position.
620 let pos = cursor.stream_position()?;
621
622 // Grab the slice of its context.
623 let thread_context_slice =
624 Self::slice_from_location_descriptor(cursor, thread.thread_context)?;
625
626 // Let's make sense of this slice based on what architectcure it is.
627 let thread_context = match arch {
628 // Read a ThreadContextX86 context if the slice is big enough.
629 Arch::X86 => {
630 if thread_context_slice.len() < mem::size_of::<ThreadContextX86>() {
631 return Err(io::Error::new(
632 io::ErrorKind::InvalidData,
633 format!(
634 "The X86 thread context for TID {} has an unexpected size",
635 thread.thread_id
636 ),
637 ));
638 }
639
640 // Build a reference to a ThreadContextX86 at this address.
641 let ptr = thread_context_slice.as_ptr() as *const ThreadContextX86;
642 ThreadContext::X86(Box::new(unsafe { std::ptr::read_unaligned(ptr) }))
643 }
644 // Read a ThreadContextX86 context if the slice is big enough.
645 Arch::X64 => {
646 if thread_context_slice.len() < mem::size_of::<ThreadContextX64>() {
647 return Err(io::Error::new(
648 io::ErrorKind::InvalidData,
649 format!(
650 "The X64 thread context for TID {} has an unexpected size",
651 thread.thread_id
652 ),
653 ));
654 }
655
656 // Build a reference to a ThreadContextX64 at this address.
657 let ptr = thread_context_slice.as_ptr() as *const ThreadContextX64;
658 ThreadContext::X64(Box::new(unsafe { std::ptr::read_unaligned(ptr) }))
659 }
660 };
661
662 // The key in the map is the thread id.
663 let key = thread.thread_id;
664
665 // Create a Thread from its context and the descriptor.
666 let thread = Thread::new(thread, thread_context);
667
668 // If we've already encountered a thread with this id, then let's
669 // bail.
670 let previous_val = threads.insert(key, thread);
671 if previous_val.is_some() {
672 return Err(io::Error::new(
673 io::ErrorKind::InvalidData,
674 format!("Thread {} already in the map", key),
675 ));
676 }
677
678 // Restore the position to get ready to parse the next entry.
679 cursor.seek(io::SeekFrom::Start(pos))?;
680 }
681
682 Ok(threads)
683 }
684
685 /// Parse the module list.
686 fn parse_module_list(cursor: &mut Cursor) -> io::Result<Modules<'a>> {
687 // Build the map of modules.
688 let mut modules = Modules::new();
689
690 // Read the module list.
691 let module_list = read_struct::<ModuleList>(cursor)?;
692
693 // Iterate through every entries.
694 for _ in 0..module_list.number_of_modules {
695 // Read the module entry.
696 let module = read_struct::<ModuleEntry>(cursor)?;
697
698 // Save the position.
699 let pos = cursor.stream_position()?;
700
701 // Grab the CV / misc record slices.
702 let cv_record = Self::slice_from_location_descriptor(cursor, module.cv_record)?;
703 let misc_record = Self::slice_from_location_descriptor(cursor, module.misc_record)?;
704
705 // Travel to where the module name is stored at.
706 cursor.seek(io::SeekFrom::Start(module.module_name_rva.into()))?;
707
708 // Read its length.
709 let module_name_length = read_struct::<u32>(cursor)?.try_into().unwrap();
710
711 // Allocate a backing buffer.
712 let mut module_name = vec![0; module_name_length];
713
714 // Read the module name off the slice into the buffer.
715 cursor.read_exact(module_name.as_mut_slice())?;
716
717 // Convert the module name into a Rust string.
718 let module_name = utf16_string_from_slice(&module_name).map_err(|e| {
719 io::Error::new(
720 io::ErrorKind::InvalidData,
721 format!("Module name is incorrect utf8: {e}"),
722 )
723 })?;
724
725 // Create a module from its descriptor / name / records.
726 let module = Module::new(module, module_name, cv_record, misc_record);
727
728 // If there's already a module at this address, something is wrong
729 // so we bail.
730 let previous_val = modules.insert(module.range.start, module);
731 if let Some(previous_val) = previous_val {
732 return Err(io::Error::new(
733 io::ErrorKind::InvalidData,
734 format!("Module {} already in the map", previous_val.path.display()),
735 ));
736 }
737
738 // Restore the saved cursor.
739 cursor.seek(io::SeekFrom::Start(pos))?;
740 }
741
742 // We're done!
743 Ok(modules)
744 }
745
746 pub fn with_file(_mapped_file: MappedFile<'a>) -> io::Result<UserDumpParser<'a>> {
747 // Grab a cursor to start parsing the bits.
748 let mut cursor = _mapped_file.cursor();
749
750 // Read the header.
751 let hdr = read_struct::<Header>(&mut cursor)?;
752
753 // If we don't see the expected signature, bail.
754 if hdr.signature != EXPECTED_DUMP_SIGNATURE {
755 return Err(io::Error::new(
756 io::ErrorKind::InvalidData,
757 format!("Header signature {:x} is unexpected", hdr.signature),
758 ));
759 }
760
761 // Check if the flags make sense.
762 if (hdr.flags & VALID_DUMP_FLAGS) != 0 {
763 return Err(io::Error::new(
764 io::ErrorKind::InvalidData,
765 format!("Header signature {:x} is unexpected", hdr.signature),
766 ));
767 }
768
769 // Move to the stream directory.
770 cursor.seek(io::SeekFrom::Start(hdr.stream_directory_rva.into()))?;
771
772 // Create a map to store where directories are stored at.
773 let mut directory_locations = collections::HashMap::new();
774
775 // Iterate through every entries.
776 for _ in 0..hdr.number_of_streams {
777 // Read the directory..
778 let directory = read_struct::<Directory>(&mut cursor)?;
779
780 // ..if we hit the `STREAM_TYPE_UNUSED`, we'll stop there.
781 if directory.stream_type == STREAM_TYPE_UNUSED {
782 break;
783 }
784
785 // Keep track of this directory.
786 directory_locations.insert(directory.stream_type, directory.location);
787 }
788
789 // Parsing directories in a certain orders make things easier, and below
790 // is the order we want.
791 let required = true;
792 let not_required = false;
793 let directory_parsing_order = [
794 // We need the architecture to decode threads.
795 (STREAM_TYPE_SYSTEM_INFO, required),
796 (STREAM_TYPE_EXCEPTION, not_required),
797 // We parse this stream to build MemBlock w/o any data.
798 (STREAM_TYPE_MEMORY_INFO_LIST, required),
799 // We associate the data when parsing that stream.
800 (STREAM_TYPE_MEMORY64_LIST, required),
801 (STREAM_TYPE_THREAD_LIST, not_required),
802 (STREAM_TYPE_MODULE_LIST, not_required),
803 ];
804
805 // Declare a bunch of state.
806 let mut arch = None;
807 let mut foreground_tid = None;
808 let mut mem_blocks = MemBlocks::new();
809 let mut modules = Modules::new();
810 let mut threads = Threads::new();
811
812 // Iterate through the directories in order.
813 for (directory_type, required) in directory_parsing_order {
814 // Check if we've encountered this stream directory
815 let directory_location = directory_locations.get(&directory_type);
816
817 // If we haven't, and that this directory is required, we bail.
818 // Otherwise we just go to the next.
819 let Some(directory_location) = directory_location else {
820 if required {
821 return Err(io::Error::new(
822 io::ErrorKind::InvalidData,
823 format!("The directory {directory_type} is required but not present"),
824 ));
825 }
826
827 continue;
828 };
829
830 // Move to where the stream is at.
831 cursor.seek(io::SeekFrom::Start(directory_location.rva.into()))?;
832
833 // Parse the streams we support.
834 match directory_type {
835 STREAM_TYPE_SYSTEM_INFO => arch = Some(Self::parse_system_info(&mut cursor)?),
836 STREAM_TYPE_EXCEPTION => foreground_tid = Some(Self::parse_exception(&mut cursor)?),
837 STREAM_TYPE_MEMORY_INFO_LIST => {
838 mem_blocks = Self::parse_mem_info_list(&mut cursor)?
839 }
840 STREAM_TYPE_MEMORY64_LIST => Self::parse_mem64_list(&mut cursor, &mut mem_blocks)?,
841 STREAM_TYPE_THREAD_LIST => {
842 threads = Self::parse_thread_list(&mut cursor, arch.unwrap())?
843 }
844 STREAM_TYPE_MODULE_LIST => modules = Self::parse_module_list(&mut cursor)?,
845 _ => unreachable!("Only parsing stream types we know about"),
846 };
847 }
848
849 // The system info stream is required to be parsed so we know we have a
850 // value in arch.
851 let arch = arch.unwrap();
852
853 // Phew, we have everything needed to build an instance!
854 Ok(UserDumpParser {
855 _mapped_file,
856 arch,
857 foreground_tid,
858 mem_blocks,
859 modules,
860 threads,
861 })
862 }
863}
864
865/// Peek for a `T` from the cursor.
866fn peek_struct<T>(cursor: &mut Cursor) -> io::Result<T> {
867 let mut s = mem::MaybeUninit::uninit();
868 let size_of_s = mem::size_of_val(&s);
869 let slice_over_s = unsafe { slice::from_raw_parts_mut(s.as_mut_ptr() as *mut u8, size_of_s) };
870
871 let pos = cursor.position();
872 cursor.read_exact(slice_over_s)?;
873 cursor.seek(io::SeekFrom::Start(pos))?;
874
875 Ok(unsafe { s.assume_init() })
876}
877
878/// Read a `T` from the cursor.
879fn read_struct<T>(cursor: &mut Cursor) -> io::Result<T> {
880 let s = peek_struct(cursor)?;
881 let size_of_s = mem::size_of_val(&s);
882
883 cursor.seek(io::SeekFrom::Current(size_of_s.try_into().unwrap()))?;
884
885 Ok(s)
886}
887
/// Convert a slice of bytes into a Rust [`String`], decoding it as
/// little-endian UTF-16.
///
/// Fails if the slice has an odd length or if it isn't valid UTF-16 (for
/// example, it contains a lone surrogate).
fn utf16_string_from_slice(slice: &[u8]) -> io::Result<String> {
    // Every UTF-16 code unit is 2 bytes, so we expect the length to be a
    // multiple of 2.
    if (slice.len() % 2) != 0 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "Slice length needs to be % 2",
        ));
    }

    // Iterate over chunks of 2 bytes to yield u16 code units. The length is
    // even so `chunks_exact` leaves no remainder.
    let iter = slice
        .chunks_exact(2)
        .map(|c| u16::from_le_bytes([c[0], c[1]]));

    // Decode the u16's into a String. If one of the u16 can't be decoded into a
    // valid code point, then it fails. Otherwise they all get collected into a
    // String. `map_err` builds the error lazily, only on the failure path.
    char::decode_utf16(iter)
        .collect::<Result<_, _>>()
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Module name is not UTF16"))
}
912
#[cfg(test)]
mod tests {
    use core::fmt::Debug;

    use crate::UserDumpParser;

    /// Compile-time check that the parser can be shared across threads and
    /// debug-printed.
    #[test]
    fn assert_traits() {
        fn is_send_sync_debug<T: Send + Sync + Debug>() {}
        is_send_sync_debug::<UserDumpParser>();
    }
}