symbolic_debuginfo/
pe.rs

1//! Support for Portable Executables, an extension of COFF used on Windows.
2
3use std::borrow::Cow;
4use std::error::Error;
5use std::fmt;
6use std::io::Write;
7
8use gimli::RunTimeEndian;
9use goblin::pe;
10use scroll::{Pread, LE};
11use thiserror::Error;
12
13use symbolic_common::{Arch, AsSelf, CodeId, DebugId};
14
15use crate::base::*;
16use crate::dwarf::*;
17
18pub use goblin::pe::exception::*;
19pub use goblin::pe::section_table::SectionTable;
20
21/// An error when dealing with [`PEObject`](struct.PEObject.html).
22#[derive(Debug, Error)]
23#[error("invalid PE file")]
24pub struct PeError {
25    #[source]
26    source: Option<Box<dyn Error + Send + Sync + 'static>>,
27}
28
29impl PeError {
30    /// Creates a new PE error from an arbitrary error payload.
31    fn new<E>(source: E) -> Self
32    where
33        E: Into<Box<dyn Error + Send + Sync>>,
34    {
35        let source = Some(source.into());
36        Self { source }
37    }
38}
39
40/// Detects if the PE is a packer stub.
41///
42/// Such files usually only contain empty stubs in their `.pdata` and `.text` sections, and unwind
43/// information cannot be retrieved reliably. Usually, the exception table is present, but unwind
44/// info points into a missing section.
45fn is_pe_stub(pe: &pe::PE<'_>) -> bool {
46    let mut has_stub = false;
47    let mut pdata_empty = false;
48
49    for section in &pe.sections {
50        let name = section.name().unwrap_or_default();
51        pdata_empty = pdata_empty || name == ".pdata" && section.size_of_raw_data == 0;
52        has_stub = has_stub || name.starts_with(".stub");
53    }
54
55    pdata_empty && has_stub
56}
57
58/// Portable Executable, an extension of COFF used on Windows.
59///
60/// This file format is used to carry program code. Debug information is usually moved to a separate
61/// container, [`PdbObject`]. The PE file contains a reference to the PDB and vice versa to verify
62/// that the files belong together.
63///
64/// In rare instances, PE files might contain debug information.
65/// This is supported for DWARF debug information.
66///
67/// [`PdbObject`]: ../pdb/struct.PdbObject.html
68pub struct PeObject<'data> {
69    pe: pe::PE<'data>,
70    data: &'data [u8],
71    is_stub: bool,
72}
73
74impl<'data> PeObject<'data> {
75    /// Tests whether the buffer could contain an PE object.
76    pub fn test(data: &[u8]) -> bool {
77        matches!(
78            data.get(0..2)
79                .and_then(|data| data.pread_with::<u16>(0, LE).ok()),
80            Some(pe::header::DOS_MAGIC)
81        )
82    }
83
84    /// Tries to parse a PE object from the given slice.
85    pub fn parse(data: &'data [u8]) -> Result<Self, PeError> {
86        let pe = pe::PE::parse(data).map_err(PeError::new)?;
87        let is_stub = is_pe_stub(&pe);
88        Ok(PeObject { pe, data, is_stub })
89    }
90
91    /// The container file format, which is always `FileFormat::Pe`.
92    pub fn file_format(&self) -> FileFormat {
93        FileFormat::Pe
94    }
95
96    /// The code identifier of this object.
97    ///
98    /// The code identifier consists of the `time_date_stamp` field id the COFF header, followed by
99    /// the `size_of_image` field in the optional header. If the optional PE header is not present,
100    /// this identifier is `None`.
101    pub fn code_id(&self) -> Option<CodeId> {
102        let header = &self.pe.header;
103        let optional_header = header.optional_header.as_ref()?;
104
105        let timestamp = header.coff_header.time_date_stamp;
106        let size_of_image = optional_header.windows_fields.size_of_image;
107        let string = format!("{timestamp:08x}{size_of_image:x}");
108        Some(CodeId::new(string))
109    }
110
111    /// The debug information identifier of this PE.
112    ///
113    /// Since debug information is usually stored in an external
114    /// [`PdbObject`](crate::pdb::PdbObject), this identifier actually refers to the
115    /// PDB. While strictly the filename of the PDB would also be necessary fully resolve
116    /// it, in most instances the GUID and age contained in this identifier are sufficient.
117    pub fn debug_id(&self) -> DebugId {
118        self.pe
119            .debug_data
120            .as_ref()
121            .and_then(|debug_data| {
122                debug_data
123                    .codeview_pdb70_debug_info
124                    .as_ref()
125                    .map(|cv_record| (debug_data.image_debug_directory, cv_record))
126            })
127            .and_then(|(debug_directory, cv_record)| {
128                let guid = &cv_record.signature;
129
130                // Deterministic PE files have a different debug_id format:
131                //
132                // > Version Major=any, Minor=0x504d of the data format has the same structure as above.
133                // > The Age shall be 1. The format of the .pdb file that this PE/COFF file was built with is Portable PDB.
134                // > The Major version specified in the entry indicates the version of the Portable PDB format.
135                // > Together 16B of the Guid concatenated with 4B of the TimeDateStamp field of the entry form a PDB ID that should be used to match the PE/COFF image with the associated PDB (instead of Guid and Age).
136                // > Matching PDB ID is stored in the #Pdb stream of the .pdb file.
137                //
138                // See https://github.com/dotnet/runtime/blob/main/docs/design/specs/PE-COFF.md#codeview-debug-directory-entry-type-2
139                let age = if debug_directory.minor_version == 0x504d {
140                    debug_directory.time_date_stamp
141                } else {
142                    cv_record.age
143                };
144
145                DebugId::from_guid_age(guid, age).ok()
146            })
147            .unwrap_or_default()
148    }
149
150    /// The name of the referenced PDB file.
151    pub fn debug_file_name(&self) -> Option<Cow<'_, str>> {
152        self.pe
153            .debug_data
154            .as_ref()
155            .and_then(|debug_data| debug_data.codeview_pdb70_debug_info.as_ref())
156            .and_then(|debug_info| {
157                debug_info
158                    .filename
159                    .iter()
160                    .position(|&c| c == 0)
161                    .map(|nul_byte| String::from_utf8_lossy(&debug_info.filename[..nul_byte]))
162            })
163    }
164
165    /// The CPU architecture of this object, as specified in the COFF header.
166    pub fn arch(&self) -> Arch {
167        let machine = self.pe.header.coff_header.machine;
168        crate::pdb::arch_from_machine(machine.into())
169    }
170
171    /// The kind of this object, as specified in the PE header.
172    pub fn kind(&self) -> ObjectKind {
173        if self.pe.is_lib {
174            ObjectKind::Library
175        } else if self.is_stub {
176            ObjectKind::Other
177        } else {
178            ObjectKind::Executable
179        }
180    }
181
182    /// The address at which the image prefers to be loaded into memory.
183    ///
184    /// ELF files store all internal addresses as if it was loaded at that address. When the image
185    /// is actually loaded, that spot might already be taken by other images and so it must be
186    /// relocated to a new address. During load time, the loader rewrites all addresses in the
187    /// program code to match the new load address so that there is no runtime overhead when
188    /// executing the code.
189    ///
190    /// Addresses used in `symbols` or `debug_session` have already been rebased relative to that
191    /// load address, so that the caller only has to deal with addresses relative to the actual
192    /// start of the image.
193    pub fn load_address(&self) -> u64 {
194        self.pe.image_base as u64
195    }
196
197    /// Determines whether this object exposes a public symbol table.
198    pub fn has_symbols(&self) -> bool {
199        !self.pe.exports.is_empty()
200    }
201
202    /// Returns an iterator over symbols in the public symbol table.
203    pub fn symbols(&self) -> PeSymbolIterator<'data, '_> {
204        PeSymbolIterator {
205            exports: self.pe.exports.iter(),
206        }
207    }
208
209    /// Returns an ordered map of symbols in the symbol table.
210    pub fn symbol_map(&self) -> SymbolMap<'data> {
211        self.symbols().collect()
212    }
213
214    /// Determines whether this object contains debug information.
215    ///
216    /// Not usually the case, except for PE's generated by some alternative toolchains
217    /// which contain DWARF debug info.
218    pub fn has_debug_info(&self) -> bool {
219        self.section(".debug_info").is_some()
220    }
221
222    /// Determines whether this object contains embedded source.
223    pub fn has_sources(&self) -> bool {
224        false
225    }
226
227    /// Determines whether this object is malformed and was only partially parsed
228    pub fn is_malformed(&self) -> bool {
229        false
230    }
231
232    /// Constructs a debugging session.
233    ///
234    /// A debugging session loads certain information from the object file and creates caches for
235    /// efficient access to various records in the debug information. Since this can be quite a
236    /// costly process, try to reuse the debugging session as long as possible.
237    ///
238    /// PE files usually don't have embedded debugging information,
239    /// but some toolchains (e.g. MinGW) generate DWARF debug info.
240    ///
241    /// Constructing this session will also work if the object does not contain debugging
242    /// information, in which case the session will be a no-op. This can be checked via
243    /// [`has_debug_info`](struct.PeObject.html#method.has_debug_info).
244    pub fn debug_session(&self) -> Result<DwarfDebugSession<'data>, DwarfError> {
245        let symbols = self.symbol_map();
246        DwarfDebugSession::parse(self, symbols, self.load_address() as i64, self.kind())
247    }
248
249    /// Determines whether this object contains stack unwinding information.
250    pub fn has_unwind_info(&self) -> bool {
251        !self.is_stub && self.exception_data().is_some_and(|e| !e.is_empty())
252    }
253
254    /// Returns the raw data of the PE file.
255    pub fn data(&self) -> &'data [u8] {
256        self.data
257    }
258
259    /// A list of the sections in this PE binary, used to resolve virtual addresses.
260    pub fn sections(&self) -> &[SectionTable] {
261        &self.pe.sections
262    }
263
264    /// Returns the `SectionTable` for the section with this name, if present.
265    pub fn section(&self, name: &str) -> Option<SectionTable> {
266        for s in &self.pe.sections {
267            let sect_name = s.name();
268            if sect_name.is_ok() && sect_name.unwrap() == name {
269                return Some(s.clone());
270            }
271        }
272        None
273    }
274
275    /// Returns exception data containing unwind information.
276    pub fn exception_data(&self) -> Option<&ExceptionData<'_>> {
277        if self.is_stub {
278            None
279        } else {
280            self.pe.exception_data.as_ref()
281        }
282    }
283
284    /// Returns the raw buffer of Embedded Portable PDB Debug directory entry, if any.
285    pub fn embedded_ppdb(&self) -> Result<Option<PeEmbeddedPortablePDB<'data>>, PeError> {
286        // Note: This is currently not supported by goblin, see https://github.com/m4b/goblin/issues/314
287        let Some(opt_header) = self.pe.header.optional_header else {
288            return Ok(None);
289        };
290        let Some(debug_directory) = opt_header.data_directories.get_debug_table() else {
291            return Ok(None);
292        };
293        let file_alignment = opt_header.windows_fields.file_alignment;
294        let parse_options = &pe::options::ParseOptions::default();
295        let Some(offset) = pe::utils::find_offset(
296            debug_directory.virtual_address as usize,
297            &self.pe.sections,
298            file_alignment,
299            parse_options,
300        ) else {
301            return Ok(None);
302        };
303
304        use pe::debug::ImageDebugDirectory;
305        let entries = debug_directory.size as usize / std::mem::size_of::<ImageDebugDirectory>();
306        for i in 0..entries {
307            let entry = offset + i * std::mem::size_of::<ImageDebugDirectory>();
308            let idd: ImageDebugDirectory = self.data.pread_with(entry, LE).map_err(PeError::new)?;
309
310            // We're only looking for Embedded Portable PDB Debug Directory Entry (type 17).
311            if idd.data_type == 17 {
312                // See data specification:
313                // https://github.com/dotnet/runtime/blob/97ddb55e3adde20ceac579d935cef83cfe996169/docs/design/specs/PE-COFF.md#embedded-portable-pdb-debug-directory-entry-type-17
314                if idd.size_of_data < 8 {
315                    return Err(PeError::new(symbolic_ppdb::FormatError::from(
316                        symbolic_ppdb::FormatErrorKind::InvalidLength,
317                    )));
318                }
319
320                // ImageDebugDirectory.pointer_to_raw_data stores a raw offset -- not a virtual offset -- which we can use directly
321                let mut offset: usize = match parse_options.resolve_rva {
322                    true => idd.pointer_to_raw_data as usize,
323                    false => idd.address_of_raw_data as usize,
324                };
325
326                let mut signature: [u8; 4] = [0; 4];
327                self.data
328                    .gread_inout(&mut offset, &mut signature)
329                    .map_err(PeError::new)?;
330                if signature != "MPDB".as_bytes() {
331                    return Err(PeError::new(symbolic_ppdb::FormatError::from(
332                        symbolic_ppdb::FormatErrorKind::InvalidSignature,
333                    )));
334                }
335                let uncompressed_size: u32 = self
336                    .data
337                    .gread_with(&mut offset, LE)
338                    .map_err(PeError::new)?;
339
340                // 8 == the number bytes we have just read.
341                let compressed_size = idd.size_of_data as usize - 8;
342
343                return Ok(Some(PeEmbeddedPortablePDB {
344                    compressed_data: self
345                        .data
346                        .get(offset..(offset + compressed_size))
347                        .ok_or_else(|| {
348                            PeError::new(symbolic_ppdb::FormatError::from(
349                                symbolic_ppdb::FormatErrorKind::InvalidBlobOffset,
350                            ))
351                        })?,
352                    uncompressed_size: uncompressed_size as usize,
353                }));
354            }
355        }
356        Ok(None)
357    }
358}
359
360impl fmt::Debug for PeObject<'_> {
361    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
362        f.debug_struct("PeObject")
363            .field("code_id", &self.code_id())
364            .field("debug_id", &self.debug_id())
365            .field("debug_file_name", &self.debug_file_name())
366            .field("arch", &self.arch())
367            .field("kind", &self.kind())
368            .field("load_address", &format_args!("{:#x}", self.load_address()))
369            .field("has_symbols", &self.has_symbols())
370            .field("has_debug_info", &self.has_debug_info())
371            .field("has_unwind_info", &self.has_unwind_info())
372            .field("is_malformed", &self.is_malformed())
373            .finish()
374    }
375}
376
377impl<'slf, 'data: 'slf> AsSelf<'slf> for PeObject<'data> {
378    type Ref = PeObject<'slf>;
379
380    fn as_self(&'slf self) -> &'slf Self::Ref {
381        self
382    }
383}
384
385impl<'data> Parse<'data> for PeObject<'data> {
386    type Error = PeError;
387
388    fn test(data: &[u8]) -> bool {
389        Self::test(data)
390    }
391
392    fn parse(data: &'data [u8]) -> Result<Self, PeError> {
393        Self::parse(data)
394    }
395}
396
397impl<'data: 'object, 'object> ObjectLike<'data, 'object> for PeObject<'data> {
398    type Error = DwarfError;
399    type Session = DwarfDebugSession<'data>;
400    type SymbolIterator = PeSymbolIterator<'data, 'object>;
401
402    fn file_format(&self) -> FileFormat {
403        self.file_format()
404    }
405
406    fn code_id(&self) -> Option<CodeId> {
407        self.code_id()
408    }
409
410    fn debug_id(&self) -> DebugId {
411        self.debug_id()
412    }
413
414    fn arch(&self) -> Arch {
415        self.arch()
416    }
417
418    fn kind(&self) -> ObjectKind {
419        self.kind()
420    }
421
422    fn load_address(&self) -> u64 {
423        self.load_address()
424    }
425
426    fn has_symbols(&self) -> bool {
427        self.has_symbols()
428    }
429
430    fn symbols(&'object self) -> Self::SymbolIterator {
431        self.symbols()
432    }
433
434    fn symbol_map(&self) -> SymbolMap<'data> {
435        self.symbol_map()
436    }
437
438    fn has_debug_info(&self) -> bool {
439        self.has_debug_info()
440    }
441
442    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
443        self.debug_session()
444    }
445
446    fn has_unwind_info(&self) -> bool {
447        self.has_unwind_info()
448    }
449
450    fn has_sources(&self) -> bool {
451        self.has_sources()
452    }
453
454    fn is_malformed(&self) -> bool {
455        self.is_malformed()
456    }
457}
458
459/// An iterator over symbols in the PE file.
460///
461/// Returned by [`PeObject::symbols`](struct.PeObject.html#method.symbols).
462pub struct PeSymbolIterator<'data, 'object> {
463    exports: std::slice::Iter<'object, pe::export::Export<'data>>,
464}
465
466impl<'data> Iterator for PeSymbolIterator<'data, '_> {
467    type Item = Symbol<'data>;
468
469    fn next(&mut self) -> Option<Self::Item> {
470        self.exports.next().map(|export| Symbol {
471            name: export.name.map(Cow::Borrowed),
472            address: export.rva as u64,
473            size: export.size as u64,
474        })
475    }
476}
477
478impl<'data> Dwarf<'data> for PeObject<'data> {
479    fn endianity(&self) -> RunTimeEndian {
480        // According to https://reverseengineering.stackexchange.com/questions/17922/determining-endianness-of-pe-files-windows-on-arm,
481        // the only known platform running PE's with big-endian code is the Xbox360. Probably not worth handling.
482        RunTimeEndian::Little
483    }
484
485    fn raw_section(&self, name: &str) -> Option<DwarfSection<'data>> {
486        // Name is given without leading "."
487        let sect = self.section(&format!(".{name}"))?;
488        let start = sect.pointer_to_raw_data as usize;
489        let end = start + (sect.virtual_size as usize);
490        let dwarf_data: &'data [u8] = self.data.get(start..end)?;
491        let dwarf_sect = DwarfSection {
492            // TODO: What about 64-bit PE+? Still 32 bit?
493            address: u64::from(sect.virtual_address),
494            data: Cow::from(dwarf_data),
495            offset: u64::from(sect.pointer_to_raw_data),
496            align: 4096, // TODO: Does goblin expose this? For now, assume 4K page size
497        };
498        Some(dwarf_sect)
499    }
500}
501
502/// Embedded Portable PDB data wrapper that can be decompressed when needed.
503#[derive(Debug, Clone)]
504pub struct PeEmbeddedPortablePDB<'data> {
505    compressed_data: &'data [u8],
506    uncompressed_size: usize,
507}
508
509impl PeEmbeddedPortablePDB<'_> {
510    /// Returns the uncompressed size of the Portable PDB buffer.
511    pub fn get_size(&self) -> usize {
512        self.uncompressed_size
513    }
514
515    /// Reads the Portable PDB contents into the writer.
516    pub fn decompress_to<W: Write>(&self, output: W) -> Result<(), PeError> {
517        use std::io::prelude::*;
518        let mut decoder = flate2::write::DeflateDecoder::new(output);
519        decoder
520            .write_all(self.compressed_data)
521            .and_then(|_| decoder.finish())
522            .map_err(PeError::new)?;
523        Ok(())
524    }
525}