symbolic_debuginfo/
pdb.rs

1//! Support for Program Database, the debug companion format on Windows.
2
3use std::borrow::Cow;
4use std::collections::btree_map::BTreeMap;
5use std::error::Error;
6use std::fmt;
7use std::io::Cursor;
8use std::sync::Arc;
9
10use elsa::FrozenMap;
11use parking_lot::RwLock;
12use pdb_addr2line::pdb::{
13    AddressMap, FallibleIterator, ImageSectionHeader, InlineSiteSymbol, LineProgram, MachineType,
14    Module, ModuleInfo, PdbInternalSectionOffset, ProcedureSymbol, RawString, SeparatedCodeSymbol,
15    SymbolData, TypeIndex,
16};
17use pdb_addr2line::ModuleProvider;
18use smallvec::SmallVec;
19use srcsrv;
20use thiserror::Error;
21
22use symbolic_common::{
23    Arch, AsSelf, CodeId, CpuFamily, DebugId, Language, Name, NameMangling, SelfCell, Uuid,
24};
25
26use crate::base::*;
27use crate::function_stack::FunctionStack;
28use crate::sourcebundle::SourceFileDescriptor;
29
/// The `pdb::PDB` reader type used in this module: it reads from a `Cursor` over a borrowed
/// byte slice.
type Pdb<'data> = pdb::PDB<'data, Cursor<&'data [u8]>>;
31
/// Signature bytes ("Microsoft C/C++ MSF 7.00" plus trailer) that a buffer must start with for
/// `PdbObject::test` to accept it.
const MAGIC_BIG: &[u8] = b"Microsoft C/C++ MSF 7.00\r\n\x1a\x44\x53\x00\x00\x00";
33
34// Used for CFI, remove once abstraction is complete
35#[doc(hidden)]
36pub use pdb_addr2line::pdb;
37
/// Classifies the failures reported through [`PdbError`].
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PdbErrorKind {
    /// The PDB file is corrupted. The error's cause carries more information.
    BadObject,

    /// An inline record appeared without an inlining parent.
    UnexpectedInline,

    /// A type name could not be formatted.
    FormattingFailed,
}

impl fmt::Display for PdbErrorKind {
    /// Writes the fixed, human-readable message associated with this kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::BadObject => "invalid pdb file",
            Self::UnexpectedInline => "unexpected inline function without parent",
            Self::FormattingFailed => "failed to format type name",
        };

        f.write_str(message)
    }
}
61
62/// An error when dealing with [`PdbObject`](struct.PdbObject.html).
63#[derive(Debug, Error)]
64#[error("{kind}")]
65pub struct PdbError {
66    kind: PdbErrorKind,
67    #[source]
68    source: Option<Box<dyn Error + Send + Sync + 'static>>,
69}
70
71impl PdbError {
72    /// Creates a new PDB error from a known kind of error as well as an arbitrary error
73    /// payload.
74    fn new<E>(kind: PdbErrorKind, source: E) -> Self
75    where
76        E: Into<Box<dyn Error + Send + Sync>>,
77    {
78        let source = Some(source.into());
79        Self { kind, source }
80    }
81
82    /// Returns the corresponding [`PdbErrorKind`] for this error.
83    pub fn kind(&self) -> PdbErrorKind {
84        self.kind
85    }
86}
87
88impl From<PdbErrorKind> for PdbError {
89    fn from(kind: PdbErrorKind) -> Self {
90        Self { kind, source: None }
91    }
92}
93
94impl From<pdb::Error> for PdbError {
95    fn from(e: pdb::Error) -> Self {
96        Self::new(PdbErrorKind::BadObject, e)
97    }
98}
99
100impl From<fmt::Error> for PdbError {
101    fn from(e: fmt::Error) -> Self {
102        Self::new(PdbErrorKind::FormattingFailed, e)
103    }
104}
105
106impl From<pdb_addr2line::Error> for PdbError {
107    fn from(e: pdb_addr2line::Error) -> Self {
108        match e {
109            pdb_addr2line::Error::PdbError(e) => Self::new(PdbErrorKind::BadObject, e),
110            pdb_addr2line::Error::FormatError(e) => Self::new(PdbErrorKind::FormattingFailed, e),
111            e => Self::new(PdbErrorKind::FormattingFailed, e),
112        }
113    }
114}
115
/// Program Database, the debug companion format on Windows.
///
/// This object is a sole debug companion to [`PeObject`](../pe/struct.PeObject.html).
pub struct PdbObject<'data> {
    /// The opened PDB, shared and guarded by a lock.
    pdb: Arc<RwLock<Pdb<'data>>>,
    /// The debug information stream, loaded when the object is parsed.
    debug_info: Arc<pdb::DebugInformation<'data>>,
    /// The PDB information header (source of the GUID and the fallback age).
    pdb_info: pdb::PDBInformation<'data>,
    /// The global symbol table, iterated by `symbols`.
    public_syms: pdb::SymbolTable<'data>,
    /// Which sections are executable; used to filter public symbols.
    executable_sections: ExecutableSections,
    /// The raw buffer this object was parsed from.
    data: &'data [u8],
}
127
// NB: The pdb crate simulates mmap behavior on any Read + Seek type. This implementation requires
// mutability of the `Source` and uses trait objects without a Send + Sync barrier. We know that we
// only instantiate `&[u8]` as source. Whenever we mutate the reader (to read a new module stream),
// we acquire a write lock on the PDB, which should be sufficient.
unsafe impl Send for PdbObject<'_> {}
unsafe impl Sync for PdbObject<'_> {}
134
135impl<'data> PdbObject<'data> {
136    /// Tests whether the buffer could contain an PDB object.
137    pub fn test(data: &[u8]) -> bool {
138        // NB: "Microsoft C/C++ program database 2.00" is not supported by the pdb crate, so there
139        // is no point in pretending we could read it.
140        data.starts_with(MAGIC_BIG)
141    }
142
143    /// Tries to parse a PDB object from the given slice.
144    #[allow(clippy::arc_with_non_send_sync)]
145    pub fn parse(data: &'data [u8]) -> Result<Self, PdbError> {
146        let mut pdb = Pdb::open(Cursor::new(data))?;
147        let dbi = pdb.debug_information()?;
148        let pdbi = pdb.pdb_information()?;
149        let pubi = pdb.global_symbols()?;
150        let sections = pdb.sections()?;
151
152        Ok(PdbObject {
153            pdb: Arc::new(RwLock::new(pdb)),
154            debug_info: Arc::new(dbi),
155            pdb_info: pdbi,
156            public_syms: pubi,
157            data,
158            executable_sections: ExecutableSections::from_sections(&sections),
159        })
160    }
161
162    /// The container file format, which is always `FileFormat::Pdb`.
163    pub fn file_format(&self) -> FileFormat {
164        FileFormat::Pdb
165    }
166
167    /// The code identifier of this object, always `None`.
168    ///
169    /// PDB files do not contain sufficient information to compute the code identifier, since they
170    /// are lacking the relevant parts of the PE header.
171    pub fn code_id(&self) -> Option<CodeId> {
172        None
173    }
174
175    /// The debug information identifier of this PDB.
176    ///
177    /// The PDB stores a specific header that contains GUID and age bits. Additionally, Microsoft
178    /// uses the file name of the PDB to avoid GUID collisions. In most contexts, however, it is
179    /// sufficient to rely on the uniqueness of the GUID to identify a PDB.
180    ///
181    /// The same information is also stored in a header in the corresponding PE file, which can be
182    /// used to locate a PDB from a PE.
183    pub fn debug_id(&self) -> DebugId {
184        // Prefer the age from the debug information stream, as it is more likely to correspond to
185        // the executable than the PDB info header. The latter is often bumped independently when
186        // the PDB is processed or optimized, which causes it to go out of sync with the original
187        // image.
188        let age = self.debug_info.age().unwrap_or(self.pdb_info.age);
189        match Uuid::from_slice(&self.pdb_info.guid.as_bytes()[..]) {
190            Ok(uuid) => DebugId::from_parts(uuid, age),
191            Err(_) => DebugId::default(),
192        }
193    }
194
195    /// The CPU architecture of this object, as specified in the debug information stream (DBI).
196    pub fn arch(&self) -> Arch {
197        self.debug_info
198            .machine_type()
199            .ok()
200            .map(arch_from_machine)
201            .unwrap_or_default()
202    }
203
204    /// The kind of this object, which is always `Debug`.
205    pub fn kind(&self) -> ObjectKind {
206        ObjectKind::Debug
207    }
208
209    /// The address at which the image prefers to be loaded into memory.
210    ///
211    /// The PDB only stores relative addresses, and more importantly, does not provide sufficient
212    /// information to compute the original PE's load address. The according PE, however does
213    /// feature a load address (called `image_base`). See [`PeObject::load_address`] for more
214    /// information.
215    ///
216    /// [`PeObject::load_address`]: ../pe/struct.PeObject.html#method.load_address
217    pub fn load_address(&self) -> u64 {
218        0
219    }
220
221    /// Determines whether this object exposes a public symbol table.
222    pub fn has_symbols(&self) -> bool {
223        // We can safely assume that PDBs will always contain symbols.
224        true
225    }
226
227    /// Returns an iterator over symbols in the public symbol table.
228    pub fn symbols(&self) -> PdbSymbolIterator<'data, '_> {
229        PdbSymbolIterator {
230            symbols: self.public_syms.iter(),
231            address_map: self.pdb.write().address_map().ok(),
232            executable_sections: &self.executable_sections,
233        }
234    }
235
236    /// Returns an ordered map of symbols in the symbol table.
237    pub fn symbol_map(&self) -> SymbolMap<'data> {
238        self.symbols().collect()
239    }
240
241    /// Determines whether this object contains debug information.
242    pub fn has_debug_info(&self) -> bool {
243        // There is no cheap way to find out if a PDB contains debugging information that we care
244        // about. Effectively, we're interested in local symbols declared in the module info
245        // streams. To reliably determine whether any stream is present, we'd have to probe each one
246        // of them, which can result in quite a lot of disk I/O.
247        true
248    }
249
250    /// Determines whether this object contains embedded source.
251    pub fn has_sources(&self) -> bool {
252        false
253    }
254
255    /// Returns the SRCSRV VCS integration name if available.
256    ///
257    /// This extracts the version control system identifier from the SRCSRV stream,
258    /// if present. Common values include "perforce", "tfs", "git", etc.
259    /// Returns `None` if no SRCSRV stream exists or if it cannot be parsed.
260    pub fn srcsrv_vcs_name(&self) -> Option<String> {
261        let mut pdb = self.pdb.write();
262
263        // Try to open the "srcsrv" named stream
264        let stream = match pdb.named_stream(b"srcsrv") {
265            Ok(stream) => stream,
266            Err(_) => return None,
267        };
268
269        // Parse the stream to extract VCS name
270        let stream_data = stream.as_slice();
271        if let Ok(parsed_stream) = srcsrv::SrcSrvStream::parse(stream_data) {
272            parsed_stream
273                .version_control_description()
274                .map(|s| s.to_string())
275        } else {
276            None
277        }
278    }
279
280    /// Determines whether this object is malformed and was only partially parsed
281    pub fn is_malformed(&self) -> bool {
282        false
283    }
284
285    /// Constructs a debugging session.
286    pub fn debug_session(&self) -> Result<PdbDebugSession<'data>, PdbError> {
287        PdbDebugSession::build(self)
288    }
289
290    /// Determines whether this object contains stack unwinding information.
291    pub fn has_unwind_info(&self) -> bool {
292        // The PDB crate currently loads quite a lot of information from the PDB when accessing the
293        // frame table. However, we expect unwind info in every PDB for 32-bit builds, so we can
294        // just assume it's there if the architecture matches.
295        // TODO: Implement a better way by exposing the extra streams in the PDB crate.
296        self.arch().cpu_family() == CpuFamily::Intel32
297    }
298
299    /// Returns the raw data of the ELF file.
300    pub fn data(&self) -> &'data [u8] {
301        self.data
302    }
303
304    #[doc(hidden)]
305    pub fn inner(&self) -> &RwLock<Pdb<'data>> {
306        &self.pdb
307    }
308}
309
310impl fmt::Debug for PdbObject<'_> {
311    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
312        f.debug_struct("PdbObject")
313            .field("debug_id", &self.debug_id())
314            .field("arch", &self.arch())
315            .field("load_address", &format_args!("{:#x}", self.load_address()))
316            .field("has_symbols", &self.has_symbols())
317            .field("has_debug_info", &self.has_debug_info())
318            .field("has_unwind_info", &self.has_unwind_info())
319            .field("is_malformed", &self.is_malformed())
320            .finish()
321    }
322}
323
impl<'slf, 'data: 'slf> AsSelf<'slf> for PdbObject<'data> {
    type Ref = PdbObject<'slf>;

    /// Reinterprets `&PdbObject<'data>` as `&PdbObject<'slf>`, i.e. with the data lifetime
    /// shortened to the lifetime of the borrow.
    fn as_self(&'slf self) -> &'slf Self::Ref {
        // NOTE(review): source and target types differ only in the lifetime parameter, and
        // `'data: 'slf` holds. Presumably the transmute is needed because the compiler cannot
        // shorten the lifetime by itself here; confirm before changing.
        unsafe { std::mem::transmute(self) }
    }
}
331
332impl<'data> Parse<'data> for PdbObject<'data> {
333    type Error = PdbError;
334
335    fn test(data: &[u8]) -> bool {
336        Self::test(data)
337    }
338
339    fn parse(data: &'data [u8]) -> Result<Self, PdbError> {
340        Self::parse(data)
341    }
342}
343
344impl<'data: 'object, 'object> ObjectLike<'data, 'object> for PdbObject<'data> {
345    type Error = PdbError;
346    type Session = PdbDebugSession<'data>;
347    type SymbolIterator = PdbSymbolIterator<'data, 'object>;
348
349    fn file_format(&self) -> FileFormat {
350        self.file_format()
351    }
352
353    fn code_id(&self) -> Option<CodeId> {
354        self.code_id()
355    }
356
357    fn debug_id(&self) -> DebugId {
358        self.debug_id()
359    }
360
361    fn arch(&self) -> Arch {
362        self.arch()
363    }
364
365    fn kind(&self) -> ObjectKind {
366        self.kind()
367    }
368
369    fn load_address(&self) -> u64 {
370        self.load_address()
371    }
372
373    fn has_symbols(&self) -> bool {
374        self.has_symbols()
375    }
376
377    fn symbols(&'object self) -> Self::SymbolIterator {
378        self.symbols()
379    }
380
381    fn symbol_map(&self) -> SymbolMap<'data> {
382        self.symbol_map()
383    }
384
385    fn has_debug_info(&self) -> bool {
386        self.has_debug_info()
387    }
388
389    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
390        self.debug_session()
391    }
392
393    fn has_unwind_info(&self) -> bool {
394        self.has_unwind_info()
395    }
396
397    fn has_sources(&self) -> bool {
398        self.has_sources()
399    }
400
401    fn is_malformed(&self) -> bool {
402        self.is_malformed()
403    }
404}
405
406pub(crate) fn arch_from_machine(machine: MachineType) -> Arch {
407    match machine {
408        MachineType::X86 => Arch::X86,
409        MachineType::Amd64 => Arch::Amd64,
410        MachineType::Arm => Arch::Arm,
411        MachineType::Arm64 => Arch::Arm64,
412        MachineType::PowerPC => Arch::Ppc,
413        _ => Arch::Unknown,
414    }
415}
416
417/// Contains information about which sections are executable.
418struct ExecutableSections {
419    /// For every section header in the PDB, a boolean which indicates whether the "executable"
420    /// or "execute" flag is set in the section header's characteristics.
421    is_executable_per_section: Vec<bool>,
422}
423
424impl ExecutableSections {
425    pub fn from_sections(sections: &Option<Vec<ImageSectionHeader>>) -> Self {
426        Self {
427            is_executable_per_section: match sections {
428                Some(sections) => sections
429                    .iter()
430                    .map(|section| section.characteristics)
431                    .map(|char| char.executable() || char.execute())
432                    .collect(),
433                None => Default::default(),
434            },
435        }
436    }
437
438    /// Returns whether the given offset is contained in an executable section.
439    pub fn contains(&self, offset: &PdbInternalSectionOffset) -> bool {
440        // offset.section is a one-based index.
441        if offset.section == 0 {
442            // No section.
443            return false;
444        }
445
446        let section_index = (offset.section - 1) as usize;
447        self.is_executable_per_section
448            .get(section_index)
449            .cloned()
450            .unwrap_or(false)
451    }
452}
453
454/// An iterator over symbols in the PDB file.
455///
456/// Returned by [`PdbObject::symbols`](struct.PdbObject.html#method.symbols).
457pub struct PdbSymbolIterator<'data, 'object> {
458    symbols: pdb::SymbolIter<'object>,
459    address_map: Option<AddressMap<'data>>,
460    executable_sections: &'object ExecutableSections,
461}
462
463impl<'data> Iterator for PdbSymbolIterator<'data, '_> {
464    type Item = Symbol<'data>;
465
466    fn next(&mut self) -> Option<Self::Item> {
467        let address_map = self.address_map.as_ref()?;
468
469        while let Ok(Some(symbol)) = self.symbols.next() {
470            if let Ok(SymbolData::Public(public)) = symbol.parse() {
471                if !self.executable_sections.contains(&public.offset) {
472                    continue;
473                }
474
475                let address = match public.offset.to_rva(address_map) {
476                    Some(address) => address,
477                    None => continue,
478                };
479
480                // pdb::SymbolIter offers data bound to its own lifetime since it holds the
481                // buffer containing public symbols. The contract requires that we return
482                // `Symbol<'data>`, so we cannot return zero-copy symbols here.
483                let cow = public.name.to_string();
484                let name = Cow::from(String::from(cow));
485
486                return Some(Symbol {
487                    name: Some(name),
488                    address: u64::from(address.0),
489                    size: 0, // Computed in `SymbolMap`
490                });
491            }
492        }
493
494        None
495    }
496}
497
498struct PdbStreams<'d> {
499    debug_info: Arc<pdb::DebugInformation<'d>>,
500    type_info: pdb::TypeInformation<'d>,
501    id_info: pdb::IdInformation<'d>,
502    string_table: Option<pdb::StringTable<'d>>,
503
504    pdb: Arc<RwLock<Pdb<'d>>>,
505
506    /// ModuleInfo objects are stored on this object (outside PdbDebugInfo) so that the
507    /// PdbDebugInfo can store a TypeFormatter, which has a lifetime dependency on its
508    /// ModuleProvider, which is this PdbStreams. This is so that TypeFormatter can cache
509    /// CrossModuleImports inside itself, and those have a lifetime dependency on the
510    /// ModuleInfo.
511    module_infos: FrozenMap<usize, Box<ModuleInfo<'d>>>,
512}
513
514impl<'d> PdbStreams<'d> {
515    fn from_pdb(pdb: &PdbObject<'d>) -> Result<Self, PdbError> {
516        let mut p = pdb.pdb.write();
517
518        // PDB::string_table errors if the named stream for the string table is not present.
519        // However, this occurs in certain PDBs and does not automatically indicate an error.
520        let string_table = match p.string_table() {
521            Ok(string_table) => Some(string_table),
522            Err(pdb::Error::StreamNameNotFound) => None,
523            Err(e) => return Err(e.into()),
524        };
525
526        Ok(Self {
527            string_table,
528            debug_info: pdb.debug_info.clone(),
529            type_info: p.type_information()?,
530            id_info: p.id_information()?,
531            pdb: pdb.pdb.clone(),
532            module_infos: FrozenMap::new(),
533        })
534    }
535}
536
537impl<'d> pdb_addr2line::ModuleProvider<'d> for PdbStreams<'d> {
538    fn get_module_info(
539        &self,
540        module_index: usize,
541        module: &Module,
542    ) -> Result<Option<&ModuleInfo<'d>>, pdb::Error> {
543        if let Some(module_info) = self.module_infos.get(&module_index) {
544            return Ok(Some(module_info));
545        }
546
547        let mut pdb = self.pdb.write();
548        Ok(pdb.module_info(module)?.map(|module_info| {
549            self.module_infos
550                .insert(module_index, Box::new(module_info))
551        }))
552    }
553}
554
555struct PdbDebugInfo<'d> {
556    /// The streams, to load module streams on demand.
557    streams: &'d PdbStreams<'d>,
558    /// OMAP structure to map reordered sections to RVAs.
559    address_map: pdb::AddressMap<'d>,
560    /// String table for name lookups.
561    string_table: Option<&'d pdb::StringTable<'d>>,
562    /// Type formatter for function name strings.
563    type_formatter: pdb_addr2line::TypeFormatter<'d, 'd>,
564}
565
566impl<'d> PdbDebugInfo<'d> {
567    fn build(pdb: &PdbObject<'d>, streams: &'d PdbStreams<'d>) -> Result<Self, PdbError> {
568        let modules = streams.debug_info.modules()?.collect::<Vec<_>>()?;
569
570        // Avoid deadlocks by only covering the two access to the address map. For
571        // instance, `pdb.symbol_map()` requires a mutable borrow of the PDB as well.
572        let mut p = pdb.pdb.write();
573        let address_map = p.address_map()?;
574
575        drop(p);
576
577        Ok(PdbDebugInfo {
578            address_map,
579            streams,
580            string_table: streams.string_table.as_ref(),
581            type_formatter: pdb_addr2line::TypeFormatter::new_from_parts(
582                streams,
583                modules,
584                &streams.debug_info,
585                &streams.type_info,
586                &streams.id_info,
587                streams.string_table.as_ref(),
588                Default::default(),
589            )?,
590        })
591    }
592
593    /// Returns an iterator over all compilation units (modules).
594    fn units(&'d self) -> PdbUnitIterator<'d> {
595        PdbUnitIterator {
596            debug_info: self,
597            index: 0,
598        }
599    }
600
601    fn modules(&self) -> &[Module<'d>] {
602        self.type_formatter.modules()
603    }
604
605    fn get_module(&'d self, index: usize) -> Result<Option<&'d ModuleInfo<'d>>, PdbError> {
606        // Silently ignore module references out-of-bound
607        let module = match self.modules().get(index) {
608            Some(module) => module,
609            None => return Ok(None),
610        };
611
612        Ok(self.streams.get_module_info(index, module)?)
613    }
614
615    fn file_info(&self, file_info: pdb::FileInfo<'d>) -> Result<FileInfo<'_>, PdbError> {
616        let file_path = match self.string_table {
617            Some(string_table) => file_info.name.to_raw_string(string_table)?,
618            None => "".into(),
619        };
620
621        Ok(FileInfo::from_path(file_path.as_bytes()))
622    }
623}
624
impl<'slf, 'd: 'slf> AsSelf<'slf> for PdbDebugInfo<'d> {
    type Ref = PdbDebugInfo<'slf>;

    /// Reinterprets `&PdbDebugInfo<'d>` as `&PdbDebugInfo<'slf>`, i.e. with the lifetime
    /// parameter shortened to the lifetime of the borrow.
    fn as_self(&'slf self) -> &'slf Self::Ref {
        // NOTE(review): source and target types differ only in the lifetime parameter, and
        // `'d: 'slf` holds. Presumably the transmute is needed because the compiler cannot
        // shorten the lifetime by itself here; confirm before changing.
        unsafe { std::mem::transmute(self) }
    }
}
632
633/// Debug session for PDB objects.
634pub struct PdbDebugSession<'d> {
635    cell: SelfCell<Box<PdbStreams<'d>>, PdbDebugInfo<'d>>,
636}
637
638impl<'d> PdbDebugSession<'d> {
639    fn build(pdb: &PdbObject<'d>) -> Result<Self, PdbError> {
640        let streams = PdbStreams::from_pdb(pdb)?;
641        let cell = SelfCell::try_new(Box::new(streams), |streams| {
642            PdbDebugInfo::build(pdb, unsafe { &*streams })
643        })?;
644
645        Ok(PdbDebugSession { cell })
646    }
647
648    /// Returns an iterator over all source files in this debug file.
649    pub fn files(&self) -> PdbFileIterator<'_> {
650        PdbFileIterator {
651            debug_info: self.cell.get(),
652            units: self.cell.get().units(),
653            files: pdb::FileIterator::default(),
654            finished: false,
655        }
656    }
657
658    /// Returns an iterator over all functions in this debug file.
659    pub fn functions(&self) -> PdbFunctionIterator<'_> {
660        PdbFunctionIterator {
661            units: self.cell.get().units(),
662            functions: Vec::new().into_iter(),
663            finished: false,
664        }
665    }
666
667    /// See [DebugSession::source_by_path] for more information.
668    pub fn source_by_path(
669        &self,
670        _path: &str,
671    ) -> Result<Option<SourceFileDescriptor<'_>>, PdbError> {
672        Ok(None)
673    }
674}
675
676impl<'session> DebugSession<'session> for PdbDebugSession<'_> {
677    type Error = PdbError;
678    type FunctionIterator = PdbFunctionIterator<'session>;
679    type FileIterator = PdbFileIterator<'session>;
680
681    fn functions(&'session self) -> Self::FunctionIterator {
682        self.functions()
683    }
684
685    fn files(&'session self) -> Self::FileIterator {
686        self.files()
687    }
688
689    fn source_by_path(&self, path: &str) -> Result<Option<SourceFileDescriptor<'_>>, Self::Error> {
690        self.source_by_path(path)
691    }
692}
693
/// A single module of the PDB together with the shared debug information needed to process it.
struct Unit<'s> {
    /// Shared debug information (address map, type formatter, file name lookup).
    debug_info: &'s PdbDebugInfo<'s>,
    /// Index of this module; passed to the type formatter when formatting names.
    module_index: usize,
    /// The module's info stream, source of its line program, symbols and inlinees.
    module: &'s pdb::ModuleInfo<'s>,
}
699
700impl<'s> Unit<'s> {
701    fn load(
702        debug_info: &'s PdbDebugInfo<'s>,
703        module_index: usize,
704        module: &'s pdb::ModuleInfo<'s>,
705    ) -> Result<Self, PdbError> {
706        Ok(Self {
707            debug_info,
708            module_index,
709            module,
710        })
711    }
712
713    fn collect_lines<I>(
714        &self,
715        mut line_iter: I,
716        program: &LineProgram<'s>,
717    ) -> Result<Vec<LineInfo<'s>>, PdbError>
718    where
719        I: FallibleIterator<Item = pdb::LineInfo>,
720        PdbError: From<I::Error>,
721    {
722        let address_map = &self.debug_info.address_map;
723
724        let mut lines = Vec::new();
725        while let Some(line_info) = line_iter.next()? {
726            let rva = match line_info.offset.to_rva(address_map) {
727                Some(rva) => u64::from(rva.0),
728                None => continue,
729            };
730
731            // skip 0-sized line infos
732            let size = line_info.length.map(u64::from);
733            if size == Some(0) {
734                continue;
735            }
736
737            let file_info = program.get_file_info(line_info.file_index)?;
738
739            lines.push(LineInfo {
740                address: rva,
741                size,
742                file: self.debug_info.file_info(file_info)?,
743                line: line_info.line_start.into(),
744            });
745        }
746        lines.sort_by_key(|line| line.address);
747
748        // Merge line infos that only differ in their `column` information, which we don't
749        // care about. We only want to output line infos that differ in their file/line.
750        lines.dedup_by(|current, prev| {
751            // the records need to be consecutive to be able to merge
752            let first_end = prev.size.and_then(|size| prev.address.checked_add(size));
753            let is_consecutive = first_end == Some(current.address);
754            // the line record points to the same file/line, so we want to merge/dedupe it
755            if is_consecutive && prev.file == current.file && prev.line == current.line {
756                prev.size = prev
757                    .size
758                    .map(|first_size| first_size.saturating_add(current.size.unwrap_or(0)));
759
760                return true;
761            }
762            false
763        });
764
765        Ok(lines)
766    }
767
768    /// Sanitize the collected lines.
769    ///
770    /// This essentially filters out all the lines that lay outside of the function range.
771    ///
772    /// For example we have observed in a real-world pdb that has:
773    /// - A function 0x33ea50 (size 0xc)
774    /// - With one line record: 0x33e850 (size 0x26)
775    ///
776    /// The line record is completely outside the range of the function.
777    fn sanitize_lines(func: &mut Function) {
778        let fn_start = func.address;
779        let fn_end = func.end_address();
780        func.lines.retain(|line| {
781            if line.address >= fn_end {
782                return false;
783            }
784            let line_end = match line.size {
785                Some(size) => line.address.saturating_add(size),
786                None => return true,
787            };
788            line_end > fn_start
789        });
790    }
791
792    fn handle_function(
793        &self,
794        offset: PdbInternalSectionOffset,
795        len: u32,
796        name: RawString<'s>,
797        type_index: TypeIndex,
798        program: &LineProgram<'s>,
799    ) -> Result<Option<Function<'s>>, PdbError> {
800        let address_map = &self.debug_info.address_map;
801
802        // Translate the function's address to the PE's address space. If this fails, we're
803        // likely dealing with an invalid function and can skip it.
804        let address = match offset.to_rva(address_map) {
805            Some(addr) => u64::from(addr.0),
806            None => return Ok(None),
807        };
808
809        // Names from the private symbol table are generally demangled. They contain the path of the
810        // scope and name of the function itself, including type parameters, and the parameter lists
811        // are contained in the type info. We do not emit a return type.
812        let formatter = &self.debug_info.type_formatter;
813        let name = name.to_string();
814        let name = Name::new(
815            formatter
816                .format_function(&name, self.module_index, type_index)
817                .map(Cow::Owned)
818                .unwrap_or(name),
819            NameMangling::Unmangled,
820            Language::Unknown,
821        );
822
823        let line_iter = program.lines_for_symbol(offset);
824        let lines = self.collect_lines(line_iter, program)?;
825
826        Ok(Some(Function {
827            address,
828            size: len.into(),
829            name,
830            compilation_dir: &[],
831            lines,
832            inlinees: Vec::new(),
833            inline: false,
834        }))
835    }
836
837    fn handle_procedure(
838        &self,
839        proc: &ProcedureSymbol<'s>,
840        program: &LineProgram<'s>,
841    ) -> Result<Option<Function<'s>>, PdbError> {
842        self.handle_function(proc.offset, proc.len, proc.name, proc.type_index, program)
843    }
844
845    fn handle_separated_code(
846        &self,
847        proc: &ProcedureSymbol<'s>,
848        sepcode: &SeparatedCodeSymbol,
849        program: &LineProgram<'s>,
850    ) -> Result<Option<Function<'s>>, PdbError> {
851        self.handle_function(
852            sepcode.offset,
853            sepcode.len,
854            proc.name,
855            proc.type_index,
856            program,
857        )
858    }
859
860    fn handle_inlinee(
861        &self,
862        inline_site: InlineSiteSymbol<'s>,
863        parent_offset: PdbInternalSectionOffset,
864        inlinee: &pdb::Inlinee<'s>,
865        program: &LineProgram<'s>,
866    ) -> Result<Option<Function<'s>>, PdbError> {
867        let line_iter = inlinee.lines(parent_offset, &inline_site);
868        let lines = self.collect_lines(line_iter, program)?;
869
870        // If there are no line records, skip this inline function completely. Apparently, it was
871        // eliminated by the compiler, and cannot be hit by the program anymore. For `symbolic`,
872        // such functions do not have any use.
873        let start = match lines.first().map(|line| line.address) {
874            Some(address) => address,
875            None => return Ok(None),
876        };
877
878        let end = match lines
879            .last()
880            .map(|line| line.address + line.size.unwrap_or(1))
881        {
882            Some(address) => address,
883            None => return Ok(None),
884        };
885
886        let formatter = &self.debug_info.type_formatter;
887        let name = Name::new(
888            formatter.format_id(self.module_index, inline_site.inlinee)?,
889            NameMangling::Unmangled,
890            Language::Unknown,
891        );
892
893        Ok(Some(Function {
894            address: start,
895            size: end - start,
896            name,
897            compilation_dir: &[],
898            lines,
899            inlinees: Vec::new(),
900            inline: true,
901        }))
902    }
903
904    fn functions(&self) -> Result<Vec<Function<'s>>, PdbError> {
905        let program = self.module.line_program()?;
906        let mut symbols = self.module.symbols()?;
907
908        // Depending on the compiler version, the inlinee table might not be sorted. Since constant
909        // search through inlinees is too slow (due to repeated parsing), but Inlinees are rather
910        // small structures, it is relatively cheap to collect them into an in-memory index.
911        let inlinees: BTreeMap<_, _> = self
912            .module
913            .inlinees()?
914            .map(|i| Ok((i.index(), i)))
915            .collect()?;
916
917        let mut depth = 0;
918        let mut inc_next = false;
919        let mut skipped_depth = None;
920
921        let mut functions = Vec::new();
922        let mut stack = FunctionStack::new();
923        let mut proc_offsets = SmallVec::<[_; 3]>::new();
924        let mut last_proc = None;
925
926        while let Some(symbol) = symbols.next()? {
927            if inc_next {
928                depth += 1;
929            }
930
931            inc_next = symbol.starts_scope();
932            if symbol.ends_scope() {
933                depth -= 1;
934
935                if proc_offsets.last().is_some_and(|&(d, _)| d >= depth) {
936                    proc_offsets.pop();
937                }
938            }
939
940            // If we're navigating within a skipped function (see below), we can ignore this
941            // entry completely. Otherwise, we've moved out of any skipped function and can
942            // reset the stored depth.
943            match skipped_depth {
944                Some(skipped) if depth > skipped => continue,
945                _ => skipped_depth = None,
946            }
947
948            // Flush all functions out that exceed the current iteration depth. Since we
949            // encountered a symbol at this level, there will be no more inlinees to the
950            // previous function at the same level or any of it's children.
951            if symbol.ends_scope() {
952                stack.flush(depth, &mut functions);
953            }
954
955            let function = match symbol.parse() {
956                Ok(SymbolData::Procedure(proc)) => {
957                    proc_offsets.push((depth, proc.offset));
958                    let function = self.handle_procedure(&proc, &program)?;
959                    last_proc = Some(proc);
960                    function
961                }
962                Ok(SymbolData::SeparatedCode(sepcode)) => match last_proc.as_ref() {
963                    Some(last_proc) if last_proc.offset == sepcode.parent_offset => {
964                        self.handle_separated_code(last_proc, &sepcode, &program)?
965                    }
966                    _ => continue,
967                },
968                Ok(SymbolData::InlineSite(site)) => {
969                    let parent_offset = proc_offsets
970                        .last()
971                        .map(|&(_, offset)| offset)
972                        .ok_or(PdbErrorKind::UnexpectedInline)?;
973
974                    // We can assume that inlinees will be listed in the inlinee table. If missing,
975                    // skip silently instead of erroring out. Missing a single inline function is
976                    // more acceptable in such a case than halting iteration completely.
977                    if let Some(inlinee) = inlinees.get(&site.inlinee) {
978                        // We have seen that the MSVC Compiler `19.16` (VS 2017) can output
979                        // `ChangeFile` annotations which are not properly aligned to the beginning
980                        // of a file checksum, leading to `UnimplementedFileChecksumKind` errors.
981                        // Investigation showed that this can happen for inlined `{ctor}` functions,
982                        // but there are no clear leads to why that might have happened, and how to
983                        // recover from these broken annotations.
984                        // For that reason, we skip these inlinees completely so we do not fail
985                        // processing the complete pdb file.
986                        self.handle_inlinee(site, parent_offset, inlinee, &program)
987                            .ok()
988                            .flatten()
989                    } else {
990                        None
991                    }
992                }
993                // We need to ignore errors here since the PDB crate does not yet implement all
994                // symbol types. Instead of erroring too often, it's better to swallow these.
995                _ => continue,
996            };
997
998            match function {
999                Some(mut function) => {
1000                    Self::sanitize_lines(&mut function);
1001                    // TODO: figure out what to do with functions that have no more lines
1002                    // after sanitization
1003                    stack.push(depth, function)
1004                }
1005                None => skipped_depth = Some(depth),
1006            }
1007        }
1008
1009        // We're done, flush the remaining stack.
1010        stack.flush(0, &mut functions);
1011
1012        Ok(functions)
1013    }
1014}
1015
1016struct PdbUnitIterator<'s> {
1017    debug_info: &'s PdbDebugInfo<'s>,
1018    index: usize,
1019}
1020
1021impl<'s> Iterator for PdbUnitIterator<'s> {
1022    type Item = Result<Unit<'s>, PdbError>;
1023
1024    fn next(&mut self) -> Option<Self::Item> {
1025        let debug_info = self.debug_info;
1026        while self.index < debug_info.modules().len() {
1027            let module_index = self.index;
1028            let result = debug_info.get_module(module_index);
1029            self.index += 1;
1030
1031            let module = match result {
1032                Ok(Some(module)) => module,
1033                Ok(None) => continue,
1034                Err(error) => return Some(Err(error)),
1035            };
1036
1037            return Some(Unit::load(debug_info, module_index, module));
1038        }
1039
1040        None
1041    }
1042}
1043
1044/// An iterator over source files in a Pdb object.
1045pub struct PdbFileIterator<'s> {
1046    debug_info: &'s PdbDebugInfo<'s>,
1047    units: PdbUnitIterator<'s>,
1048    files: pdb::FileIterator<'s>,
1049    finished: bool,
1050}
1051
1052impl<'s> Iterator for PdbFileIterator<'s> {
1053    type Item = Result<FileEntry<'s>, PdbError>;
1054
1055    fn next(&mut self) -> Option<Self::Item> {
1056        if self.finished {
1057            return None;
1058        }
1059
1060        loop {
1061            if let Some(file_result) = self.files.next().transpose() {
1062                let result = file_result
1063                    .map_err(|err| err.into())
1064                    .and_then(|i| self.debug_info.file_info(i))
1065                    .map(|info| FileEntry::new(Cow::default(), info));
1066
1067                return Some(result);
1068            }
1069
1070            let unit = match self.units.next() {
1071                Some(Ok(unit)) => unit,
1072                Some(Err(error)) => return Some(Err(error)),
1073                None => break,
1074            };
1075
1076            let line_program = match unit.module.line_program() {
1077                Ok(line_program) => line_program,
1078                Err(error) => return Some(Err(error.into())),
1079            };
1080
1081            self.files = line_program.files();
1082        }
1083
1084        self.finished = true;
1085        None
1086    }
1087}
1088
1089/// An iterator over functions in a PDB file.
1090pub struct PdbFunctionIterator<'s> {
1091    units: PdbUnitIterator<'s>,
1092    functions: std::vec::IntoIter<Function<'s>>,
1093    finished: bool,
1094}
1095
1096impl<'s> Iterator for PdbFunctionIterator<'s> {
1097    type Item = Result<Function<'s>, PdbError>;
1098
1099    fn next(&mut self) -> Option<Self::Item> {
1100        if self.finished {
1101            return None;
1102        }
1103
1104        loop {
1105            if let Some(func) = self.functions.next() {
1106                return Some(Ok(func));
1107            }
1108
1109            let unit = match self.units.next() {
1110                Some(Ok(unit)) => unit,
1111                Some(Err(error)) => return Some(Err(error)),
1112                None => break,
1113            };
1114
1115            self.functions = match unit.functions() {
1116                Ok(functions) => functions.into_iter(),
1117                Err(error) => return Some(Err(error)),
1118            };
1119        }
1120
1121        self.finished = true;
1122        None
1123    }
1124}
1125
1126impl std::iter::FusedIterator for PdbFunctionIterator<'_> {}