symbolic_debuginfo/
pdb.rs

1//! Support for Program Database, the debug companion format on Windows.
2
3use std::borrow::Cow;
4use std::collections::btree_map::BTreeMap;
5use std::error::Error;
6use std::fmt;
7use std::io::Cursor;
8use std::sync::Arc;
9
10use elsa::FrozenMap;
11use parking_lot::RwLock;
12use pdb_addr2line::pdb::{
13    AddressMap, FallibleIterator, ImageSectionHeader, InlineSiteSymbol, LineProgram, MachineType,
14    Module, ModuleInfo, PdbInternalSectionOffset, ProcedureSymbol, RawString, SeparatedCodeSymbol,
15    SymbolData, TypeIndex,
16};
17use pdb_addr2line::ModuleProvider;
18use smallvec::SmallVec;
19use thiserror::Error;
20
21use symbolic_common::{
22    Arch, AsSelf, CodeId, CpuFamily, DebugId, Language, Name, NameMangling, SelfCell, Uuid,
23};
24
25use crate::base::*;
26use crate::function_stack::FunctionStack;
27use crate::sourcebundle::SourceFileDescriptor;
28
/// Concrete PDB reader used throughout this module: a [`pdb::PDB`] that reads from an
/// in-memory byte slice through a [`Cursor`].
type Pdb<'data> = pdb::PDB<'data, Cursor<&'data [u8]>>;

/// Byte sequence a buffer must start with for [`PdbObject::test`] to accept it
/// (the "MSF 7.00" file signature).
const MAGIC_BIG: &[u8] = b"Microsoft C/C++ MSF 7.00\r\n\x1a\x44\x53\x00\x00\x00";

// Used for CFI, remove once abstraction is complete
#[doc(hidden)]
pub use pdb_addr2line::pdb;
36
/// The kind of failure described by a [`PdbError`].
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PdbErrorKind {
    /// The PDB file is corrupted. See the cause for more information.
    BadObject,

    /// An inline record was encountered without an inlining parent.
    UnexpectedInline,

    /// Formatting of a type name failed.
    FormattingFailed,
}

impl fmt::Display for PdbErrorKind {
    /// Writes the fixed, human-readable message associated with this kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::BadObject => "invalid pdb file",
            Self::UnexpectedInline => "unexpected inline function without parent",
            Self::FormattingFailed => "failed to format type name",
        };

        f.write_str(message)
    }
}
60
/// An error when dealing with [`PdbObject`](struct.PdbObject.html).
///
/// The `Display` output is that of the contained [`PdbErrorKind`]; an underlying cause, if one
/// was recorded, is exposed as the error's source.
#[derive(Debug, Error)]
#[error("{kind}")]
pub struct PdbError {
    /// Category of the failure.
    kind: PdbErrorKind,
    /// Optional underlying error that caused this one.
    #[source]
    source: Option<Box<dyn Error + Send + Sync + 'static>>,
}
69
70impl PdbError {
71    /// Creates a new PDB error from a known kind of error as well as an arbitrary error
72    /// payload.
73    fn new<E>(kind: PdbErrorKind, source: E) -> Self
74    where
75        E: Into<Box<dyn Error + Send + Sync>>,
76    {
77        let source = Some(source.into());
78        Self { kind, source }
79    }
80
81    /// Returns the corresponding [`PdbErrorKind`] for this error.
82    pub fn kind(&self) -> PdbErrorKind {
83        self.kind
84    }
85}
86
87impl From<PdbErrorKind> for PdbError {
88    fn from(kind: PdbErrorKind) -> Self {
89        Self { kind, source: None }
90    }
91}
92
93impl From<pdb::Error> for PdbError {
94    fn from(e: pdb::Error) -> Self {
95        Self::new(PdbErrorKind::BadObject, e)
96    }
97}
98
99impl From<fmt::Error> for PdbError {
100    fn from(e: fmt::Error) -> Self {
101        Self::new(PdbErrorKind::FormattingFailed, e)
102    }
103}
104
105impl From<pdb_addr2line::Error> for PdbError {
106    fn from(e: pdb_addr2line::Error) -> Self {
107        match e {
108            pdb_addr2line::Error::PdbError(e) => Self::new(PdbErrorKind::BadObject, e),
109            pdb_addr2line::Error::FormatError(e) => Self::new(PdbErrorKind::FormattingFailed, e),
110            e => Self::new(PdbErrorKind::FormattingFailed, e),
111        }
112    }
113}
114
/// Program Database, the debug companion format on Windows.
///
/// This object is a sole debug companion to [`PeObject`](../pe/struct.PeObject.html).
pub struct PdbObject<'data> {
    /// The opened PDB, shared behind a lock because reading further streams needs `&mut` access.
    pdb: Arc<RwLock<Pdb<'data>>>,
    /// The debug information (DBI) stream, loaded once during parsing.
    debug_info: Arc<pdb::DebugInformation<'data>>,
    /// The PDB information header; provides the GUID and a fallback age for the debug id.
    pdb_info: pdb::PDBInformation<'data>,
    /// The symbol table iterated by `symbols()`.
    public_syms: pdb::SymbolTable<'data>,
    /// Per-section "is executable" flags, used to filter public symbols.
    executable_sections: ExecutableSections,
    /// The raw bytes this object was parsed from.
    data: &'data [u8],
}
126
// SAFETY / NB: The pdb crate simulates mmap behavior on any Read + Seek type. This implementation
// requires mutability of the `Source` and uses trait objects without a Send + Sync barrier. We
// know that we only instantiate `&[u8]` as source. Whenever we mutate the reader (to read a new
// module stream), we acquire a write lock on the PDB, which should be sufficient.
unsafe impl Send for PdbObject<'_> {}
unsafe impl Sync for PdbObject<'_> {}
133
134impl<'data> PdbObject<'data> {
135    /// Tests whether the buffer could contain an PDB object.
136    pub fn test(data: &[u8]) -> bool {
137        // NB: "Microsoft C/C++ program database 2.00" is not supported by the pdb crate, so there
138        // is no point in pretending we could read it.
139        data.starts_with(MAGIC_BIG)
140    }
141
142    /// Tries to parse a PDB object from the given slice.
143    #[allow(clippy::arc_with_non_send_sync)]
144    pub fn parse(data: &'data [u8]) -> Result<Self, PdbError> {
145        let mut pdb = Pdb::open(Cursor::new(data))?;
146        let dbi = pdb.debug_information()?;
147        let pdbi = pdb.pdb_information()?;
148        let pubi = pdb.global_symbols()?;
149        let sections = pdb.sections()?;
150
151        Ok(PdbObject {
152            pdb: Arc::new(RwLock::new(pdb)),
153            debug_info: Arc::new(dbi),
154            pdb_info: pdbi,
155            public_syms: pubi,
156            data,
157            executable_sections: ExecutableSections::from_sections(&sections),
158        })
159    }
160
161    /// The container file format, which is always `FileFormat::Pdb`.
162    pub fn file_format(&self) -> FileFormat {
163        FileFormat::Pdb
164    }
165
166    /// The code identifier of this object, always `None`.
167    ///
168    /// PDB files do not contain sufficient information to compute the code identifier, since they
169    /// are lacking the relevant parts of the PE header.
170    pub fn code_id(&self) -> Option<CodeId> {
171        None
172    }
173
174    /// The debug information identifier of this PDB.
175    ///
176    /// The PDB stores a specific header that contains GUID and age bits. Additionally, Microsoft
177    /// uses the file name of the PDB to avoid GUID collisions. In most contexts, however, it is
178    /// sufficient to rely on the uniqueness of the GUID to identify a PDB.
179    ///
180    /// The same information is also stored in a header in the corresponding PE file, which can be
181    /// used to locate a PDB from a PE.
182    pub fn debug_id(&self) -> DebugId {
183        // Prefer the age from the debug information stream, as it is more likely to correspond to
184        // the executable than the PDB info header. The latter is often bumped independently when
185        // the PDB is processed or optimized, which causes it to go out of sync with the original
186        // image.
187        let age = self.debug_info.age().unwrap_or(self.pdb_info.age);
188        match Uuid::from_slice(&self.pdb_info.guid.as_bytes()[..]) {
189            Ok(uuid) => DebugId::from_parts(uuid, age),
190            Err(_) => DebugId::default(),
191        }
192    }
193
194    /// The CPU architecture of this object, as specified in the debug information stream (DBI).
195    pub fn arch(&self) -> Arch {
196        self.debug_info
197            .machine_type()
198            .ok()
199            .map(arch_from_machine)
200            .unwrap_or_default()
201    }
202
203    /// The kind of this object, which is always `Debug`.
204    pub fn kind(&self) -> ObjectKind {
205        ObjectKind::Debug
206    }
207
208    /// The address at which the image prefers to be loaded into memory.
209    ///
210    /// The PDB only stores relative addresses, and more importantly, does not provide sufficient
211    /// information to compute the original PE's load address. The according PE, however does
212    /// feature a load address (called `image_base`). See [`PeObject::load_address`] for more
213    /// information.
214    ///
215    /// [`PeObject::load_address`]: ../pe/struct.PeObject.html#method.load_address
216    pub fn load_address(&self) -> u64 {
217        0
218    }
219
220    /// Determines whether this object exposes a public symbol table.
221    pub fn has_symbols(&self) -> bool {
222        // We can safely assume that PDBs will always contain symbols.
223        true
224    }
225
226    /// Returns an iterator over symbols in the public symbol table.
227    pub fn symbols(&self) -> PdbSymbolIterator<'data, '_> {
228        PdbSymbolIterator {
229            symbols: self.public_syms.iter(),
230            address_map: self.pdb.write().address_map().ok(),
231            executable_sections: &self.executable_sections,
232        }
233    }
234
235    /// Returns an ordered map of symbols in the symbol table.
236    pub fn symbol_map(&self) -> SymbolMap<'data> {
237        self.symbols().collect()
238    }
239
240    /// Determines whether this object contains debug information.
241    pub fn has_debug_info(&self) -> bool {
242        // There is no cheap way to find out if a PDB contains debugging information that we care
243        // about. Effectively, we're interested in local symbols declared in the module info
244        // streams. To reliably determine whether any stream is present, we'd have to probe each one
245        // of them, which can result in quite a lot of disk I/O.
246        true
247    }
248
249    /// Determines whether this object contains embedded source.
250    pub fn has_sources(&self) -> bool {
251        false
252    }
253
254    /// Determines whether this object is malformed and was only partially parsed
255    pub fn is_malformed(&self) -> bool {
256        false
257    }
258
259    /// Constructs a debugging session.
260    pub fn debug_session(&self) -> Result<PdbDebugSession<'data>, PdbError> {
261        PdbDebugSession::build(self)
262    }
263
264    /// Determines whether this object contains stack unwinding information.
265    pub fn has_unwind_info(&self) -> bool {
266        // The PDB crate currently loads quite a lot of information from the PDB when accessing the
267        // frame table. However, we expect unwind info in every PDB for 32-bit builds, so we can
268        // just assume it's there if the architecture matches.
269        // TODO: Implement a better way by exposing the extra streams in the PDB crate.
270        self.arch().cpu_family() == CpuFamily::Intel32
271    }
272
273    /// Returns the raw data of the ELF file.
274    pub fn data(&self) -> &'data [u8] {
275        self.data
276    }
277
278    #[doc(hidden)]
279    pub fn inner(&self) -> &RwLock<Pdb<'data>> {
280        &self.pdb
281    }
282}
283
284impl fmt::Debug for PdbObject<'_> {
285    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
286        f.debug_struct("PdbObject")
287            .field("debug_id", &self.debug_id())
288            .field("arch", &self.arch())
289            .field("load_address", &format_args!("{:#x}", self.load_address()))
290            .field("has_symbols", &self.has_symbols())
291            .field("has_debug_info", &self.has_debug_info())
292            .field("has_unwind_info", &self.has_unwind_info())
293            .field("is_malformed", &self.is_malformed())
294            .finish()
295    }
296}
297
/// Allows a `PdbObject<'data>` to be viewed as a `PdbObject<'slf>` for a shorter lifetime `'slf`.
impl<'slf, 'data: 'slf> AsSelf<'slf> for PdbObject<'data> {
    type Ref = PdbObject<'slf>;

    fn as_self(&'slf self) -> &'slf Self::Ref {
        // The transmute only changes the lifetime parameter from `'data` to `'slf`, and the
        // `'data: 'slf` bound guarantees the data outlives the returned reference.
        // NOTE(review): this assumes shortening the lifetime is sound for every field of
        // `PdbObject` — confirm.
        unsafe { std::mem::transmute(self) }
    }
}
305
/// Trait adapter: both methods forward to the inherent functions of the same name.
impl<'data> Parse<'data> for PdbObject<'data> {
    type Error = PdbError;

    // `Self::test` resolves to the inherent `PdbObject::test`, not to this trait method.
    fn test(data: &[u8]) -> bool {
        Self::test(data)
    }

    // `Self::parse` resolves to the inherent `PdbObject::parse`, not to this trait method.
    fn parse(data: &'data [u8]) -> Result<Self, PdbError> {
        Self::parse(data)
    }
}
317
/// Trait adapter exposing `PdbObject` through the generic `ObjectLike` interface.
///
/// Every method forwards to the inherent method of the same name on `PdbObject`; inherent
/// methods take precedence in method resolution, so these calls do not recurse.
impl<'data: 'object, 'object> ObjectLike<'data, 'object> for PdbObject<'data> {
    type Error = PdbError;
    type Session = PdbDebugSession<'data>;
    type SymbolIterator = PdbSymbolIterator<'data, 'object>;

    fn file_format(&self) -> FileFormat {
        self.file_format()
    }

    fn code_id(&self) -> Option<CodeId> {
        self.code_id()
    }

    fn debug_id(&self) -> DebugId {
        self.debug_id()
    }

    fn arch(&self) -> Arch {
        self.arch()
    }

    fn kind(&self) -> ObjectKind {
        self.kind()
    }

    fn load_address(&self) -> u64 {
        self.load_address()
    }

    fn has_symbols(&self) -> bool {
        self.has_symbols()
    }

    fn symbols(&'object self) -> Self::SymbolIterator {
        self.symbols()
    }

    fn symbol_map(&self) -> SymbolMap<'data> {
        self.symbol_map()
    }

    fn has_debug_info(&self) -> bool {
        self.has_debug_info()
    }

    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
        self.debug_session()
    }

    fn has_unwind_info(&self) -> bool {
        self.has_unwind_info()
    }

    fn has_sources(&self) -> bool {
        self.has_sources()
    }

    fn is_malformed(&self) -> bool {
        self.is_malformed()
    }
}
379
/// Maps a PDB [`MachineType`] to the corresponding [`Arch`].
///
/// Only x86, AMD64, ARM, ARM64 and PowerPC are mapped; every other machine type yields
/// `Arch::Unknown`.
pub(crate) fn arch_from_machine(machine: MachineType) -> Arch {
    match machine {
        MachineType::X86 => Arch::X86,
        MachineType::Amd64 => Arch::Amd64,
        MachineType::Arm => Arch::Arm,
        MachineType::Arm64 => Arch::Arm64,
        MachineType::PowerPC => Arch::Ppc,
        // Anything not listed above is reported as unknown.
        _ => Arch::Unknown,
    }
}
390
/// Contains information about which sections are executable.
struct ExecutableSections {
    /// For every section header in the PDB, a boolean which indicates whether the "executable"
    /// or "execute" flag is set in the section header's characteristics.
    ///
    /// Entries are stored in section-header order, so the entry at index `i` describes the
    /// section with the one-based section number `i + 1`.
    is_executable_per_section: Vec<bool>,
}
397
398impl ExecutableSections {
399    pub fn from_sections(sections: &Option<Vec<ImageSectionHeader>>) -> Self {
400        Self {
401            is_executable_per_section: match sections {
402                Some(sections) => sections
403                    .iter()
404                    .map(|section| section.characteristics)
405                    .map(|char| char.executable() || char.execute())
406                    .collect(),
407                None => Default::default(),
408            },
409        }
410    }
411
412    /// Returns whether the given offset is contained in an executable section.
413    pub fn contains(&self, offset: &PdbInternalSectionOffset) -> bool {
414        // offset.section is a one-based index.
415        if offset.section == 0 {
416            // No section.
417            return false;
418        }
419
420        let section_index = (offset.section - 1) as usize;
421        self.is_executable_per_section
422            .get(section_index)
423            .cloned()
424            .unwrap_or(false)
425    }
426}
427
/// An iterator over symbols in the PDB file.
///
/// Returned by [`PdbObject::symbols`](struct.PdbObject.html#method.symbols).
pub struct PdbSymbolIterator<'data, 'object> {
    /// Underlying iterator over the raw symbol records.
    symbols: pdb::SymbolIter<'object>,
    /// Address map used to translate section offsets to RVAs; `None` if it failed to load, in
    /// which case the iterator yields nothing.
    address_map: Option<AddressMap<'data>>,
    /// Lookup used to skip symbols that are not located in an executable section.
    executable_sections: &'object ExecutableSections,
}
436
437impl<'data> Iterator for PdbSymbolIterator<'data, '_> {
438    type Item = Symbol<'data>;
439
440    fn next(&mut self) -> Option<Self::Item> {
441        let address_map = self.address_map.as_ref()?;
442
443        while let Ok(Some(symbol)) = self.symbols.next() {
444            if let Ok(SymbolData::Public(public)) = symbol.parse() {
445                if !self.executable_sections.contains(&public.offset) {
446                    continue;
447                }
448
449                let address = match public.offset.to_rva(address_map) {
450                    Some(address) => address,
451                    None => continue,
452                };
453
454                // pdb::SymbolIter offers data bound to its own lifetime since it holds the
455                // buffer containing public symbols. The contract requires that we return
456                // `Symbol<'data>`, so we cannot return zero-copy symbols here.
457                let cow = public.name.to_string();
458                let name = Cow::from(String::from(cow));
459
460                return Some(Symbol {
461                    name: Some(name),
462                    address: u64::from(address.0),
463                    size: 0, // Computed in `SymbolMap`
464                });
465            }
466        }
467
468        None
469    }
470}
471
/// The PDB streams a debug session works with, plus a cache of lazily loaded module infos.
struct PdbStreams<'d> {
    /// Debug information (DBI) stream, shared with the `PdbObject` it was created from.
    debug_info: Arc<pdb::DebugInformation<'d>>,
    /// Type information stream.
    type_info: pdb::TypeInformation<'d>,
    /// Id information stream.
    id_info: pdb::IdInformation<'d>,
    /// String table; `None` when the PDB has no string table stream.
    string_table: Option<pdb::StringTable<'d>>,

    /// Shared handle to the PDB, used to load module streams on demand.
    pdb: Arc<RwLock<Pdb<'d>>>,

    /// ModuleInfo objects are stored on this object (outside PdbDebugInfo) so that the
    /// PdbDebugInfo can store a TypeFormatter, which has a lifetime dependency on its
    /// ModuleProvider, which is this PdbStreams. This is so that TypeFormatter can cache
    /// CrossModuleImports inside itself, and those have a lifetime dependency on the
    /// ModuleInfo.
    module_infos: FrozenMap<usize, Box<ModuleInfo<'d>>>,
}
487
488impl<'d> PdbStreams<'d> {
489    fn from_pdb(pdb: &PdbObject<'d>) -> Result<Self, PdbError> {
490        let mut p = pdb.pdb.write();
491
492        // PDB::string_table errors if the named stream for the string table is not present.
493        // However, this occurs in certain PDBs and does not automatically indicate an error.
494        let string_table = match p.string_table() {
495            Ok(string_table) => Some(string_table),
496            Err(pdb::Error::StreamNameNotFound) => None,
497            Err(e) => return Err(e.into()),
498        };
499
500        Ok(Self {
501            string_table,
502            debug_info: pdb.debug_info.clone(),
503            type_info: p.type_information()?,
504            id_info: p.id_information()?,
505            pdb: pdb.pdb.clone(),
506            module_infos: FrozenMap::new(),
507        })
508    }
509}
510
511impl<'d> pdb_addr2line::ModuleProvider<'d> for PdbStreams<'d> {
512    fn get_module_info(
513        &self,
514        module_index: usize,
515        module: &Module,
516    ) -> Result<Option<&ModuleInfo<'d>>, pdb::Error> {
517        if let Some(module_info) = self.module_infos.get(&module_index) {
518            return Ok(Some(module_info));
519        }
520
521        let mut pdb = self.pdb.write();
522        Ok(pdb.module_info(module)?.map(|module_info| {
523            self.module_infos
524                .insert(module_index, Box::new(module_info))
525        }))
526    }
527}
528
/// Derived lookup structures of a debug session; borrows from the session's [`PdbStreams`].
struct PdbDebugInfo<'d> {
    /// The streams, to load module streams on demand.
    streams: &'d PdbStreams<'d>,
    /// OMAP structure to map reordered sections to RVAs.
    address_map: pdb::AddressMap<'d>,
    /// String table for name lookups.
    string_table: Option<&'d pdb::StringTable<'d>>,
    /// Type formatter for function name strings.
    type_formatter: pdb_addr2line::TypeFormatter<'d, 'd>,
}
539
540impl<'d> PdbDebugInfo<'d> {
541    fn build(pdb: &PdbObject<'d>, streams: &'d PdbStreams<'d>) -> Result<Self, PdbError> {
542        let modules = streams.debug_info.modules()?.collect::<Vec<_>>()?;
543
544        // Avoid deadlocks by only covering the two access to the address map. For
545        // instance, `pdb.symbol_map()` requires a mutable borrow of the PDB as well.
546        let mut p = pdb.pdb.write();
547        let address_map = p.address_map()?;
548
549        drop(p);
550
551        Ok(PdbDebugInfo {
552            address_map,
553            streams,
554            string_table: streams.string_table.as_ref(),
555            type_formatter: pdb_addr2line::TypeFormatter::new_from_parts(
556                streams,
557                modules,
558                &streams.debug_info,
559                &streams.type_info,
560                &streams.id_info,
561                streams.string_table.as_ref(),
562                Default::default(),
563            )?,
564        })
565    }
566
567    /// Returns an iterator over all compilation units (modules).
568    fn units(&'d self) -> PdbUnitIterator<'d> {
569        PdbUnitIterator {
570            debug_info: self,
571            index: 0,
572        }
573    }
574
575    fn modules(&self) -> &[Module<'d>] {
576        self.type_formatter.modules()
577    }
578
579    fn get_module(&'d self, index: usize) -> Result<Option<&'d ModuleInfo<'d>>, PdbError> {
580        // Silently ignore module references out-of-bound
581        let module = match self.modules().get(index) {
582            Some(module) => module,
583            None => return Ok(None),
584        };
585
586        Ok(self.streams.get_module_info(index, module)?)
587    }
588
589    fn file_info(&self, file_info: pdb::FileInfo<'d>) -> Result<FileInfo<'_>, PdbError> {
590        let file_path = match self.string_table {
591            Some(string_table) => file_info.name.to_raw_string(string_table)?,
592            None => "".into(),
593        };
594
595        Ok(FileInfo::from_path(file_path.as_bytes()))
596    }
597}
598
/// Allows a `PdbDebugInfo<'d>` to be viewed as a `PdbDebugInfo<'slf>` for a shorter lifetime.
impl<'slf, 'd: 'slf> AsSelf<'slf> for PdbDebugInfo<'d> {
    type Ref = PdbDebugInfo<'slf>;

    fn as_self(&'slf self) -> &'slf Self::Ref {
        // The transmute only changes the lifetime parameter from `'d` to `'slf`, and the
        // `'d: 'slf` bound guarantees the borrowed data outlives the returned reference.
        // NOTE(review): this assumes shortening the lifetime is sound for every field of
        // `PdbDebugInfo` — confirm.
        unsafe { std::mem::transmute(self) }
    }
}
606
/// Debug session for PDB objects.
pub struct PdbDebugSession<'d> {
    /// Owns the boxed [`PdbStreams`] together with the [`PdbDebugInfo`] that borrows from them.
    cell: SelfCell<Box<PdbStreams<'d>>, PdbDebugInfo<'d>>,
}
611
612impl<'d> PdbDebugSession<'d> {
613    fn build(pdb: &PdbObject<'d>) -> Result<Self, PdbError> {
614        let streams = PdbStreams::from_pdb(pdb)?;
615        let cell = SelfCell::try_new(Box::new(streams), |streams| {
616            PdbDebugInfo::build(pdb, unsafe { &*streams })
617        })?;
618
619        Ok(PdbDebugSession { cell })
620    }
621
622    /// Returns an iterator over all source files in this debug file.
623    pub fn files(&self) -> PdbFileIterator<'_> {
624        PdbFileIterator {
625            debug_info: self.cell.get(),
626            units: self.cell.get().units(),
627            files: pdb::FileIterator::default(),
628            finished: false,
629        }
630    }
631
632    /// Returns an iterator over all functions in this debug file.
633    pub fn functions(&self) -> PdbFunctionIterator<'_> {
634        PdbFunctionIterator {
635            units: self.cell.get().units(),
636            functions: Vec::new().into_iter(),
637            finished: false,
638        }
639    }
640
641    /// See [DebugSession::source_by_path] for more information.
642    pub fn source_by_path(
643        &self,
644        _path: &str,
645    ) -> Result<Option<SourceFileDescriptor<'_>>, PdbError> {
646        Ok(None)
647    }
648}
649
/// Trait adapter: every method forwards to the inherent method of the same name on
/// `PdbDebugSession` (inherent methods take precedence, so these calls do not recurse).
impl<'session> DebugSession<'session> for PdbDebugSession<'_> {
    type Error = PdbError;
    type FunctionIterator = PdbFunctionIterator<'session>;
    type FileIterator = PdbFileIterator<'session>;

    fn functions(&'session self) -> Self::FunctionIterator {
        self.functions()
    }

    fn files(&'session self) -> Self::FileIterator {
        self.files()
    }

    fn source_by_path(&self, path: &str) -> Result<Option<SourceFileDescriptor<'_>>, Self::Error> {
        self.source_by_path(path)
    }
}
667
/// A single compilation unit (module) of the PDB, together with the context needed to resolve
/// its functions and line records.
struct Unit<'s> {
    /// Session-wide debug info (address map, string table, type formatter).
    debug_info: &'s PdbDebugInfo<'s>,
    /// Index of this module; passed to the type formatter when formatting names.
    module_index: usize,
    /// The loaded module info providing symbols, line program and inlinees.
    module: &'s pdb::ModuleInfo<'s>,
}
673
674impl<'s> Unit<'s> {
675    fn load(
676        debug_info: &'s PdbDebugInfo<'s>,
677        module_index: usize,
678        module: &'s pdb::ModuleInfo<'s>,
679    ) -> Result<Self, PdbError> {
680        Ok(Self {
681            debug_info,
682            module_index,
683            module,
684        })
685    }
686
687    fn collect_lines<I>(
688        &self,
689        mut line_iter: I,
690        program: &LineProgram<'s>,
691    ) -> Result<Vec<LineInfo<'s>>, PdbError>
692    where
693        I: FallibleIterator<Item = pdb::LineInfo>,
694        PdbError: From<I::Error>,
695    {
696        let address_map = &self.debug_info.address_map;
697
698        let mut lines = Vec::new();
699        while let Some(line_info) = line_iter.next()? {
700            let rva = match line_info.offset.to_rva(address_map) {
701                Some(rva) => u64::from(rva.0),
702                None => continue,
703            };
704
705            // skip 0-sized line infos
706            let size = line_info.length.map(u64::from);
707            if size == Some(0) {
708                continue;
709            }
710
711            let file_info = program.get_file_info(line_info.file_index)?;
712
713            lines.push(LineInfo {
714                address: rva,
715                size,
716                file: self.debug_info.file_info(file_info)?,
717                line: line_info.line_start.into(),
718            });
719        }
720        lines.sort_by_key(|line| line.address);
721
722        // Merge line infos that only differ in their `column` information, which we don't
723        // care about. We only want to output line infos that differ in their file/line.
724        lines.dedup_by(|current, prev| {
725            // the records need to be consecutive to be able to merge
726            let first_end = prev.size.and_then(|size| prev.address.checked_add(size));
727            let is_consecutive = first_end == Some(current.address);
728            // the line record points to the same file/line, so we want to merge/dedupe it
729            if is_consecutive && prev.file == current.file && prev.line == current.line {
730                prev.size = prev
731                    .size
732                    .map(|first_size| first_size.saturating_add(current.size.unwrap_or(0)));
733
734                return true;
735            }
736            false
737        });
738
739        Ok(lines)
740    }
741
742    /// Sanitize the collected lines.
743    ///
744    /// This essentially filters out all the lines that lay outside of the function range.
745    ///
746    /// For example we have observed in a real-world pdb that has:
747    /// - A function 0x33ea50 (size 0xc)
748    /// - With one line record: 0x33e850 (size 0x26)
749    ///
750    /// The line record is completely outside the range of the function.
751    fn sanitize_lines(func: &mut Function) {
752        let fn_start = func.address;
753        let fn_end = func.end_address();
754        func.lines.retain(|line| {
755            if line.address >= fn_end {
756                return false;
757            }
758            let line_end = match line.size {
759                Some(size) => line.address.saturating_add(size),
760                None => return true,
761            };
762            line_end > fn_start
763        });
764    }
765
766    fn handle_function(
767        &self,
768        offset: PdbInternalSectionOffset,
769        len: u32,
770        name: RawString<'s>,
771        type_index: TypeIndex,
772        program: &LineProgram<'s>,
773    ) -> Result<Option<Function<'s>>, PdbError> {
774        let address_map = &self.debug_info.address_map;
775
776        // Translate the function's address to the PE's address space. If this fails, we're
777        // likely dealing with an invalid function and can skip it.
778        let address = match offset.to_rva(address_map) {
779            Some(addr) => u64::from(addr.0),
780            None => return Ok(None),
781        };
782
783        // Names from the private symbol table are generally demangled. They contain the path of the
784        // scope and name of the function itself, including type parameters, and the parameter lists
785        // are contained in the type info. We do not emit a return type.
786        let formatter = &self.debug_info.type_formatter;
787        let name = name.to_string();
788        let name = Name::new(
789            formatter
790                .format_function(&name, self.module_index, type_index)
791                .map(Cow::Owned)
792                .unwrap_or(name),
793            NameMangling::Unmangled,
794            Language::Unknown,
795        );
796
797        let line_iter = program.lines_for_symbol(offset);
798        let lines = self.collect_lines(line_iter, program)?;
799
800        Ok(Some(Function {
801            address,
802            size: len.into(),
803            name,
804            compilation_dir: &[],
805            lines,
806            inlinees: Vec::new(),
807            inline: false,
808        }))
809    }
810
    /// Builds a `Function` for a procedure symbol, using the procedure's own offset, length,
    /// name and type index.
    fn handle_procedure(
        &self,
        proc: &ProcedureSymbol<'s>,
        program: &LineProgram<'s>,
    ) -> Result<Option<Function<'s>>, PdbError> {
        self.handle_function(proc.offset, proc.len, proc.name, proc.type_index, program)
    }
818
    /// Builds a `Function` for a separated code block of `proc`.
    ///
    /// The address range comes from `sepcode`, while the name and type index are taken from
    /// the procedure it belongs to.
    fn handle_separated_code(
        &self,
        proc: &ProcedureSymbol<'s>,
        sepcode: &SeparatedCodeSymbol,
        program: &LineProgram<'s>,
    ) -> Result<Option<Function<'s>>, PdbError> {
        self.handle_function(
            sepcode.offset,
            sepcode.len,
            proc.name,
            proc.type_index,
            program,
        )
    }
833
834    fn handle_inlinee(
835        &self,
836        inline_site: InlineSiteSymbol<'s>,
837        parent_offset: PdbInternalSectionOffset,
838        inlinee: &pdb::Inlinee<'s>,
839        program: &LineProgram<'s>,
840    ) -> Result<Option<Function<'s>>, PdbError> {
841        let line_iter = inlinee.lines(parent_offset, &inline_site);
842        let lines = self.collect_lines(line_iter, program)?;
843
844        // If there are no line records, skip this inline function completely. Apparently, it was
845        // eliminated by the compiler, and cannot be hit by the program anymore. For `symbolic`,
846        // such functions do not have any use.
847        let start = match lines.first().map(|line| line.address) {
848            Some(address) => address,
849            None => return Ok(None),
850        };
851
852        let end = match lines
853            .last()
854            .map(|line| line.address + line.size.unwrap_or(1))
855        {
856            Some(address) => address,
857            None => return Ok(None),
858        };
859
860        let formatter = &self.debug_info.type_formatter;
861        let name = Name::new(
862            formatter.format_id(self.module_index, inline_site.inlinee)?,
863            NameMangling::Unmangled,
864            Language::Unknown,
865        );
866
867        Ok(Some(Function {
868            address: start,
869            size: end - start,
870            name,
871            compilation_dir: &[],
872            lines,
873            inlinees: Vec::new(),
874            inline: true,
875        }))
876    }
877
    /// Collects all functions of this module by walking its symbol stream.
    ///
    /// Procedures, separated code blocks and inline sites are converted into [`Function`]s and
    /// pushed onto a [`FunctionStack`] together with the scope depth at which they were found.
    /// The stack is flushed into the returned list whenever a scope ends, and once more after
    /// the last symbol has been visited.
    ///
    /// # Errors
    ///
    /// Returns an error if the line program, the symbol iterator or the inlinee table cannot be
    /// read, if a procedure or separated code symbol fails to be processed, or if an inline site
    /// is encountered while no enclosing procedure is recorded
    /// ([`PdbErrorKind::UnexpectedInline`]). Symbols that fail to parse and inline sites that
    /// fail to be processed are skipped rather than reported.
    fn functions(&self) -> Result<Vec<Function<'s>>, PdbError> {
        let program = self.module.line_program()?;
        let mut symbols = self.module.symbols()?;

        // Depending on the compiler version, the inlinee table might not be sorted. Since constant
        // search through inlinees is too slow (due to repeated parsing), but Inlinees are rather
        // small structures, it is relatively cheap to collect them into an in-memory index.
        let inlinees: BTreeMap<_, _> = self
            .module
            .inlinees()?
            .map(|i| Ok((i.index(), i)))
            .collect()?;

        // Scope depth of the symbol currently being visited. A symbol that opens a scope is
        // itself still counted at the outer depth, so the increment is deferred to the following
        // symbol via `inc_next`.
        let mut depth = 0;
        let mut inc_next = false;
        // Depth of the most recent symbol that did not yield a `Function`. While set, all symbols
        // nested deeper than this are ignored.
        let mut skipped_depth = None;

        let mut functions = Vec::new();
        let mut stack = FunctionStack::new();
        // `(depth, offset)` of the procedures enclosing the current symbol. The innermost entry
        // is passed to `handle_inlinee` as the parent offset of inline sites.
        let mut proc_offsets = SmallVec::<[_; 3]>::new();
        // The most recently seen procedure, used to attach separated code to its parent.
        let mut last_proc = None;

        while let Some(symbol) = symbols.next()? {
            if inc_next {
                depth += 1;
            }

            inc_next = symbol.starts_scope();
            if symbol.ends_scope() {
                depth -= 1;

                // Drop the innermost procedure if it was recorded at or below the depth we just
                // returned to.
                if proc_offsets.last().is_some_and(|&(d, _)| d >= depth) {
                    proc_offsets.pop();
                }
            }

            // If we're navigating within a skipped function (see below), we can ignore this
            // entry completely. Otherwise, we've moved out of any skipped function and can
            // reset the stored depth.
            match skipped_depth {
                Some(skipped) if depth > skipped => continue,
                _ => skipped_depth = None,
            }

            // Flush all functions out that exceed the current iteration depth. Since we
            // encountered a symbol at this level, there will be no more inlinees to the
            // previous function at the same level or any of its children.
            if symbol.ends_scope() {
                stack.flush(depth, &mut functions);
            }

            let function = match symbol.parse() {
                Ok(SymbolData::Procedure(proc)) => {
                    proc_offsets.push((depth, proc.offset));
                    let function = self.handle_procedure(&proc, &program)?;
                    last_proc = Some(proc);
                    function
                }
                // Separated code is only processed when it refers to the procedure seen last;
                // any other separated code symbol is skipped.
                Ok(SymbolData::SeparatedCode(sepcode)) => match last_proc.as_ref() {
                    Some(last_proc) if last_proc.offset == sepcode.parent_offset => {
                        self.handle_separated_code(last_proc, &sepcode, &program)?
                    }
                    _ => continue,
                },
                Ok(SymbolData::InlineSite(site)) => {
                    let parent_offset = proc_offsets
                        .last()
                        .map(|&(_, offset)| offset)
                        .ok_or(PdbErrorKind::UnexpectedInline)?;

                    // We can assume that inlinees will be listed in the inlinee table. If missing,
                    // skip silently instead of erroring out. Missing a single inline function is
                    // more acceptable in such a case than halting iteration completely.
                    if let Some(inlinee) = inlinees.get(&site.inlinee) {
                        // We have seen that the MSVC Compiler `19.16` (VS 2017) can output
                        // `ChangeFile` annotations which are not properly aligned to the beginning
                        // of a file checksum, leading to `UnimplementedFileChecksumKind` errors.
                        // Investigation showed that this can happen for inlined `{ctor}` functions,
                        // but there are no clear leads to why that might have happened, and how to
                        // recover from these broken annotations.
                        // For that reason, we skip these inlinees completely so we do not fail
                        // processing the complete pdb file.
                        self.handle_inlinee(site, parent_offset, inlinee, &program)
                            .ok()
                            .flatten()
                    } else {
                        None
                    }
                }
                // We need to ignore errors here since the PDB crate does not yet implement all
                // symbol types. Instead of erroring too often, it's better to swallow these.
                _ => continue,
            };

            match function {
                Some(mut function) => {
                    Self::sanitize_lines(&mut function);
                    // TODO: figure out what to do with functions that have no more lines
                    // after sanitization
                    stack.push(depth, function)
                }
                // Nothing was produced for this symbol: remember its depth so that everything
                // nested inside it is ignored by the check at the top of the loop.
                None => skipped_depth = Some(depth),
            }
        }

        // We're done, flush the remaining stack.
        stack.flush(0, &mut functions);

        Ok(functions)
    }
988}
989
/// An iterator over the modules of a PDB, yielding each one as a loaded [`Unit`].
///
/// Modules for which `get_module` returns `None` are skipped.
struct PdbUnitIterator<'s> {
    /// Debug information that provides the module list and the modules themselves.
    debug_info: &'s PdbDebugInfo<'s>,
    /// Index of the next module to look at.
    index: usize,
}
994
995impl<'s> Iterator for PdbUnitIterator<'s> {
996    type Item = Result<Unit<'s>, PdbError>;
997
998    fn next(&mut self) -> Option<Self::Item> {
999        let debug_info = self.debug_info;
1000        while self.index < debug_info.modules().len() {
1001            let module_index = self.index;
1002            let result = debug_info.get_module(module_index);
1003            self.index += 1;
1004
1005            let module = match result {
1006                Ok(Some(module)) => module,
1007                Ok(None) => continue,
1008                Err(error) => return Some(Err(error)),
1009            };
1010
1011            return Some(Unit::load(debug_info, module_index, module));
1012        }
1013
1014        None
1015    }
1016}
1017
/// An iterator over source files in a Pdb object.
pub struct PdbFileIterator<'s> {
    /// Debug information used to resolve each file index into file information.
    debug_info: &'s PdbDebugInfo<'s>,
    /// Units whose line programs have not been visited yet.
    units: PdbUnitIterator<'s>,
    /// File iterator of the line program that is currently being drained.
    files: pdb::FileIterator<'s>,
    /// Set once `units` has been exhausted; `next` returns `None` from then on.
    finished: bool,
}
1025
1026impl<'s> Iterator for PdbFileIterator<'s> {
1027    type Item = Result<FileEntry<'s>, PdbError>;
1028
1029    fn next(&mut self) -> Option<Self::Item> {
1030        if self.finished {
1031            return None;
1032        }
1033
1034        loop {
1035            if let Some(file_result) = self.files.next().transpose() {
1036                let result = file_result
1037                    .map_err(|err| err.into())
1038                    .and_then(|i| self.debug_info.file_info(i))
1039                    .map(|info| FileEntry::new(Cow::default(), info));
1040
1041                return Some(result);
1042            }
1043
1044            let unit = match self.units.next() {
1045                Some(Ok(unit)) => unit,
1046                Some(Err(error)) => return Some(Err(error)),
1047                None => break,
1048            };
1049
1050            let line_program = match unit.module.line_program() {
1051                Ok(line_program) => line_program,
1052                Err(error) => return Some(Err(error.into())),
1053            };
1054
1055            self.files = line_program.files();
1056        }
1057
1058        self.finished = true;
1059        None
1060    }
1061}
1062
/// An iterator over functions in a PDB file.
pub struct PdbFunctionIterator<'s> {
    /// Units whose functions have not been collected yet.
    units: PdbUnitIterator<'s>,
    /// Functions of the most recently loaded unit that are still to be yielded.
    functions: std::vec::IntoIter<Function<'s>>,
    /// Set once `units` has been exhausted; `next` returns `None` from then on.
    finished: bool,
}
1069
1070impl<'s> Iterator for PdbFunctionIterator<'s> {
1071    type Item = Result<Function<'s>, PdbError>;
1072
1073    fn next(&mut self) -> Option<Self::Item> {
1074        if self.finished {
1075            return None;
1076        }
1077
1078        loop {
1079            if let Some(func) = self.functions.next() {
1080                return Some(Ok(func));
1081            }
1082
1083            let unit = match self.units.next() {
1084                Some(Ok(unit)) => unit,
1085                Some(Err(error)) => return Some(Err(error)),
1086                None => break,
1087            };
1088
1089            self.functions = match unit.functions() {
1090                Ok(functions) => functions.into_iter(),
1091                Err(error) => return Some(Err(error)),
1092            };
1093        }
1094
1095        self.finished = true;
1096        None
1097    }
1098}
1099
// `next` sets `finished` when the unit iterator is exhausted and returns `None` on every call
// made while that flag is set, which is the guarantee `FusedIterator` asks for.
impl std::iter::FusedIterator for PdbFunctionIterator<'_> {}