Skip to main content

symbolic_debuginfo/pdb/
mod.rs

1//! Support for Program Database, the debug companion format on Windows.
2
3use std::borrow::Cow;
4use std::collections::btree_map::BTreeMap;
5use std::error::Error;
6use std::fmt;
7use std::io::Cursor;
8use std::sync::Arc;
9
10use elsa::FrozenMap;
11use parking_lot::RwLock;
12use pdb_addr2line::pdb::{
13    AddressMap, FallibleIterator, ImageSectionHeader, InlineSiteSymbol, LineProgram, MachineType,
14    Module, ModuleInfo, PdbInternalSectionOffset, ProcedureSymbol, RawString, SeparatedCodeSymbol,
15    SymbolData, TypeIndex,
16};
17use pdb_addr2line::ModuleProvider;
18use smallvec::SmallVec;
19use thiserror::Error;
20
21use symbolic_common::{
22    Arch, AsSelf, CodeId, CpuFamily, DebugId, Language, Name, NameMangling, SelfCell, Uuid,
23};
24
25use crate::base::*;
26use crate::function_stack::FunctionStack;
27use crate::pdb::srcsrv::{SourceServerInfo, SourceServerMappings};
28use crate::sourcebundle::SourceFileDescriptor;
29
30mod srcsrv;
31
32type Pdb<'data> = pdb::PDB<'data, Cursor<&'data [u8]>>;
33
34const MAGIC_BIG: &[u8] = b"Microsoft C/C++ MSF 7.00\r\n\x1a\x44\x53\x00\x00\x00";
35
36// Used for CFI, remove once abstraction is complete
37#[doc(hidden)]
38pub use pdb_addr2line::pdb;
39
/// The kind of failure described by a [`PdbError`].
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PdbErrorKind {
    /// The PDB file is corrupted. See the cause for more information.
    BadObject,

    /// An inline record was encountered without an inlining parent.
    UnexpectedInline,

    /// Formatting of a type name failed.
    FormattingFailed,
}

impl fmt::Display for PdbErrorKind {
    /// Writes a short, lower-case description of the error kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let description = match *self {
            Self::BadObject => "invalid pdb file",
            Self::UnexpectedInline => "unexpected inline function without parent",
            Self::FormattingFailed => "failed to format type name",
        };

        f.write_str(description)
    }
}
63
/// An error when dealing with [`PdbObject`](struct.PdbObject.html).
///
/// The error displays as its [`PdbErrorKind`]; the optional underlying cause is attached as the
/// error source.
#[derive(Debug, Error)]
#[error("{kind}")]
pub struct PdbError {
    /// Classification of the failure.
    kind: PdbErrorKind,
    /// The underlying cause, if one was recorded.
    #[source]
    source: Option<Box<dyn Error + Send + Sync + 'static>>,
}
72
73impl PdbError {
74    /// Creates a new PDB error from a known kind of error as well as an arbitrary error
75    /// payload.
76    fn new<E>(kind: PdbErrorKind, source: E) -> Self
77    where
78        E: Into<Box<dyn Error + Send + Sync>>,
79    {
80        let source = Some(source.into());
81        Self { kind, source }
82    }
83
84    /// Returns the corresponding [`PdbErrorKind`] for this error.
85    pub fn kind(&self) -> PdbErrorKind {
86        self.kind
87    }
88}
89
/// Creates an error that consists only of a kind, without an underlying cause.
impl From<PdbErrorKind> for PdbError {
    fn from(kind: PdbErrorKind) -> Self {
        Self { kind, source: None }
    }
}
95
/// Wraps any error of the `pdb` crate as [`PdbErrorKind::BadObject`], keeping it as the source.
impl From<pdb::Error> for PdbError {
    fn from(e: pdb::Error) -> Self {
        Self::new(PdbErrorKind::BadObject, e)
    }
}
101
/// Wraps a formatting error as [`PdbErrorKind::FormattingFailed`], keeping it as the source.
impl From<fmt::Error> for PdbError {
    fn from(e: fmt::Error) -> Self {
        Self::new(PdbErrorKind::FormattingFailed, e)
    }
}
107
/// Converts errors from `pdb_addr2line` into a [`PdbError`].
///
/// Wrapped PDB errors become [`PdbErrorKind::BadObject`] and wrapped formatting errors become
/// [`PdbErrorKind::FormattingFailed`]; in both cases the inner error is used as the source.
impl From<pdb_addr2line::Error> for PdbError {
    fn from(e: pdb_addr2line::Error) -> Self {
        match e {
            pdb_addr2line::Error::PdbError(e) => Self::new(PdbErrorKind::BadObject, e),
            pdb_addr2line::Error::FormatError(e) => Self::new(PdbErrorKind::FormattingFailed, e),
            // All remaining variants are reported as formatting failures, with the original
            // `pdb_addr2line` error kept as the source.
            e => Self::new(PdbErrorKind::FormattingFailed, e),
        }
    }
}
117
/// Program Database, the debug companion format on Windows.
///
/// This object is a sole debug companion to [`PeObject`](../pe/struct.PeObject.html).
pub struct PdbObject<'data> {
    /// The opened PDB. Guarded by a lock since reading further streams requires mutable access.
    pdb: Arc<RwLock<Pdb<'data>>>,
    /// The debug information stream (DBI), shared with debug sessions.
    debug_info: Arc<pdb::DebugInformation<'data>>,
    /// The PDB information header, which provides the GUID and a fallback age.
    pdb_info: pdb::PDBInformation<'data>,
    /// The global symbol table, used to iterate public symbols.
    public_syms: pdb::SymbolTable<'data>,
    /// Lookup of which sections are executable, used to filter public symbols.
    executable_sections: ExecutableSections,
    /// The raw bytes this object was parsed from.
    data: &'data [u8],
}
129
// NB: The pdb crate simulates mmap behavior on any Read + Seek type. This implementation requires
// mutability of the `Source` and uses trait objects without a Send + Sync barrier. We know that we
// only instantiate `&[u8]` as source. Whenever we mutate the reader (to read a new module stream),
// we acquire a write lock on the PDB, which should be sufficient.
unsafe impl Send for PdbObject<'_> {}
unsafe impl Sync for PdbObject<'_> {}
136
impl<'data> PdbObject<'data> {
    /// Tests whether the buffer could contain a PDB object.
    pub fn test(data: &[u8]) -> bool {
        // NB: "Microsoft C/C++ program database 2.00" is not supported by the pdb crate, so there
        // is no point in pretending we could read it.
        data.starts_with(MAGIC_BIG)
    }

    /// Tries to parse a PDB object from the given slice.
    ///
    /// Eagerly reads the debug information, PDB information, global symbols and section headers.
    ///
    /// # Errors
    ///
    /// Returns an error if the PDB cannot be opened or any of these streams fails to load.
    // The PDB handle is wrapped in an `Arc` although it is not `Send + Sync` by itself; see the
    // note on the `Send` and `Sync` implementations for `PdbObject`.
    #[allow(clippy::arc_with_non_send_sync)]
    pub fn parse(data: &'data [u8]) -> Result<Self, PdbError> {
        let mut pdb = Pdb::open(Cursor::new(data))?;
        let dbi = pdb.debug_information()?;
        let pdbi = pdb.pdb_information()?;
        let pubi = pdb.global_symbols()?;
        let sections = pdb.sections()?;

        Ok(PdbObject {
            pdb: Arc::new(RwLock::new(pdb)),
            debug_info: Arc::new(dbi),
            pdb_info: pdbi,
            public_syms: pubi,
            data,
            executable_sections: ExecutableSections::from_sections(&sections),
        })
    }

    /// The container file format, which is always `FileFormat::Pdb`.
    pub fn file_format(&self) -> FileFormat {
        FileFormat::Pdb
    }

    /// The code identifier of this object, always `None`.
    ///
    /// PDB files do not contain sufficient information to compute the code identifier, since they
    /// are lacking the relevant parts of the PE header.
    pub fn code_id(&self) -> Option<CodeId> {
        None
    }

    /// The debug information identifier of this PDB.
    ///
    /// The PDB stores a specific header that contains GUID and age bits. Additionally, Microsoft
    /// uses the file name of the PDB to avoid GUID collisions. In most contexts, however, it is
    /// sufficient to rely on the uniqueness of the GUID to identify a PDB.
    ///
    /// The same information is also stored in a header in the corresponding PE file, which can be
    /// used to locate a PDB from a PE.
    ///
    /// If the GUID cannot be converted into a UUID, the default `DebugId` is returned.
    pub fn debug_id(&self) -> DebugId {
        // Prefer the age from the debug information stream, as it is more likely to correspond to
        // the executable than the PDB info header. The latter is often bumped independently when
        // the PDB is processed or optimized, which causes it to go out of sync with the original
        // image.
        let age = self.debug_info.age().unwrap_or(self.pdb_info.age);
        match Uuid::from_slice(&self.pdb_info.guid.as_bytes()[..]) {
            Ok(uuid) => DebugId::from_parts(uuid, age),
            Err(_) => DebugId::default(),
        }
    }

    /// The CPU architecture of this object, as specified in the debug information stream (DBI).
    ///
    /// Falls back to the default architecture if the machine type cannot be read.
    pub fn arch(&self) -> Arch {
        self.debug_info
            .machine_type()
            .ok()
            .map(arch_from_machine)
            .unwrap_or_default()
    }

    /// Returns true if this object contains source server information.
    ///
    /// # Errors
    ///
    /// Returns an error if looking up the `srcsrv` stream fails for any reason other than the
    /// stream not being present.
    pub fn has_source_server_data(&self) -> Result<bool, PdbError> {
        // Looking up a named stream needs mutable access to the PDB, hence the write lock.
        let mut pdb = self.pdb.write();
        match pdb.named_stream(b"srcsrv") {
            Ok(_) => Ok(true),
            Err(pdb::Error::StreamNameNotFound) => {
                // No source server info is normal for many PDBs
                Ok(false)
            }
            Err(e) => Err(e.into()),
        }
    }

    /// The kind of this object, which is always `Debug`.
    pub fn kind(&self) -> ObjectKind {
        ObjectKind::Debug
    }

    /// The address at which the image prefers to be loaded into memory.
    ///
    /// The PDB only stores relative addresses, and more importantly, does not provide sufficient
    /// information to compute the original PE's load address. The according PE, however does
    /// feature a load address (called `image_base`). See [`PeObject::load_address`] for more
    /// information.
    ///
    /// [`PeObject::load_address`]: ../pe/struct.PeObject.html#method.load_address
    pub fn load_address(&self) -> u64 {
        0
    }

    /// Determines whether this object exposes a public symbol table.
    pub fn has_symbols(&self) -> bool {
        // We can safely assume that PDBs will always contain symbols.
        true
    }

    /// Returns an iterator over symbols in the public symbol table.
    ///
    /// If the address map cannot be loaded, the returned iterator yields no symbols.
    pub fn symbols(&self) -> PdbSymbolIterator<'data, '_> {
        PdbSymbolIterator {
            symbols: self.public_syms.iter(),
            address_map: self.pdb.write().address_map().ok(),
            executable_sections: &self.executable_sections,
        }
    }

    /// Returns an ordered map of symbols in the symbol table.
    pub fn symbol_map(&self) -> SymbolMap<'data> {
        self.symbols().collect()
    }

    /// Determines whether this object contains debug information.
    pub fn has_debug_info(&self) -> bool {
        // There is no cheap way to find out if a PDB contains debugging information that we care
        // about. Effectively, we're interested in local symbols declared in the module info
        // streams. To reliably determine whether any stream is present, we'd have to probe each one
        // of them, which can result in quite a lot of disk I/O.
        true
    }

    /// Determines whether this object contains embedded source.
    pub fn has_sources(&self) -> bool {
        false
    }

    /// Determines whether this object is malformed and was only partially parsed
    pub fn is_malformed(&self) -> bool {
        false
    }

    /// Constructs a debugging session.
    ///
    /// # Errors
    ///
    /// Returns an error if the streams required for the session cannot be loaded.
    pub fn debug_session(&self) -> Result<PdbDebugSession<'data>, PdbError> {
        PdbDebugSession::build(self)
    }

    /// Determines whether this object contains stack unwinding information.
    pub fn has_unwind_info(&self) -> bool {
        // The PDB crate currently loads quite a lot of information from the PDB when accessing the
        // frame table. However, we expect unwind info in every PDB for 32-bit builds, so we can
        // just assume it's there if the architecture matches.
        // TODO: Implement a better way by exposing the extra streams in the PDB crate.
        self.arch().cpu_family() == CpuFamily::Intel32
    }

    /// Returns the raw data of the PDB file.
    pub fn data(&self) -> &'data [u8] {
        self.data
    }

    /// Returns the lock that guards the underlying PDB handle.
    #[doc(hidden)]
    pub fn inner(&self) -> &RwLock<Pdb<'data>> {
        &self.pdb
    }
}
299
300impl fmt::Debug for PdbObject<'_> {
301    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
302        f.debug_struct("PdbObject")
303            .field("debug_id", &self.debug_id())
304            .field("arch", &self.arch())
305            .field("load_address", &format_args!("{:#x}", self.load_address()))
306            .field("has_symbols", &self.has_symbols())
307            .field("has_debug_info", &self.has_debug_info())
308            .field("has_unwind_info", &self.has_unwind_info())
309            .field("is_malformed", &self.is_malformed())
310            .finish()
311    }
312}
313
/// Allows a `PdbObject<'data>` to be viewed as a `PdbObject<'slf>` with the shorter lifetime
/// `'slf`.
impl<'slf, 'data: 'slf> AsSelf<'slf> for PdbObject<'data> {
    type Ref = PdbObject<'slf>;

    fn as_self(&'slf self) -> &'slf Self::Ref {
        // NOTE(review): the transmute only replaces the `'data` lifetime parameter with the
        // shorter `'slf`; presumably this relies on `PdbObject` being safe to treat as covariant
        // in `'data` — confirm.
        unsafe { std::mem::transmute(self) }
    }
}
321
/// Implements the generic parsing interface by forwarding to the inherent methods.
impl<'data> Parse<'data> for PdbObject<'data> {
    type Error = PdbError;

    /// See [`PdbObject::test`].
    fn test(data: &[u8]) -> bool {
        Self::test(data)
    }

    /// See [`PdbObject::parse`].
    fn parse(data: &'data [u8]) -> Result<Self, PdbError> {
        Self::parse(data)
    }
}
333
/// Implements the generic object interface.
///
/// Every method forwards to the inherent method of the same name on [`PdbObject`]; see those
/// methods for details.
impl<'data: 'object, 'object> ObjectLike<'data, 'object> for PdbObject<'data> {
    type Error = PdbError;
    type Session = PdbDebugSession<'data>;
    type SymbolIterator = PdbSymbolIterator<'data, 'object>;

    fn file_format(&self) -> FileFormat {
        self.file_format()
    }

    fn code_id(&self) -> Option<CodeId> {
        self.code_id()
    }

    fn debug_id(&self) -> DebugId {
        self.debug_id()
    }

    fn arch(&self) -> Arch {
        self.arch()
    }

    fn kind(&self) -> ObjectKind {
        self.kind()
    }

    fn load_address(&self) -> u64 {
        self.load_address()
    }

    fn has_symbols(&self) -> bool {
        self.has_symbols()
    }

    fn symbols(&'object self) -> Self::SymbolIterator {
        self.symbols()
    }

    fn symbol_map(&self) -> SymbolMap<'data> {
        self.symbol_map()
    }

    fn has_debug_info(&self) -> bool {
        self.has_debug_info()
    }

    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
        self.debug_session()
    }

    fn has_unwind_info(&self) -> bool {
        self.has_unwind_info()
    }

    fn has_sources(&self) -> bool {
        self.has_sources()
    }

    fn is_malformed(&self) -> bool {
        self.is_malformed()
    }
}
395
/// Maps the machine type recorded in a PDB to the corresponding [`Arch`].
///
/// Machine types without an explicit mapping resolve to `Arch::Unknown`.
pub(crate) fn arch_from_machine(machine: MachineType) -> Arch {
    match machine {
        MachineType::X86 => Arch::X86,
        MachineType::Amd64 => Arch::Amd64,
        MachineType::Arm => Arch::Arm,
        MachineType::Arm64 => Arch::Arm64,
        MachineType::PowerPC => Arch::Ppc,
        _ => Arch::Unknown,
    }
}
406
407/// Contains information about which sections are executable.
408struct ExecutableSections {
409    /// For every section header in the PDB, a boolean which indicates whether the "executable"
410    /// or "execute" flag is set in the section header's characteristics.
411    is_executable_per_section: Vec<bool>,
412}
413
414impl ExecutableSections {
415    pub fn from_sections(sections: &Option<Vec<ImageSectionHeader>>) -> Self {
416        Self {
417            is_executable_per_section: match sections {
418                Some(sections) => sections
419                    .iter()
420                    .map(|section| section.characteristics)
421                    .map(|char| char.executable() || char.execute())
422                    .collect(),
423                None => Default::default(),
424            },
425        }
426    }
427
428    /// Returns whether the given offset is contained in an executable section.
429    pub fn contains(&self, offset: &PdbInternalSectionOffset) -> bool {
430        // offset.section is a one-based index.
431        if offset.section == 0 {
432            // No section.
433            return false;
434        }
435
436        let section_index = (offset.section - 1) as usize;
437        self.is_executable_per_section
438            .get(section_index)
439            .cloned()
440            .unwrap_or(false)
441    }
442}
443
/// An iterator over symbols in the PDB file.
///
/// Returned by [`PdbObject::symbols`](struct.PdbObject.html#method.symbols).
pub struct PdbSymbolIterator<'data, 'object> {
    /// Iterator over the raw records of the public symbol table.
    symbols: pdb::SymbolIter<'object>,
    /// Address map used to translate section offsets to RVAs; `None` if it failed to load.
    address_map: Option<AddressMap<'data>>,
    /// Used to skip symbols that are not located in executable sections.
    executable_sections: &'object ExecutableSections,
}
452
453impl<'data> Iterator for PdbSymbolIterator<'data, '_> {
454    type Item = Symbol<'data>;
455
456    fn next(&mut self) -> Option<Self::Item> {
457        let address_map = self.address_map.as_ref()?;
458
459        while let Ok(Some(symbol)) = self.symbols.next() {
460            if let Ok(SymbolData::Public(public)) = symbol.parse() {
461                if !self.executable_sections.contains(&public.offset) {
462                    continue;
463                }
464
465                let address = match public.offset.to_rva(address_map) {
466                    Some(address) => address,
467                    None => continue,
468                };
469
470                // pdb::SymbolIter offers data bound to its own lifetime since it holds the
471                // buffer containing public symbols. The contract requires that we return
472                // `Symbol<'data>`, so we cannot return zero-copy symbols here.
473                let cow = public.name.to_string();
474                let name = Cow::from(String::from(cow));
475
476                return Some(Symbol {
477                    name: Some(name),
478                    address: u64::from(address.0),
479                    size: 0, // Computed in `SymbolMap`
480                });
481            }
482        }
483
484        None
485    }
486}
487
/// The PDB streams that back a debug session.
struct PdbStreams<'d> {
    /// The debug information stream (DBI), shared with the `PdbObject`.
    debug_info: Arc<pdb::DebugInformation<'d>>,
    /// The type information stream.
    type_info: pdb::TypeInformation<'d>,
    /// The id information stream.
    id_info: pdb::IdInformation<'d>,
    /// The string table, or `None` if the PDB does not contain one.
    string_table: Option<pdb::StringTable<'d>>,
    /// A copy of the raw `srcsrv` stream, or `None` if the PDB does not contain one.
    srcsrv: Option<Vec<u8>>,

    /// The PDB handle, used to load module streams on demand.
    pdb: Arc<RwLock<Pdb<'d>>>,

    /// ModuleInfo objects are stored on this object (outside PdbDebugInfo) so that the
    /// PdbDebugInfo can store a TypeFormatter, which has a lifetime dependency on its
    /// ModuleProvider, which is this PdbStreams. This is so that TypeFormatter can cache
    /// CrossModuleImports inside itself, and those have a lifetime dependency on the
    /// ModuleInfo.
    module_infos: FrozenMap<usize, Box<ModuleInfo<'d>>>,
}
504
505impl<'d> PdbStreams<'d> {
506    fn from_pdb(pdb: &PdbObject<'d>) -> Result<Self, PdbError> {
507        let mut p = pdb.pdb.write();
508
509        // PDB::string_table errors if the named stream for the string table is not present.
510        // However, this occurs in certain PDBs and does not automatically indicate an error.
511        let string_table = match p.string_table() {
512            Ok(string_table) => Some(string_table),
513            Err(pdb::Error::StreamNameNotFound) => None,
514            Err(e) => return Err(e.into()),
515        };
516
517        // Try to open the "srcsrv" named stream
518        let srcsrv = match p.named_stream(b"srcsrv") {
519            Ok(stream) => Some(stream.as_slice().to_vec()),
520            Err(pdb::Error::StreamNameNotFound) => {
521                // No source server info is normal for many PDBs
522                None
523            }
524            Err(e) => return Err(e.into()),
525        };
526
527        Ok(Self {
528            string_table,
529            debug_info: pdb.debug_info.clone(),
530            type_info: p.type_information()?,
531            id_info: p.id_information()?,
532            srcsrv,
533            pdb: pdb.pdb.clone(),
534            module_infos: FrozenMap::new(),
535        })
536    }
537}
538
impl<'d> pdb_addr2line::ModuleProvider<'d> for PdbStreams<'d> {
    /// Returns the module info for `module`, loading it from the PDB on first access.
    ///
    /// Loaded module infos are cached in `module_infos` under `module_index`, so subsequent
    /// calls with the same index return the cached instance. Yields `Ok(None)` if the PDB
    /// reports no module info for this module.
    fn get_module_info(
        &self,
        module_index: usize,
        module: &Module,
    ) -> Result<Option<&ModuleInfo<'d>>, pdb::Error> {
        // Fast path: the module has been loaded before.
        if let Some(module_info) = self.module_infos.get(&module_index) {
            return Ok(Some(module_info));
        }

        // Reading a module stream needs mutable access to the PDB, hence the write lock.
        let mut pdb = self.pdb.write();
        Ok(pdb.module_info(module)?.map(|module_info| {
            self.module_infos
                .insert(module_index, Box::new(module_info))
        }))
    }
}
556
/// Debug information derived from [`PdbStreams`], used to resolve functions, lines and files.
struct PdbDebugInfo<'d> {
    /// The streams, to load module streams on demand.
    streams: &'d PdbStreams<'d>,
    /// OMAP structure to map reordered sections to RVAs.
    address_map: pdb::AddressMap<'d>,
    /// String table for name lookups.
    string_table: Option<&'d pdb::StringTable<'d>>,
    /// Type formatter for function name strings.
    type_formatter: pdb_addr2line::TypeFormatter<'d, 'd>,
    /// Source server mappings parsed from the `srcsrv` stream, if present and parsable.
    srcsrv: Option<SourceServerMappings<'d>>,
}
568
impl<'d> PdbDebugInfo<'d> {
    /// Builds the debug information from the object and its previously loaded streams.
    ///
    /// # Errors
    ///
    /// Fails if the module list or the address map cannot be read, or if the type formatter
    /// cannot be constructed. A `srcsrv` stream that fails to parse is ignored.
    fn build(pdb: &PdbObject<'d>, streams: &'d PdbStreams<'d>) -> Result<Self, PdbError> {
        let modules = streams.debug_info.modules()?.collect::<Vec<_>>()?;

        // Avoid deadlocks by holding the write lock only while accessing the address map. For
        // instance, `pdb.symbol_map()` requires a mutable borrow of the PDB as well.
        let mut p = pdb.pdb.write();
        let address_map = p.address_map()?;

        drop(p);

        let srcsrv = streams
            .srcsrv
            .as_deref()
            // We don't want to exit on error here so we can still use the PDB
            // file even if we fail to parse the source server part.
            // TODO: It would be nice to surface this error to users, if and
            // when we add logging to this crate.
            .and_then(|stream| SourceServerMappings::parse(stream).ok());

        Ok(PdbDebugInfo {
            address_map,
            streams,
            string_table: streams.string_table.as_ref(),
            srcsrv,
            type_formatter: pdb_addr2line::TypeFormatter::new_from_parts(
                streams,
                modules,
                &streams.debug_info,
                &streams.type_info,
                &streams.id_info,
                streams.string_table.as_ref(),
                Default::default(),
            )?,
        })
    }

    /// Returns an iterator over all compilation units (modules).
    fn units(&'d self) -> PdbUnitIterator<'d> {
        PdbUnitIterator {
            debug_info: self,
            index: 0,
        }
    }

    /// Returns the list of modules held by the type formatter.
    fn modules(&self) -> &[Module<'d>] {
        self.type_formatter.modules()
    }

    /// Returns the module info for the module at `index`, loading it on demand.
    ///
    /// Yields `Ok(None)` if `index` is out of bounds or the PDB reports no module info for it.
    fn get_module(&'d self, index: usize) -> Result<Option<&'d ModuleInfo<'d>>, PdbError> {
        // Silently ignore module references out-of-bound
        let module = match self.modules().get(index) {
            Some(module) => module,
            None => return Ok(None),
        };

        Ok(self.streams.get_module_info(index, module)?)
    }

    /// Resolves a PDB file record to a [`FileInfo`] by looking up its name in the string table.
    ///
    /// Without a string table, the file info is created from an empty path.
    fn file_info(&self, file_info: pdb::FileInfo<'d>) -> Result<FileInfo<'_>, PdbError> {
        let file_path = match self.string_table {
            Some(string_table) => file_info.name.to_raw_string(string_table)?,
            None => "".into(),
        };

        Ok(FileInfo::from_path(file_path.as_bytes()))
    }
}
637
/// Allows a `PdbDebugInfo<'d>` to be viewed as a `PdbDebugInfo<'slf>` with the shorter lifetime
/// `'slf`.
impl<'slf, 'd: 'slf> AsSelf<'slf> for PdbDebugInfo<'d> {
    type Ref = PdbDebugInfo<'slf>;

    fn as_self(&'slf self) -> &'slf Self::Ref {
        // NOTE(review): the transmute only replaces the `'d` lifetime parameter with the shorter
        // `'slf`; presumably this relies on `PdbDebugInfo` being safe to treat as covariant in
        // `'d` — confirm.
        unsafe { std::mem::transmute(self) }
    }
}
645
/// Debug session for PDB objects.
pub struct PdbDebugSession<'d> {
    /// Owns the boxed [`PdbStreams`] together with the [`PdbDebugInfo`] derived from them.
    cell: SelfCell<Box<PdbStreams<'d>>, PdbDebugInfo<'d>>,
}
650
651impl<'d> PdbDebugSession<'d> {
652    fn build(pdb: &PdbObject<'d>) -> Result<Self, PdbError> {
653        let streams = PdbStreams::from_pdb(pdb)?;
654
655        let cell = SelfCell::try_new(Box::new(streams), |streams| {
656            PdbDebugInfo::build(pdb, unsafe { &*streams })
657        })?;
658
659        Ok(PdbDebugSession { cell })
660    }
661
662    /// Returns an iterator over all source files in this debug file.
663    pub fn files(&self) -> PdbFileIterator<'_> {
664        PdbFileIterator {
665            debug_info: self.cell.get(),
666            units: self.cell.get().units(),
667            files: pdb::FileIterator::default(),
668            finished: false,
669        }
670    }
671
672    /// Returns an iterator over all functions in this debug file.
673    pub fn functions(&self) -> PdbFunctionIterator<'_> {
674        PdbFunctionIterator {
675            units: self.cell.get().units(),
676            functions: Vec::new().into_iter(),
677            finished: false,
678        }
679    }
680
681    /// See [DebugSession::source_by_path] for more information.
682    pub fn source_by_path(
683        &self,
684        _path: &str,
685    ) -> Result<Option<SourceFileDescriptor<'_>>, PdbError> {
686        Ok(None)
687    }
688
689    /// Returns the SRCSRV VCS integration name if available.
690    ///
691    /// This extracts the version control system identifier from the SRCSRV stream,
692    /// if present. Common values include "perforce", "tfs", "git", etc.
693    /// Returns `None` if no SRCSRV stream exists or if it cannot be parsed.
694    pub fn srcsrv_vcs_name(&self) -> Option<String> {
695        self.cell
696            .get()
697            .srcsrv
698            .as_ref()
699            .map(|srcsrv| srcsrv.vcs_name().to_owned())
700    }
701}
702
/// Implements the generic debug session interface by forwarding to the inherent methods of
/// [`PdbDebugSession`].
impl<'session> DebugSession<'session> for PdbDebugSession<'_> {
    type Error = PdbError;
    type FunctionIterator = PdbFunctionIterator<'session>;
    type FileIterator = PdbFileIterator<'session>;

    fn functions(&'session self) -> Self::FunctionIterator {
        self.functions()
    }

    fn files(&'session self) -> Self::FileIterator {
        self.files()
    }

    fn source_by_path(&self, path: &str) -> Result<Option<SourceFileDescriptor<'_>>, Self::Error> {
        self.source_by_path(path)
    }
}
720
/// A single compilation unit (module) of the PDB.
struct Unit<'s> {
    /// The debug information this unit belongs to.
    debug_info: &'s PdbDebugInfo<'s>,
    /// Index of this module, passed to the type formatter when formatting names.
    module_index: usize,
    /// The module's info stream, which provides its symbols and line program.
    module: &'s pdb::ModuleInfo<'s>,
}
726
727impl<'s> Unit<'s> {
    /// Creates a unit for the module at `module_index`.
    ///
    /// This only stores the given references and currently never fails.
    fn load(
        debug_info: &'s PdbDebugInfo<'s>,
        module_index: usize,
        module: &'s pdb::ModuleInfo<'s>,
    ) -> Result<Self, PdbError> {
        Ok(Self {
            debug_info,
            module_index,
            module,
        })
    }
739
    /// Converts raw PDB line records into sorted and merged [`LineInfo`] entries.
    ///
    /// Records whose offset cannot be mapped to an RVA and records with an explicit length of
    /// zero are dropped. Every remaining record is resolved to its file through `program` and,
    /// if source server mappings are available, annotated with the source server path and
    /// revision. The result is sorted by address, and consecutive records that refer to the same
    /// file and line are merged into one.
    ///
    /// # Errors
    ///
    /// Fails if the iterator yields an error or if the file of a record cannot be resolved.
    fn collect_lines<I>(
        &self,
        mut line_iter: I,
        program: &LineProgram<'s>,
    ) -> Result<Vec<LineInfo<'s>>, PdbError>
    where
        I: FallibleIterator<Item = pdb::LineInfo>,
        PdbError: From<I::Error>,
    {
        let address_map = &self.debug_info.address_map;

        let mut lines = Vec::new();
        while let Some(line_info) = line_iter.next()? {
            let rva = match line_info.offset.to_rva(address_map) {
                Some(rva) => u64::from(rva.0),
                None => continue,
            };

            // skip 0-sized line infos
            let size = line_info.length.map(u64::from);
            if size == Some(0) {
                continue;
            }

            let file_info = program.get_file_info(line_info.file_index)?;
            let mut file = self.debug_info.file_info(file_info)?;

            // Fill in source server information if available
            if let Some(mappings) = self.debug_info.srcsrv.as_ref() {
                let original_path = file.path_str();
                let info = mappings.get_info(&original_path);
                if let Some(SourceServerInfo { path, revision }) = info {
                    file.set_srcsrv_path(path.as_bytes());
                    file.set_srcsrv_revision(revision);
                }
            }

            lines.push(LineInfo {
                address: rva,
                size,
                file,
                line: line_info.line_start.into(),
            });
        }
        lines.sort_by_key(|line| line.address);

        // Merge line infos that only differ in their `column` information, which we don't
        // care about. We only want to output line infos that differ in their file/line.
        //
        // NB: `dedup_by` passes the later element first. Returning `true` removes `current` and
        // retains `prev`, which is why the combined size is stored in `prev`.
        lines.dedup_by(|current, prev| {
            // the records need to be consecutive to be able to merge
            let first_end = prev.size.and_then(|size| prev.address.checked_add(size));
            let is_consecutive = first_end == Some(current.address);
            // the line record points to the same file/line, so we want to merge/dedupe it
            if is_consecutive && prev.file == current.file && prev.line == current.line {
                prev.size = prev
                    .size
                    .map(|first_size| first_size.saturating_add(current.size.unwrap_or(0)));

                return true;
            }
            false
        });

        Ok(lines)
    }
805
806    /// Sanitize the collected lines.
807    ///
808    /// This essentially filters out all the lines that lay outside of the function range.
809    ///
810    /// For example we have observed in a real-world pdb that has:
811    /// - A function 0x33ea50 (size 0xc)
812    /// - With one line record: 0x33e850 (size 0x26)
813    ///
814    /// The line record is completely outside the range of the function.
815    fn sanitize_lines(func: &mut Function) {
816        let fn_start = func.address;
817        let fn_end = func.end_address();
818        func.lines.retain(|line| {
819            if line.address >= fn_end {
820                return false;
821            }
822            let line_end = match line.size {
823                Some(size) => line.address.saturating_add(size),
824                None => return true,
825            };
826            line_end > fn_start
827        });
828    }
829
    /// Builds a non-inline [`Function`] record for the code range starting at `offset`.
    ///
    /// Returns `Ok(None)` if `offset` cannot be translated to an RVA. The function name is
    /// formatted with the type formatter using `type_index`; if formatting fails, the raw `name`
    /// is used instead. Line records are collected from `program` for the symbol at `offset`.
    ///
    /// # Errors
    ///
    /// Fails if the line records cannot be collected.
    fn handle_function(
        &self,
        offset: PdbInternalSectionOffset,
        len: u32,
        name: RawString<'s>,
        type_index: TypeIndex,
        program: &LineProgram<'s>,
    ) -> Result<Option<Function<'s>>, PdbError> {
        let address_map = &self.debug_info.address_map;

        // Translate the function's address to the PE's address space. If this fails, we're
        // likely dealing with an invalid function and can skip it.
        let address = match offset.to_rva(address_map) {
            Some(addr) => u64::from(addr.0),
            None => return Ok(None),
        };

        // Names from the private symbol table are generally demangled. They contain the path of the
        // scope and name of the function itself, including type parameters, and the parameter lists
        // are contained in the type info. We do not emit a return type.
        let formatter = &self.debug_info.type_formatter;
        let name = name.to_string();
        let name = Name::new(
            formatter
                .format_function(&name, self.module_index, type_index)
                .map(Cow::Owned)
                // Fall back to the unformatted name if the formatter reports an error.
                .unwrap_or(name),
            NameMangling::Unmangled,
            Language::Unknown,
        );

        let line_iter = program.lines_for_symbol(offset);
        let lines = self.collect_lines(line_iter, program)?;

        Ok(Some(Function {
            address,
            size: len.into(),
            name,
            compilation_dir: &[],
            lines,
            inlinees: Vec::new(),
            inline: false,
        }))
    }
874
    /// Builds the [`Function`] record for a procedure symbol, using the procedure's own offset,
    /// length, name and type index.
    fn handle_procedure(
        &self,
        proc: &ProcedureSymbol<'s>,
        program: &LineProgram<'s>,
    ) -> Result<Option<Function<'s>>, PdbError> {
        self.handle_function(proc.offset, proc.len, proc.name, proc.type_index, program)
    }
882
    /// Builds the [`Function`] record for a separated code block of a procedure.
    ///
    /// The code range (offset and length) is taken from `sepcode`, whereas the name and type
    /// index are taken from `proc`.
    fn handle_separated_code(
        &self,
        proc: &ProcedureSymbol<'s>,
        sepcode: &SeparatedCodeSymbol,
        program: &LineProgram<'s>,
    ) -> Result<Option<Function<'s>>, PdbError> {
        self.handle_function(
            sepcode.offset,
            sepcode.len,
            proc.name,
            proc.type_index,
            program,
        )
    }
897
898    fn handle_inlinee(
899        &self,
900        inline_site: InlineSiteSymbol<'s>,
901        parent_offset: PdbInternalSectionOffset,
902        inlinee: &pdb::Inlinee<'s>,
903        program: &LineProgram<'s>,
904    ) -> Result<Option<Function<'s>>, PdbError> {
905        let line_iter = inlinee.lines(parent_offset, &inline_site);
906        let lines = self.collect_lines(line_iter, program)?;
907
908        // If there are no line records, skip this inline function completely. Apparently, it was
909        // eliminated by the compiler, and cannot be hit by the program anymore. For `symbolic`,
910        // such functions do not have any use.
911        let start = match lines.first().map(|line| line.address) {
912            Some(address) => address,
913            None => return Ok(None),
914        };
915
916        let end = match lines
917            .last()
918            .map(|line| line.address + line.size.unwrap_or(1))
919        {
920            Some(address) => address,
921            None => return Ok(None),
922        };
923
924        let formatter = &self.debug_info.type_formatter;
925        let name = Name::new(
926            formatter.format_id(self.module_index, inline_site.inlinee)?,
927            NameMangling::Unmangled,
928            Language::Unknown,
929        );
930
931        Ok(Some(Function {
932            address: start,
933            size: end - start,
934            name,
935            compilation_dir: &[],
936            lines,
937            inlinees: Vec::new(),
938            inline: true,
939        }))
940    }
941
942    fn functions(&self) -> Result<Vec<Function<'s>>, PdbError> {
943        let program = self.module.line_program()?;
944        let mut symbols = self.module.symbols()?;
945
946        // Depending on the compiler version, the inlinee table might not be sorted. Since constant
947        // search through inlinees is too slow (due to repeated parsing), but Inlinees are rather
948        // small structures, it is relatively cheap to collect them into an in-memory index.
949        let inlinees: BTreeMap<_, _> = self
950            .module
951            .inlinees()?
952            .map(|i| Ok((i.index(), i)))
953            .collect()?;
954
955        let mut depth = 0;
956        let mut inc_next = false;
957        let mut skipped_depth = None;
958
959        let mut functions = Vec::new();
960        let mut stack = FunctionStack::new();
961        let mut proc_offsets = SmallVec::<[_; 3]>::new();
962        let mut last_proc = None;
963
964        while let Some(symbol) = symbols.next()? {
965            if inc_next {
966                depth += 1;
967            }
968
969            inc_next = symbol.starts_scope();
970            if symbol.ends_scope() {
971                depth -= 1;
972
973                if proc_offsets.last().is_some_and(|&(d, _)| d >= depth) {
974                    proc_offsets.pop();
975                }
976            }
977
978            // If we're navigating within a skipped function (see below), we can ignore this
979            // entry completely. Otherwise, we've moved out of any skipped function and can
980            // reset the stored depth.
981            match skipped_depth {
982                Some(skipped) if depth > skipped => continue,
983                _ => skipped_depth = None,
984            }
985
986            // Flush all functions out that exceed the current iteration depth. Since we
987            // encountered a symbol at this level, there will be no more inlinees to the
988            // previous function at the same level or any of it's children.
989            if symbol.ends_scope() {
990                stack.flush(depth, &mut functions);
991            }
992
993            let function = match symbol.parse() {
994                Ok(SymbolData::Procedure(proc)) => {
995                    proc_offsets.push((depth, proc.offset));
996                    let function = self.handle_procedure(&proc, &program)?;
997                    last_proc = Some(proc);
998                    function
999                }
1000                Ok(SymbolData::SeparatedCode(sepcode)) => match last_proc.as_ref() {
1001                    Some(last_proc) if last_proc.offset == sepcode.parent_offset => {
1002                        self.handle_separated_code(last_proc, &sepcode, &program)?
1003                    }
1004                    _ => continue,
1005                },
1006                Ok(SymbolData::InlineSite(site)) => {
1007                    let parent_offset = proc_offsets
1008                        .last()
1009                        .map(|&(_, offset)| offset)
1010                        .ok_or(PdbErrorKind::UnexpectedInline)?;
1011
1012                    // We can assume that inlinees will be listed in the inlinee table. If missing,
1013                    // skip silently instead of erroring out. Missing a single inline function is
1014                    // more acceptable in such a case than halting iteration completely.
1015                    if let Some(inlinee) = inlinees.get(&site.inlinee) {
1016                        // We have seen that the MSVC Compiler `19.16` (VS 2017) can output
1017                        // `ChangeFile` annotations which are not properly aligned to the beginning
1018                        // of a file checksum, leading to `UnimplementedFileChecksumKind` errors.
1019                        // Investigation showed that this can happen for inlined `{ctor}` functions,
1020                        // but there are no clear leads to why that might have happened, and how to
1021                        // recover from these broken annotations.
1022                        // For that reason, we skip these inlinees completely so we do not fail
1023                        // processing the complete pdb file.
1024                        self.handle_inlinee(site, parent_offset, inlinee, &program)
1025                            .ok()
1026                            .flatten()
1027                    } else {
1028                        None
1029                    }
1030                }
1031                // We need to ignore errors here since the PDB crate does not yet implement all
1032                // symbol types. Instead of erroring too often, it's better to swallow these.
1033                _ => continue,
1034            };
1035
1036            match function {
1037                Some(mut function) => {
1038                    Self::sanitize_lines(&mut function);
1039                    // TODO: figure out what to do with functions that have no more lines
1040                    // after sanitization
1041                    stack.push(depth, function)
1042                }
1043                None => skipped_depth = Some(depth),
1044            }
1045        }
1046
1047        // We're done, flush the remaining stack.
1048        stack.flush(0, &mut functions);
1049
1050        Ok(functions)
1051    }
1052}
1053
1054struct PdbUnitIterator<'s> {
1055    debug_info: &'s PdbDebugInfo<'s>,
1056    index: usize,
1057}
1058
1059impl<'s> Iterator for PdbUnitIterator<'s> {
1060    type Item = Result<Unit<'s>, PdbError>;
1061
1062    fn next(&mut self) -> Option<Self::Item> {
1063        let debug_info = self.debug_info;
1064        while self.index < debug_info.modules().len() {
1065            let module_index = self.index;
1066            let result = debug_info.get_module(module_index);
1067            self.index += 1;
1068
1069            let module = match result {
1070                Ok(Some(module)) => module,
1071                Ok(None) => continue,
1072                Err(error) => return Some(Err(error)),
1073            };
1074
1075            return Some(Unit::load(debug_info, module_index, module));
1076        }
1077
1078        None
1079    }
1080}
1081
1082/// An iterator over source files in a Pdb object.
1083pub struct PdbFileIterator<'s> {
1084    debug_info: &'s PdbDebugInfo<'s>,
1085    units: PdbUnitIterator<'s>,
1086    files: pdb::FileIterator<'s>,
1087    finished: bool,
1088}
1089
1090impl<'s> Iterator for PdbFileIterator<'s> {
1091    type Item = Result<FileEntry<'s>, PdbError>;
1092
1093    fn next(&mut self) -> Option<Self::Item> {
1094        if self.finished {
1095            return None;
1096        }
1097
1098        loop {
1099            if let Some(file_result) = self.files.next().transpose() {
1100                let result = file_result
1101                    .map_err(|err| err.into())
1102                    .and_then(|i| self.debug_info.file_info(i))
1103                    .map(|mut file| {
1104                        // Fill in source server information if available
1105                        if let Some(mappings) = &self.debug_info.srcsrv {
1106                            let original_path = file.path_str();
1107                            let info = mappings.get_info(&original_path);
1108                            if let Some(SourceServerInfo { path, revision }) = info {
1109                                file.set_srcsrv_path(path.as_bytes());
1110                                file.set_srcsrv_revision(revision);
1111                            }
1112                        }
1113
1114                        FileEntry::new(Cow::default(), file)
1115                    });
1116
1117                return Some(result);
1118            }
1119
1120            let unit = match self.units.next() {
1121                Some(Ok(unit)) => unit,
1122                Some(Err(error)) => return Some(Err(error)),
1123                None => break,
1124            };
1125
1126            let line_program = match unit.module.line_program() {
1127                Ok(line_program) => line_program,
1128                Err(error) => return Some(Err(error.into())),
1129            };
1130
1131            self.files = line_program.files();
1132        }
1133
1134        self.finished = true;
1135        None
1136    }
1137}
1138
1139/// An iterator over functions in a PDB file.
1140pub struct PdbFunctionIterator<'s> {
1141    units: PdbUnitIterator<'s>,
1142    functions: std::vec::IntoIter<Function<'s>>,
1143    finished: bool,
1144}
1145
1146impl<'s> Iterator for PdbFunctionIterator<'s> {
1147    type Item = Result<Function<'s>, PdbError>;
1148
1149    fn next(&mut self) -> Option<Self::Item> {
1150        if self.finished {
1151            return None;
1152        }
1153
1154        loop {
1155            if let Some(func) = self.functions.next() {
1156                return Some(Ok(func));
1157            }
1158
1159            let unit = match self.units.next() {
1160                Some(Ok(unit)) => unit,
1161                Some(Err(error)) => return Some(Err(error)),
1162                None => break,
1163            };
1164
1165            self.functions = match unit.functions() {
1166                Ok(functions) => functions.into_iter(),
1167                Err(error) => return Some(Err(error)),
1168            };
1169        }
1170
1171        self.finished = true;
1172        None
1173    }
1174}
1175
1176impl std::iter::FusedIterator for PdbFunctionIterator<'_> {}