Skip to main content

symbolic_debuginfo/pdb/
mod.rs

1//! Support for Program Database, the debug companion format on Windows.
2
3use std::borrow::Cow;
4use std::collections::btree_map::BTreeMap;
5use std::error::Error;
6use std::fmt;
7use std::io::Cursor;
8use std::sync::Arc;
9
10use elsa::FrozenMap;
11use parking_lot::RwLock;
12use pdb_addr2line::pdb::{
13    AddressMap, FallibleIterator, ImageSectionHeader, InlineSiteSymbol, LineProgram, MachineType,
14    Module, ModuleInfo, PdbInternalSectionOffset, ProcedureSymbol, RawString, SeparatedCodeSymbol,
15    SymbolData, TypeIndex,
16};
17use pdb_addr2line::ModuleProvider;
18use smallvec::SmallVec;
19use thiserror::Error;
20
21use symbolic_common::{
22    Arch, AsSelf, CodeId, CpuFamily, DebugId, Language, Name, NameMangling, SelfCell, Uuid,
23};
24
25use crate::base::*;
26use crate::function_stack::FunctionStack;
27use crate::pdb::srcsrv::{SourceServerInfo, SourceServerMappings};
28use crate::sourcebundle::SourceFileDescriptor;
29use crate::ParseObjectOptions;
30
31mod srcsrv;
32
/// PDB reader backed by a `Cursor` over the borrowed input bytes.
type Pdb<'data> = pdb::PDB<'data, Cursor<&'data [u8]>>;

/// Byte prefix that a buffer must start with for [`PdbObject::test`] to accept it.
const MAGIC_BIG: &[u8] = b"Microsoft C/C++ MSF 7.00\r\n\x1a\x44\x53\x00\x00\x00";
36
37// Used for CFI, remove once abstraction is complete
38#[doc(hidden)]
39pub use pdb_addr2line::pdb;
40
/// The kind of failure reported by a [`PdbError`].
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PdbErrorKind {
    /// The PDB file is corrupted. The error's cause carries the details.
    BadObject,

    /// An inline record was found that has no inlining parent.
    UnexpectedInline,

    /// A type name could not be formatted.
    FormattingFailed,
}

impl fmt::Display for PdbErrorKind {
    /// Writes the fixed, human-readable description of this error kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let description = match *self {
            PdbErrorKind::BadObject => "invalid pdb file",
            PdbErrorKind::UnexpectedInline => "unexpected inline function without parent",
            PdbErrorKind::FormattingFailed => "failed to format type name",
        };

        f.write_str(description)
    }
}
64
/// An error when dealing with [`PdbObject`](struct.PdbObject.html).
///
/// The error displays as its [`PdbErrorKind`]; the underlying cause, if any, is exposed as the
/// error source.
#[derive(Debug, Error)]
#[error("{kind}")]
pub struct PdbError {
    /// Classification of the failure; also provides the display text.
    kind: PdbErrorKind,
    /// Underlying cause; `None` when the error was created from a bare kind.
    #[source]
    source: Option<Box<dyn Error + Send + Sync + 'static>>,
}
73
74impl PdbError {
75    /// Creates a new PDB error from a known kind of error as well as an arbitrary error
76    /// payload.
77    fn new<E>(kind: PdbErrorKind, source: E) -> Self
78    where
79        E: Into<Box<dyn Error + Send + Sync>>,
80    {
81        let source = Some(source.into());
82        Self { kind, source }
83    }
84
85    /// Returns the corresponding [`PdbErrorKind`] for this error.
86    pub fn kind(&self) -> PdbErrorKind {
87        self.kind
88    }
89}
90
91impl From<PdbErrorKind> for PdbError {
92    fn from(kind: PdbErrorKind) -> Self {
93        Self { kind, source: None }
94    }
95}
96
97impl From<pdb::Error> for PdbError {
98    fn from(e: pdb::Error) -> Self {
99        Self::new(PdbErrorKind::BadObject, e)
100    }
101}
102
103impl From<fmt::Error> for PdbError {
104    fn from(e: fmt::Error) -> Self {
105        Self::new(PdbErrorKind::FormattingFailed, e)
106    }
107}
108
109impl From<pdb_addr2line::Error> for PdbError {
110    fn from(e: pdb_addr2line::Error) -> Self {
111        match e {
112            pdb_addr2line::Error::PdbError(e) => Self::new(PdbErrorKind::BadObject, e),
113            pdb_addr2line::Error::FormatError(e) => Self::new(PdbErrorKind::FormattingFailed, e),
114            e => Self::new(PdbErrorKind::FormattingFailed, e),
115        }
116    }
117}
118
/// Program Database, the debug companion format on Windows.
///
/// This object is a sole debug companion to [`PeObject`](../pe/struct.PeObject.html).
pub struct PdbObject<'data> {
    /// Shared, lock-protected handle to the underlying PDB reader.
    pdb: Arc<RwLock<Pdb<'data>>>,
    /// Debug information stream (DBI); queried for the age and the machine type.
    debug_info: Arc<pdb::DebugInformation<'data>>,
    /// PDB information header; provides the GUID and a fallback age.
    pdb_info: pdb::PDBInformation<'data>,
    /// Global symbol table that is scanned for public symbols.
    public_syms: pdb::SymbolTable<'data>,
    /// Per-section executable flags, used to filter public symbols.
    executable_sections: ExecutableSections,
    /// The raw bytes this object was parsed from.
    data: &'data [u8],
}
130
// NB: The pdb crate simulates mmap behavior on any Read + Seek type. This implementation requires
// mutability of the `Source` and uses trait objects without a Send + Sync barrier. We know that we
// only instantiate `&[u8]` as source. Whenever we mutate the reader (to read a new module stream),
// we acquire a write lock on the PDB, which should be sufficient.
unsafe impl Send for PdbObject<'_> {}
unsafe impl Sync for PdbObject<'_> {}
137
138impl<'data> PdbObject<'data> {
139    /// Tests whether the buffer could contain an PDB object.
140    pub fn test(data: &[u8]) -> bool {
141        // NB: "Microsoft C/C++ program database 2.00" is not supported by the pdb crate, so there
142        // is no point in pretending we could read it.
143        data.starts_with(MAGIC_BIG)
144    }
145
146    /// Tries to parse a PDB object from the given slice.
147    #[allow(clippy::arc_with_non_send_sync)]
148    pub fn parse(data: &'data [u8]) -> Result<Self, PdbError> {
149        let mut pdb = Pdb::open(Cursor::new(data))?;
150        let dbi = pdb.debug_information()?;
151        let pdbi = pdb.pdb_information()?;
152        let pubi = pdb.global_symbols()?;
153        let sections = pdb.sections()?;
154
155        Ok(PdbObject {
156            pdb: Arc::new(RwLock::new(pdb)),
157            debug_info: Arc::new(dbi),
158            pdb_info: pdbi,
159            public_syms: pubi,
160            data,
161            executable_sections: ExecutableSections::from_sections(&sections),
162        })
163    }
164
165    /// The container file format, which is always `FileFormat::Pdb`.
166    pub fn file_format(&self) -> FileFormat {
167        FileFormat::Pdb
168    }
169
170    /// The code identifier of this object, always `None`.
171    ///
172    /// PDB files do not contain sufficient information to compute the code identifier, since they
173    /// are lacking the relevant parts of the PE header.
174    pub fn code_id(&self) -> Option<CodeId> {
175        None
176    }
177
178    /// The debug information identifier of this PDB.
179    ///
180    /// The PDB stores a specific header that contains GUID and age bits. Additionally, Microsoft
181    /// uses the file name of the PDB to avoid GUID collisions. In most contexts, however, it is
182    /// sufficient to rely on the uniqueness of the GUID to identify a PDB.
183    ///
184    /// The same information is also stored in a header in the corresponding PE file, which can be
185    /// used to locate a PDB from a PE.
186    pub fn debug_id(&self) -> DebugId {
187        // Prefer the age from the debug information stream, as it is more likely to correspond to
188        // the executable than the PDB info header. The latter is often bumped independently when
189        // the PDB is processed or optimized, which causes it to go out of sync with the original
190        // image.
191        let age = self.debug_info.age().unwrap_or(self.pdb_info.age);
192        match Uuid::from_slice(&self.pdb_info.guid.as_bytes()[..]) {
193            Ok(uuid) => DebugId::from_parts(uuid, age),
194            Err(_) => DebugId::default(),
195        }
196    }
197
198    /// The CPU architecture of this object, as specified in the debug information stream (DBI).
199    pub fn arch(&self) -> Arch {
200        self.debug_info
201            .machine_type()
202            .ok()
203            .map(arch_from_machine)
204            .unwrap_or_default()
205    }
206
207    /// Returns true if this object contains source server information.
208    pub fn has_source_server_data(&self) -> Result<bool, PdbError> {
209        let mut pdb = self.pdb.write();
210        match pdb.named_stream(b"srcsrv") {
211            Ok(_) => Ok(true),
212            Err(pdb::Error::StreamNameNotFound) => {
213                // No source server info is normal for many PDBs
214                Ok(false)
215            }
216            Err(e) => Err(e.into()),
217        }
218    }
219
220    /// The kind of this object, which is always `Debug`.
221    pub fn kind(&self) -> ObjectKind {
222        ObjectKind::Debug
223    }
224
225    /// The address at which the image prefers to be loaded into memory.
226    ///
227    /// The PDB only stores relative addresses, and more importantly, does not provide sufficient
228    /// information to compute the original PE's load address. The according PE, however does
229    /// feature a load address (called `image_base`). See [`PeObject::load_address`] for more
230    /// information.
231    ///
232    /// [`PeObject::load_address`]: ../pe/struct.PeObject.html#method.load_address
233    pub fn load_address(&self) -> u64 {
234        0
235    }
236
237    /// Determines whether this object exposes a public symbol table.
238    pub fn has_symbols(&self) -> bool {
239        // We can safely assume that PDBs will always contain symbols.
240        true
241    }
242
243    /// Returns an iterator over symbols in the public symbol table.
244    pub fn symbols(&self) -> PdbSymbolIterator<'data, '_> {
245        PdbSymbolIterator {
246            symbols: self.public_syms.iter(),
247            address_map: self.pdb.write().address_map().ok(),
248            executable_sections: &self.executable_sections,
249        }
250    }
251
252    /// Returns an ordered map of symbols in the symbol table.
253    pub fn symbol_map(&self) -> SymbolMap<'data> {
254        self.symbols().collect()
255    }
256
257    /// Determines whether this object contains debug information.
258    pub fn has_debug_info(&self) -> bool {
259        // There is no cheap way to find out if a PDB contains debugging information that we care
260        // about. Effectively, we're interested in local symbols declared in the module info
261        // streams. To reliably determine whether any stream is present, we'd have to probe each one
262        // of them, which can result in quite a lot of disk I/O.
263        true
264    }
265
266    /// Determines whether this object contains embedded source.
267    pub fn has_sources(&self) -> bool {
268        false
269    }
270
271    /// Determines whether this object is malformed and was only partially parsed
272    pub fn is_malformed(&self) -> bool {
273        false
274    }
275
276    /// Constructs a debugging session.
277    pub fn debug_session(&self) -> Result<PdbDebugSession<'data>, PdbError> {
278        PdbDebugSession::build(self)
279    }
280
281    /// Determines whether this object contains stack unwinding information.
282    pub fn has_unwind_info(&self) -> bool {
283        // The PDB crate currently loads quite a lot of information from the PDB when accessing the
284        // frame table. However, we expect unwind info in every PDB for 32-bit builds, so we can
285        // just assume it's there if the architecture matches.
286        // TODO: Implement a better way by exposing the extra streams in the PDB crate.
287        self.arch().cpu_family() == CpuFamily::Intel32
288    }
289
290    /// Returns the raw data of the ELF file.
291    pub fn data(&self) -> &'data [u8] {
292        self.data
293    }
294
295    #[doc(hidden)]
296    pub fn inner(&self) -> &RwLock<Pdb<'data>> {
297        &self.pdb
298    }
299}
300
301impl fmt::Debug for PdbObject<'_> {
302    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
303        f.debug_struct("PdbObject")
304            .field("debug_id", &self.debug_id())
305            .field("arch", &self.arch())
306            .field("load_address", &format_args!("{:#x}", self.load_address()))
307            .field("has_symbols", &self.has_symbols())
308            .field("has_debug_info", &self.has_debug_info())
309            .field("has_unwind_info", &self.has_unwind_info())
310            .field("is_malformed", &self.is_malformed())
311            .finish()
312    }
313}
314
impl<'slf, 'data: 'slf> AsSelf<'slf> for PdbObject<'data> {
    type Ref = PdbObject<'slf>;

    /// Reinterprets `&PdbObject<'data>` as a reference to `PdbObject<'slf>`.
    fn as_self(&'slf self) -> &'slf Self::Ref {
        // The source and target types differ only in their lifetime parameter, and the impl
        // requires `'data: 'slf`.
        // NOTE(review): soundness presumably relies on `PdbObject` being usable with the shorter
        // lifetime — confirm.
        unsafe { std::mem::transmute(self) }
    }
}
322
323impl<'data> Parse<'data> for PdbObject<'data> {
324    type Error = PdbError;
325
326    fn test(data: &[u8]) -> bool {
327        Self::test(data)
328    }
329
330    fn parse_with_opts(data: &'data [u8], _opts: ParseObjectOptions) -> Result<Self, Self::Error> {
331        Self::parse(data)
332    }
333}
334
335impl<'data: 'object, 'object> ObjectLike<'data, 'object> for PdbObject<'data> {
336    type Error = PdbError;
337    type Session = PdbDebugSession<'data>;
338    type SymbolIterator = PdbSymbolIterator<'data, 'object>;
339
340    fn file_format(&self) -> FileFormat {
341        self.file_format()
342    }
343
344    fn code_id(&self) -> Option<CodeId> {
345        self.code_id()
346    }
347
348    fn debug_id(&self) -> DebugId {
349        self.debug_id()
350    }
351
352    fn arch(&self) -> Arch {
353        self.arch()
354    }
355
356    fn kind(&self) -> ObjectKind {
357        self.kind()
358    }
359
360    fn load_address(&self) -> u64 {
361        self.load_address()
362    }
363
364    fn has_symbols(&self) -> bool {
365        self.has_symbols()
366    }
367
368    fn symbols(&'object self) -> Self::SymbolIterator {
369        self.symbols()
370    }
371
372    fn symbol_map(&self) -> SymbolMap<'data> {
373        self.symbol_map()
374    }
375
376    fn has_debug_info(&self) -> bool {
377        self.has_debug_info()
378    }
379
380    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
381        self.debug_session()
382    }
383
384    fn has_unwind_info(&self) -> bool {
385        self.has_unwind_info()
386    }
387
388    fn has_sources(&self) -> bool {
389        self.has_sources()
390    }
391
392    fn is_malformed(&self) -> bool {
393        self.is_malformed()
394    }
395}
396
397pub(crate) fn arch_from_machine(machine: MachineType) -> Arch {
398    match machine {
399        MachineType::X86 => Arch::X86,
400        MachineType::Amd64 => Arch::Amd64,
401        MachineType::Arm => Arch::Arm,
402        MachineType::Arm64 => Arch::Arm64,
403        MachineType::PowerPC => Arch::Ppc,
404        _ => Arch::Unknown,
405    }
406}
407
/// Contains information about which sections are executable.
///
/// Used to check whether a section offset refers to an executable section.
struct ExecutableSections {
    /// For every section header in the PDB, a boolean which indicates whether the "executable"
    /// or "execute" flag is set in the section header's characteristics.
    ///
    /// Indexed by the zero-based section index; empty if the PDB has no section headers.
    is_executable_per_section: Vec<bool>,
}
414
415impl ExecutableSections {
416    pub fn from_sections(sections: &Option<Vec<ImageSectionHeader>>) -> Self {
417        Self {
418            is_executable_per_section: match sections {
419                Some(sections) => sections
420                    .iter()
421                    .map(|section| section.characteristics)
422                    .map(|char| char.executable() || char.execute())
423                    .collect(),
424                None => Default::default(),
425            },
426        }
427    }
428
429    /// Returns whether the given offset is contained in an executable section.
430    pub fn contains(&self, offset: &PdbInternalSectionOffset) -> bool {
431        // offset.section is a one-based index.
432        if offset.section == 0 {
433            // No section.
434            return false;
435        }
436
437        let section_index = (offset.section - 1) as usize;
438        self.is_executable_per_section
439            .get(section_index)
440            .cloned()
441            .unwrap_or(false)
442    }
443}
444
/// An iterator over symbols in the PDB file.
///
/// Returned by [`PdbObject::symbols`](struct.PdbObject.html#method.symbols).
pub struct PdbSymbolIterator<'data, 'object> {
    /// Iterator over the raw entries of the object's global symbol table.
    symbols: pdb::SymbolIter<'object>,
    /// Address map used to translate section offsets to RVAs. If it is `None`, the iterator
    /// yields no symbols.
    address_map: Option<AddressMap<'data>>,
    /// Executable-section table; symbols outside executable sections are skipped.
    executable_sections: &'object ExecutableSections,
}
453
454impl<'data> Iterator for PdbSymbolIterator<'data, '_> {
455    type Item = Symbol<'data>;
456
457    fn next(&mut self) -> Option<Self::Item> {
458        let address_map = self.address_map.as_ref()?;
459
460        while let Ok(Some(symbol)) = self.symbols.next() {
461            if let Ok(SymbolData::Public(public)) = symbol.parse() {
462                if !self.executable_sections.contains(&public.offset) {
463                    continue;
464                }
465
466                let address = match public.offset.to_rva(address_map) {
467                    Some(address) => address,
468                    None => continue,
469                };
470
471                // pdb::SymbolIter offers data bound to its own lifetime since it holds the
472                // buffer containing public symbols. The contract requires that we return
473                // `Symbol<'data>`, so we cannot return zero-copy symbols here.
474                let cow = public.name.to_string();
475                let name = Cow::from(String::from(cow));
476
477                return Some(Symbol {
478                    name: Some(name),
479                    address: u64::from(address.0),
480                    size: 0, // Computed in `SymbolMap`
481                });
482            }
483        }
484
485        None
486    }
487}
488
/// The PDB streams a debug session works on, plus a handle to the PDB for loading module
/// streams on demand.
struct PdbStreams<'d> {
    /// Debug information stream, shared with the originating `PdbObject`.
    debug_info: Arc<pdb::DebugInformation<'d>>,
    /// Type information stream.
    type_info: pdb::TypeInformation<'d>,
    /// Id information stream.
    id_info: pdb::IdInformation<'d>,
    /// String table; `None` if the PDB has no string table stream.
    string_table: Option<pdb::StringTable<'d>>,
    /// Raw contents of the "srcsrv" named stream; `None` if the PDB has no such stream.
    srcsrv: Option<Vec<u8>>,

    /// Lock-protected PDB reader, shared with the originating `PdbObject`.
    pdb: Arc<RwLock<Pdb<'d>>>,

    /// ModuleInfo objects are stored on this object (outside PdbDebugInfo) so that the
    /// PdbDebugInfo can store a TypeFormatter, which has a lifetime dependency on its
    /// ModuleProvider, which is this PdbStreams. This is so that TypeFormatter can cache
    /// CrossModuleImports inside itself, and those have a lifetime dependency on the
    /// ModuleInfo.
    module_infos: FrozenMap<usize, Box<ModuleInfo<'d>>>,
}
505
506impl<'d> PdbStreams<'d> {
507    fn from_pdb(pdb: &PdbObject<'d>) -> Result<Self, PdbError> {
508        let mut p = pdb.pdb.write();
509
510        // PDB::string_table errors if the named stream for the string table is not present.
511        // However, this occurs in certain PDBs and does not automatically indicate an error.
512        let string_table = match p.string_table() {
513            Ok(string_table) => Some(string_table),
514            Err(pdb::Error::StreamNameNotFound) => None,
515            Err(e) => return Err(e.into()),
516        };
517
518        // Try to open the "srcsrv" named stream
519        let srcsrv = match p.named_stream(b"srcsrv") {
520            Ok(stream) => Some(stream.as_slice().to_vec()),
521            Err(pdb::Error::StreamNameNotFound) => {
522                // No source server info is normal for many PDBs
523                None
524            }
525            Err(e) => return Err(e.into()),
526        };
527
528        Ok(Self {
529            string_table,
530            debug_info: pdb.debug_info.clone(),
531            type_info: p.type_information()?,
532            id_info: p.id_information()?,
533            srcsrv,
534            pdb: pdb.pdb.clone(),
535            module_infos: FrozenMap::new(),
536        })
537    }
538}
539
impl<'d> pdb_addr2line::ModuleProvider<'d> for PdbStreams<'d> {
    /// Returns the module info for `module`, loading and caching it on first use.
    ///
    /// Results are cached in `module_infos` under `module_index`. Returns `Ok(None)` if the PDB
    /// reports no module info for this module.
    fn get_module_info(
        &self,
        module_index: usize,
        module: &Module,
    ) -> Result<Option<&ModuleInfo<'d>>, pdb::Error> {
        // Serve repeated requests from the cache, without taking the PDB lock.
        if let Some(module_info) = self.module_infos.get(&module_index) {
            return Ok(Some(module_info));
        }

        // Loading a module stream needs the write lock on the PDB.
        let mut pdb = self.pdb.write();
        Ok(pdb.module_info(module)?.map(|module_info| {
            // The map hands back a reference to the stored value, which is what gets returned.
            self.module_infos
                .insert(module_index, Box::new(module_info))
        }))
    }
}
557
/// Derived debug information of a PDB, borrowing from the [`PdbStreams`] of the same session.
struct PdbDebugInfo<'d> {
    /// The streams, to load module streams on demand.
    streams: &'d PdbStreams<'d>,
    /// OMAP structure to map reordered sections to RVAs.
    address_map: pdb::AddressMap<'d>,
    /// String table for name lookups.
    string_table: Option<&'d pdb::StringTable<'d>>,
    /// Type formatter for function name strings.
    type_formatter: pdb_addr2line::TypeFormatter<'d, 'd>,
    /// Parsed source server mappings; `None` if there is no "srcsrv" stream or it failed to
    /// parse.
    srcsrv: Option<SourceServerMappings<'d>>,
}
569
570impl<'d> PdbDebugInfo<'d> {
571    fn build(pdb: &PdbObject<'d>, streams: &'d PdbStreams<'d>) -> Result<Self, PdbError> {
572        let modules = streams.debug_info.modules()?.collect::<Vec<_>>()?;
573
574        // Avoid deadlocks by only covering the two access to the address map. For
575        // instance, `pdb.symbol_map()` requires a mutable borrow of the PDB as well.
576        let mut p = pdb.pdb.write();
577        let address_map = p.address_map()?;
578
579        drop(p);
580
581        let srcsrv = streams
582            .srcsrv
583            .as_deref()
584            // We don't want to exit on error here so we can still use the PDB
585            // file even if we fail to parse the source server part.
586            // TODO: It would be nice to surface this error to users, if and
587            // when we add logging to this crate.
588            .and_then(|stream| SourceServerMappings::parse(stream).ok());
589
590        Ok(PdbDebugInfo {
591            address_map,
592            streams,
593            string_table: streams.string_table.as_ref(),
594            srcsrv,
595            type_formatter: pdb_addr2line::TypeFormatter::new_from_parts(
596                streams,
597                modules,
598                &streams.debug_info,
599                &streams.type_info,
600                &streams.id_info,
601                streams.string_table.as_ref(),
602                Default::default(),
603            )?,
604        })
605    }
606
607    /// Returns an iterator over all compilation units (modules).
608    fn units(&'d self) -> PdbUnitIterator<'d> {
609        PdbUnitIterator {
610            debug_info: self,
611            index: 0,
612        }
613    }
614
615    fn modules(&self) -> &[Module<'d>] {
616        self.type_formatter.modules()
617    }
618
619    fn get_module(&'d self, index: usize) -> Result<Option<&'d ModuleInfo<'d>>, PdbError> {
620        // Silently ignore module references out-of-bound
621        let module = match self.modules().get(index) {
622            Some(module) => module,
623            None => return Ok(None),
624        };
625
626        Ok(self.streams.get_module_info(index, module)?)
627    }
628
629    fn file_info(&self, file_info: pdb::FileInfo<'d>) -> Result<FileInfo<'_>, PdbError> {
630        let file_path = match self.string_table {
631            Some(string_table) => file_info.name.to_raw_string(string_table)?,
632            None => "".into(),
633        };
634
635        Ok(FileInfo::from_path(file_path.as_bytes()))
636    }
637}
638
impl<'slf, 'd: 'slf> AsSelf<'slf> for PdbDebugInfo<'d> {
    type Ref = PdbDebugInfo<'slf>;

    /// Reinterprets `&PdbDebugInfo<'d>` as a reference to `PdbDebugInfo<'slf>`.
    fn as_self(&'slf self) -> &'slf Self::Ref {
        // The source and target types differ only in their lifetime parameter, and the impl
        // requires `'d: 'slf`.
        // NOTE(review): soundness presumably relies on `PdbDebugInfo` being usable with the
        // shorter lifetime — confirm.
        unsafe { std::mem::transmute(self) }
    }
}
646
/// Debug session for PDB objects.
///
/// Created through [`PdbObject::debug_session`].
pub struct PdbDebugSession<'d> {
    /// Owns the boxed `PdbStreams` together with the `PdbDebugInfo` that borrows from them.
    cell: SelfCell<Box<PdbStreams<'d>>, PdbDebugInfo<'d>>,
}
651
652impl<'d> PdbDebugSession<'d> {
653    fn build(pdb: &PdbObject<'d>) -> Result<Self, PdbError> {
654        let streams = PdbStreams::from_pdb(pdb)?;
655
656        let cell = SelfCell::try_new(Box::new(streams), |streams| {
657            PdbDebugInfo::build(pdb, unsafe { &*streams })
658        })?;
659
660        Ok(PdbDebugSession { cell })
661    }
662
663    /// Returns an iterator over all source files in this debug file.
664    pub fn files(&self) -> PdbFileIterator<'_> {
665        PdbFileIterator {
666            debug_info: self.cell.get(),
667            units: self.cell.get().units(),
668            files: pdb::FileIterator::default(),
669            finished: false,
670        }
671    }
672
673    /// Returns an iterator over all functions in this debug file.
674    pub fn functions(&self) -> PdbFunctionIterator<'_> {
675        PdbFunctionIterator {
676            units: self.cell.get().units(),
677            functions: Vec::new().into_iter(),
678            finished: false,
679        }
680    }
681
682    /// See [DebugSession::source_by_path] for more information.
683    pub fn source_by_path(
684        &self,
685        _path: &str,
686    ) -> Result<Option<SourceFileDescriptor<'_>>, PdbError> {
687        Ok(None)
688    }
689
690    /// Returns the SRCSRV VCS integration name if available.
691    ///
692    /// This extracts the version control system identifier from the SRCSRV stream,
693    /// if present. Common values include "perforce", "tfs", "git", etc.
694    /// Returns `None` if no SRCSRV stream exists or if it cannot be parsed.
695    pub fn srcsrv_vcs_name(&self) -> Option<String> {
696        self.cell
697            .get()
698            .srcsrv
699            .as_ref()
700            .map(|srcsrv| srcsrv.vcs_name().to_owned())
701    }
702}
703
704impl<'session> DebugSession<'session> for PdbDebugSession<'_> {
705    type Error = PdbError;
706    type FunctionIterator = PdbFunctionIterator<'session>;
707    type FileIterator = PdbFileIterator<'session>;
708
709    fn functions(&'session self) -> Self::FunctionIterator {
710        self.functions()
711    }
712
713    fn files(&'session self) -> Self::FileIterator {
714        self.files()
715    }
716
717    fn source_by_path(&self, path: &str) -> Result<Option<SourceFileDescriptor<'_>>, Self::Error> {
718        self.source_by_path(path)
719    }
720}
721
/// A single module of the PDB together with the shared debug info needed to process it.
struct Unit<'s> {
    /// Shared debug info: address map, type formatter, string table and source server mappings.
    debug_info: &'s PdbDebugInfo<'s>,
    /// Index of this module, passed to the type formatter when formatting names.
    module_index: usize,
    /// The loaded module info of this unit.
    module: &'s pdb::ModuleInfo<'s>,
}
727
728impl<'s> Unit<'s> {
729    fn load(
730        debug_info: &'s PdbDebugInfo<'s>,
731        module_index: usize,
732        module: &'s pdb::ModuleInfo<'s>,
733    ) -> Result<Self, PdbError> {
734        Ok(Self {
735            debug_info,
736            module_index,
737            module,
738        })
739    }
740
741    fn collect_lines<I>(
742        &self,
743        mut line_iter: I,
744        program: &LineProgram<'s>,
745    ) -> Result<Vec<LineInfo<'s>>, PdbError>
746    where
747        I: FallibleIterator<Item = pdb::LineInfo>,
748        PdbError: From<I::Error>,
749    {
750        let address_map = &self.debug_info.address_map;
751
752        let mut lines = Vec::new();
753        while let Some(line_info) = line_iter.next()? {
754            let rva = match line_info.offset.to_rva(address_map) {
755                Some(rva) => u64::from(rva.0),
756                None => continue,
757            };
758
759            // skip 0-sized line infos
760            let size = line_info.length.map(u64::from);
761            if size == Some(0) {
762                continue;
763            }
764
765            let file_info = program.get_file_info(line_info.file_index)?;
766            let mut file = self.debug_info.file_info(file_info)?;
767
768            // Fill in source server information if available
769            if let Some(mappings) = self.debug_info.srcsrv.as_ref() {
770                let original_path = file.path_str();
771                let info = mappings.get_info(&original_path);
772                if let Some(SourceServerInfo { path, revision }) = info {
773                    file.set_srcsrv_path(path.as_bytes());
774                    file.set_srcsrv_revision(revision);
775                }
776            }
777
778            lines.push(LineInfo {
779                address: rva,
780                size,
781                file,
782                line: line_info.line_start.into(),
783            });
784        }
785        lines.sort_by_key(|line| line.address);
786
787        // Merge line infos that only differ in their `column` information, which we don't
788        // care about. We only want to output line infos that differ in their file/line.
789        lines.dedup_by(|current, prev| {
790            // the records need to be consecutive to be able to merge
791            let first_end = prev.size.and_then(|size| prev.address.checked_add(size));
792            let is_consecutive = first_end == Some(current.address);
793            // the line record points to the same file/line, so we want to merge/dedupe it
794            if is_consecutive && prev.file == current.file && prev.line == current.line {
795                prev.size = prev
796                    .size
797                    .map(|first_size| first_size.saturating_add(current.size.unwrap_or(0)));
798
799                return true;
800            }
801            false
802        });
803
804        Ok(lines)
805    }
806
807    /// Sanitize the collected lines.
808    ///
809    /// This essentially filters out all the lines that lay outside of the function range.
810    ///
811    /// For example we have observed in a real-world pdb that has:
812    /// - A function 0x33ea50 (size 0xc)
813    /// - With one line record: 0x33e850 (size 0x26)
814    ///
815    /// The line record is completely outside the range of the function.
816    fn sanitize_lines(func: &mut Function) {
817        let fn_start = func.address;
818        let fn_end = func.end_address();
819        func.lines.retain(|line| {
820            if line.address >= fn_end {
821                return false;
822            }
823            let line_end = match line.size {
824                Some(size) => line.address.saturating_add(size),
825                None => return true,
826            };
827            line_end > fn_start
828        });
829    }
830
831    fn handle_function(
832        &self,
833        offset: PdbInternalSectionOffset,
834        len: u32,
835        name: RawString<'s>,
836        type_index: TypeIndex,
837        program: &LineProgram<'s>,
838    ) -> Result<Option<Function<'s>>, PdbError> {
839        let address_map = &self.debug_info.address_map;
840
841        // Translate the function's address to the PE's address space. If this fails, we're
842        // likely dealing with an invalid function and can skip it.
843        let address = match offset.to_rva(address_map) {
844            Some(addr) => u64::from(addr.0),
845            None => return Ok(None),
846        };
847
848        // Names from the private symbol table are generally demangled. They contain the path of the
849        // scope and name of the function itself, including type parameters, and the parameter lists
850        // are contained in the type info. We do not emit a return type.
851        let formatter = &self.debug_info.type_formatter;
852        let name = name.to_string();
853        let name = Name::new(
854            formatter
855                .format_function(&name, self.module_index, type_index)
856                .map(Cow::Owned)
857                .unwrap_or(name),
858            NameMangling::Unmangled,
859            Language::Unknown,
860        );
861
862        let line_iter = program.lines_for_symbol(offset);
863        let lines = self.collect_lines(line_iter, program)?;
864
865        Ok(Some(Function {
866            address,
867            size: len.into(),
868            name,
869            compilation_dir: &[],
870            lines,
871            inlinees: Vec::new(),
872            inline: false,
873        }))
874    }
875
876    fn handle_procedure(
877        &self,
878        proc: &ProcedureSymbol<'s>,
879        program: &LineProgram<'s>,
880    ) -> Result<Option<Function<'s>>, PdbError> {
881        self.handle_function(proc.offset, proc.len, proc.name, proc.type_index, program)
882    }
883
884    fn handle_separated_code(
885        &self,
886        proc: &ProcedureSymbol<'s>,
887        sepcode: &SeparatedCodeSymbol,
888        program: &LineProgram<'s>,
889    ) -> Result<Option<Function<'s>>, PdbError> {
890        self.handle_function(
891            sepcode.offset,
892            sepcode.len,
893            proc.name,
894            proc.type_index,
895            program,
896        )
897    }
898
899    fn handle_inlinee(
900        &self,
901        inline_site: InlineSiteSymbol<'s>,
902        parent_offset: PdbInternalSectionOffset,
903        inlinee: &pdb::Inlinee<'s>,
904        program: &LineProgram<'s>,
905    ) -> Result<Option<Function<'s>>, PdbError> {
906        let line_iter = inlinee.lines(parent_offset, &inline_site);
907        let lines = self.collect_lines(line_iter, program)?;
908
909        // If there are no line records, skip this inline function completely. Apparently, it was
910        // eliminated by the compiler, and cannot be hit by the program anymore. For `symbolic`,
911        // such functions do not have any use.
912        let start = match lines.first().map(|line| line.address) {
913            Some(address) => address,
914            None => return Ok(None),
915        };
916
917        let end = match lines
918            .last()
919            .map(|line| line.address + line.size.unwrap_or(1))
920        {
921            Some(address) => address,
922            None => return Ok(None),
923        };
924
925        let formatter = &self.debug_info.type_formatter;
926        let name = Name::new(
927            formatter.format_id(self.module_index, inline_site.inlinee)?,
928            NameMangling::Unmangled,
929            Language::Unknown,
930        );
931
932        Ok(Some(Function {
933            address: start,
934            size: end - start,
935            name,
936            compilation_dir: &[],
937            lines,
938            inlinees: Vec::new(),
939            inline: true,
940        }))
941    }
942
943    fn functions(&self) -> Result<Vec<Function<'s>>, PdbError> {
944        let program = self.module.line_program()?;
945        let mut symbols = self.module.symbols()?;
946
947        // Depending on the compiler version, the inlinee table might not be sorted. Since constant
948        // search through inlinees is too slow (due to repeated parsing), but Inlinees are rather
949        // small structures, it is relatively cheap to collect them into an in-memory index.
950        let inlinees: BTreeMap<_, _> = self
951            .module
952            .inlinees()?
953            .map(|i| Ok((i.index(), i)))
954            .collect()?;
955
956        let mut depth = 0;
957        let mut inc_next = false;
958        let mut skipped_depth = None;
959
960        let mut functions = Vec::new();
961        let mut stack = FunctionStack::new();
962        let mut proc_offsets = SmallVec::<[_; 3]>::new();
963        let mut last_proc = None;
964
965        while let Some(symbol) = symbols.next()? {
966            if inc_next {
967                depth += 1;
968            }
969
970            inc_next = symbol.starts_scope();
971            if symbol.ends_scope() {
972                depth -= 1;
973
974                if proc_offsets.last().is_some_and(|&(d, _)| d >= depth) {
975                    proc_offsets.pop();
976                }
977            }
978
979            // If we're navigating within a skipped function (see below), we can ignore this
980            // entry completely. Otherwise, we've moved out of any skipped function and can
981            // reset the stored depth.
982            match skipped_depth {
983                Some(skipped) if depth > skipped => continue,
984                _ => skipped_depth = None,
985            }
986
987            // Flush all functions out that exceed the current iteration depth. Since we
988            // encountered a symbol at this level, there will be no more inlinees to the
989            // previous function at the same level or any of it's children.
990            if symbol.ends_scope() {
991                stack.flush(depth, &mut functions);
992            }
993
994            let function = match symbol.parse() {
995                Ok(SymbolData::Procedure(proc)) => {
996                    proc_offsets.push((depth, proc.offset));
997                    let function = self.handle_procedure(&proc, &program)?;
998                    last_proc = Some(proc);
999                    function
1000                }
1001                Ok(SymbolData::SeparatedCode(sepcode)) => match last_proc.as_ref() {
1002                    Some(last_proc) if last_proc.offset == sepcode.parent_offset => {
1003                        self.handle_separated_code(last_proc, &sepcode, &program)?
1004                    }
1005                    _ => continue,
1006                },
1007                Ok(SymbolData::InlineSite(site)) => {
1008                    let parent_offset = proc_offsets
1009                        .last()
1010                        .map(|&(_, offset)| offset)
1011                        .ok_or(PdbErrorKind::UnexpectedInline)?;
1012
1013                    // We can assume that inlinees will be listed in the inlinee table. If missing,
1014                    // skip silently instead of erroring out. Missing a single inline function is
1015                    // more acceptable in such a case than halting iteration completely.
1016                    if let Some(inlinee) = inlinees.get(&site.inlinee) {
1017                        // We have seen that the MSVC Compiler `19.16` (VS 2017) can output
1018                        // `ChangeFile` annotations which are not properly aligned to the beginning
1019                        // of a file checksum, leading to `UnimplementedFileChecksumKind` errors.
1020                        // Investigation showed that this can happen for inlined `{ctor}` functions,
1021                        // but there are no clear leads to why that might have happened, and how to
1022                        // recover from these broken annotations.
1023                        // For that reason, we skip these inlinees completely so we do not fail
1024                        // processing the complete pdb file.
1025                        self.handle_inlinee(site, parent_offset, inlinee, &program)
1026                            .ok()
1027                            .flatten()
1028                    } else {
1029                        None
1030                    }
1031                }
1032                // We need to ignore errors here since the PDB crate does not yet implement all
1033                // symbol types. Instead of erroring too often, it's better to swallow these.
1034                _ => continue,
1035            };
1036
1037            match function {
1038                Some(mut function) => {
1039                    Self::sanitize_lines(&mut function);
1040                    // TODO: figure out what to do with functions that have no more lines
1041                    // after sanitization
1042                    stack.push(depth, function)
1043                }
1044                None => skipped_depth = Some(depth),
1045            }
1046        }
1047
1048        // We're done, flush the remaining stack.
1049        stack.flush(0, &mut functions);
1050
1051        Ok(functions)
1052    }
1053}
1054
1055struct PdbUnitIterator<'s> {
1056    debug_info: &'s PdbDebugInfo<'s>,
1057    index: usize,
1058}
1059
1060impl<'s> Iterator for PdbUnitIterator<'s> {
1061    type Item = Result<Unit<'s>, PdbError>;
1062
1063    fn next(&mut self) -> Option<Self::Item> {
1064        let debug_info = self.debug_info;
1065        while self.index < debug_info.modules().len() {
1066            let module_index = self.index;
1067            let result = debug_info.get_module(module_index);
1068            self.index += 1;
1069
1070            let module = match result {
1071                Ok(Some(module)) => module,
1072                Ok(None) => continue,
1073                Err(error) => return Some(Err(error)),
1074            };
1075
1076            return Some(Unit::load(debug_info, module_index, module));
1077        }
1078
1079        None
1080    }
1081}
1082
1083/// An iterator over source files in a Pdb object.
1084pub struct PdbFileIterator<'s> {
1085    debug_info: &'s PdbDebugInfo<'s>,
1086    units: PdbUnitIterator<'s>,
1087    files: pdb::FileIterator<'s>,
1088    finished: bool,
1089}
1090
1091impl<'s> Iterator for PdbFileIterator<'s> {
1092    type Item = Result<FileEntry<'s>, PdbError>;
1093
1094    fn next(&mut self) -> Option<Self::Item> {
1095        if self.finished {
1096            return None;
1097        }
1098
1099        loop {
1100            if let Some(file_result) = self.files.next().transpose() {
1101                let result = file_result
1102                    .map_err(|err| err.into())
1103                    .and_then(|i| self.debug_info.file_info(i))
1104                    .map(|mut file| {
1105                        // Fill in source server information if available
1106                        if let Some(mappings) = &self.debug_info.srcsrv {
1107                            let original_path = file.path_str();
1108                            let info = mappings.get_info(&original_path);
1109                            if let Some(SourceServerInfo { path, revision }) = info {
1110                                file.set_srcsrv_path(path.as_bytes());
1111                                file.set_srcsrv_revision(revision);
1112                            }
1113                        }
1114
1115                        FileEntry::new(Cow::default(), file)
1116                    });
1117
1118                return Some(result);
1119            }
1120
1121            let unit = match self.units.next() {
1122                Some(Ok(unit)) => unit,
1123                Some(Err(error)) => return Some(Err(error)),
1124                None => break,
1125            };
1126
1127            let line_program = match unit.module.line_program() {
1128                Ok(line_program) => line_program,
1129                Err(error) => return Some(Err(error.into())),
1130            };
1131
1132            self.files = line_program.files();
1133        }
1134
1135        self.finished = true;
1136        None
1137    }
1138}
1139
1140/// An iterator over functions in a PDB file.
1141pub struct PdbFunctionIterator<'s> {
1142    units: PdbUnitIterator<'s>,
1143    functions: std::vec::IntoIter<Function<'s>>,
1144    finished: bool,
1145}
1146
1147impl<'s> Iterator for PdbFunctionIterator<'s> {
1148    type Item = Result<Function<'s>, PdbError>;
1149
1150    fn next(&mut self) -> Option<Self::Item> {
1151        if self.finished {
1152            return None;
1153        }
1154
1155        loop {
1156            if let Some(func) = self.functions.next() {
1157                return Some(Ok(func));
1158            }
1159
1160            let unit = match self.units.next() {
1161                Some(Ok(unit)) => unit,
1162                Some(Err(error)) => return Some(Err(error)),
1163                None => break,
1164            };
1165
1166            self.functions = match unit.functions() {
1167                Ok(functions) => functions.into_iter(),
1168                Err(error) => return Some(Err(error)),
1169            };
1170        }
1171
1172        self.finished = true;
1173        None
1174    }
1175}
1176
1177impl std::iter::FusedIterator for PdbFunctionIterator<'_> {}