Skip to main content

symbolic_debuginfo/
dwarf.rs

1//! Support for DWARF debugging information, common to ELF and MachO.
//! In rare cases, PE files may contain it as well.
3//!
4//! The central element of this module is the [`Dwarf`] trait, which is implemented by [`ElfObject`],
5//! [`MachObject`] and [`PeObject`]. The dwarf debug session object can be obtained via getters on those types.
6//!
7//! [`Dwarf`]: trait.Dwarf.html
8//! [`ElfObject`]: ../elf/struct.ElfObject.html
9//! [`MachObject`]: ../macho/struct.MachObject.html
10//! [`PeObject`]: ../pe/struct.PeObject.html
11
12use std::borrow::Cow;
13use std::collections::BTreeSet;
14use std::error::Error;
15use std::fmt;
16use std::marker::PhantomData;
17use std::ops::Deref;
18use std::sync::Arc;
19
20use fallible_iterator::FallibleIterator;
21use gimli::read::{AttributeValue, Error as GimliError, Range};
22use gimli::{constants, AbbreviationsCacheStrategy, DwarfFileType, UnitSectionOffset};
23use once_cell::sync::OnceCell;
24use thiserror::Error;
25
26use symbolic_common::{AsSelf, Language, Name, NameMangling, SelfCell};
27
28use crate::base::*;
29use crate::function_builder::FunctionBuilder;
30#[cfg(feature = "macho")]
31use crate::macho::BcSymbolMap;
32use crate::sourcebundle::SourceFileDescriptor;
33
/// Placeholder for the MachO `BcSymbolMap`, compiled in only when the `macho` feature is
/// disabled.
///
/// The type is part of the DWARF interface, so a stand-in has to exist even without MachO
/// support. It holds nothing but a lifetime marker.
#[cfg(not(feature = "macho"))]
#[derive(Debug)]
pub struct BcSymbolMap<'d> {
    _marker: std::marker::PhantomData<&'d str>,
}

#[cfg(not(feature = "macho"))]
impl BcSymbolMap<'_> {
    /// Never resolves anything: the placeholder always answers `None`, whatever the name.
    pub(crate) fn resolve_opt(&self, _name: impl AsRef<[u8]>) -> Option<&str> {
        Option::None
    }
}
48
49#[doc(hidden)]
50pub use gimli;
51pub use gimli::RunTimeEndian as Endian;
52
// Shorthands for gimli types. The reader types are all instantiated with `Slice`, an
// endian-aware byte slice whose endianness is the re-exported `Endian`.
type Slice<'a> = gimli::read::EndianSlice<'a, Endian>;
type RangeLists<'a> = gimli::read::RangeLists<Slice<'a>>;
type Unit<'a> = gimli::read::Unit<Slice<'a>>;
type DwarfInner<'a> = gimli::read::Dwarf<Slice<'a>>;

// Debugging information entries, their attributes and the offsets used to address them.
type Die<'d> = gimli::read::DebuggingInformationEntry<Slice<'d>, usize>;
type Attribute<'a> = gimli::read::Attribute<Slice<'a>>;
type UnitOffset = gimli::read::UnitOffset<usize>;
type DebugInfoOffset = gimli::DebugInfoOffset<usize>;
type EntriesRaw<'d, 'u> = gimli::EntriesRaw<'u, Slice<'d>>;

// Unit headers and line number programs.
type UnitHeader<'a> = gimli::read::UnitHeader<Slice<'a>>;
type IncompleteLineNumberProgram<'a> = gimli::read::IncompleteLineProgram<Slice<'a>>;
type LineNumberProgramHeader<'a> = gimli::read::LineProgramHeader<Slice<'a>>;
type LineProgramFileEntry<'a> = gimli::read::FileEntry<Slice<'a>>;
68
/// Applies the offset to the address by subtracting the signed `offset` from `addr`.
///
/// The subtraction wraps around on overflow, so this function never panics, no matter how
/// large or small the inputs are.
fn offset(addr: u64, offset: i64) -> u64 {
    // Reinterpreting the signed offset as `u64` keeps its two's complement bit pattern, so
    // the wrapping subtraction yields the same bits as subtracting in the signed domain.
    addr.wrapping_sub(offset as u64)
}
76
/// The kind of failure reported by a [`DwarfError`].
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DwarfErrorKind {
    /// A referenced compilation unit does not exist. The payload is printed as an offset in
    /// the error message.
    InvalidUnitRef(usize),

    /// A file record referenced by index does not exist.
    InvalidFileRef(u64),

    /// An inline record was encountered without an inlining parent.
    UnexpectedInline,

    /// The debug_ranges of a function are invalid.
    InvertedFunctionRange,

    /// The DWARF file is corrupted. See the cause for more information.
    CorruptedData,
}

impl fmt::Display for DwarfErrorKind {
    /// Writes the human-readable message for this kind of error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::InvalidUnitRef(offset) => {
                write!(f, "compilation unit for offset {offset} does not exist")
            }
            Self::InvalidFileRef(id) => write!(f, "referenced file {id} does not exist"),
            // The remaining messages are constant and need no formatting machinery.
            Self::UnexpectedInline => f.write_str("unexpected inline function without parent"),
            Self::InvertedFunctionRange => f.write_str("function with inverted address range"),
            Self::CorruptedData => f.write_str("corrupted dwarf debug data"),
        }
    }
}
110
/// An error handling [`DWARF`](trait.Dwarf.html) debugging information.
///
/// The error displays as its [`DwarfErrorKind`]; the optional underlying cause is reported as
/// the error's source.
#[derive(Debug, Error)]
#[error("{kind}")]
pub struct DwarfError {
    /// Classification of the failure; also provides the display message.
    kind: DwarfErrorKind,
    /// The underlying error that caused this one, if any.
    #[source]
    source: Option<Box<dyn Error + Send + Sync + 'static>>,
}
119
120impl DwarfError {
121    /// Creates a new DWARF error from a known kind of error as well as an arbitrary error
122    /// payload.
123    fn new<E>(kind: DwarfErrorKind, source: E) -> Self
124    where
125        E: Into<Box<dyn Error + Send + Sync>>,
126    {
127        let source = Some(source.into());
128        Self { kind, source }
129    }
130
131    /// Returns the corresponding [`DwarfErrorKind`] for this error.
132    pub fn kind(&self) -> DwarfErrorKind {
133        self.kind
134    }
135}
136
137impl From<DwarfErrorKind> for DwarfError {
138    fn from(kind: DwarfErrorKind) -> Self {
139        Self { kind, source: None }
140    }
141}
142
143impl From<GimliError> for DwarfError {
144    fn from(e: GimliError) -> Self {
145        Self::new(DwarfErrorKind::CorruptedData, e)
146    }
147}
148
/// A DWARF section: where it is located, how it is aligned, and its data.
///
/// Objects implementing the [`Dwarf`] trait hand out values of this type.
///
/// [`Dwarf`]: trait.Dwarf.html
#[derive(Clone)]
pub struct DwarfSection<'data> {
    /// Memory address of this section in virtual memory.
    pub address: u64,

    /// File offset of this section.
    pub offset: u64,

    /// Section address alignment (power of two).
    pub align: u64,

    /// Binary data of this section.
    pub data: Cow<'data, [u8]>,
}

impl fmt::Debug for DwarfSection<'_> {
    /// Prints the numeric fields in hexadecimal and only the length of the data, so that
    /// debug output does not dump the whole section.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            address,
            offset,
            align,
            data,
        } = self;

        let mut out = f.debug_struct("DwarfSection");
        out.field("address", &format_args!("{address:#x}"));
        out.field("offset", &format_args!("{offset:#x}"));
        out.field("align", &format_args!("{align:#x}"));
        out.field("len()", &data.len());
        out.finish()
    }
}
179
/// Provides access to DWARF debugging information independent of the container file type.
///
/// When implementing this trait, verify whether the container file type supports compressed section
/// data. If so, override the provided `section` method. Also, if there is a faster way to
/// check for the existence of a section without loading its data, override `has_section`.
pub trait Dwarf<'data> {
    /// Returns whether the file was compiled for a big-endian or little-endian machine.
    ///
    /// This can usually be determined by inspecting the file's headers. Sometimes, this is also
    /// given by the architecture.
    fn endianity(&self) -> Endian;

    /// Returns information and raw data of a section.
    ///
    /// The section name is given without leading punctuation, such as dots or underscores. For
    /// instance, the name of the Debug Info section would be `"debug_info"`, which translates to
    /// `".debug_info"` in ELF and `"__debug_info"` in MachO.
    ///
    /// Certain containers might allow compressing section data. In this case, this function returns
    /// the compressed data. To get uncompressed data instead, use `section`.
    fn raw_section(&self, name: &str) -> Option<DwarfSection<'data>>;

    /// Returns information and data of a section.
    ///
    /// If the section is compressed, this decompresses on the fly and returns allocated memory.
    /// Otherwise, this should return a slice of the raw data.
    ///
    /// The provided implementation performs no decompression and simply forwards to
    /// `raw_section`.
    ///
    /// The section name is given without leading punctuation, such as dots or underscores. For
    /// instance, the name of the Debug Info section would be `"debug_info"`, which translates to
    /// `".debug_info"` in ELF and `"__debug_info"` in MachO.
    fn section(&self, name: &str) -> Option<DwarfSection<'data>> {
        self.raw_section(name)
    }

    /// Determines whether the specified section exists.
    ///
    /// The provided implementation loads the section through `raw_section` and checks whether
    /// anything was returned.
    ///
    /// The section name is given without leading punctuation, such as dots or underscores. For
    /// instance, the name of the Debug Info section would be `"debug_info"`, which translates to
    /// `".debug_info"` in ELF and `"__debug_info"` in MachO.
    fn has_section(&self, name: &str) -> bool {
        self.raw_section(name).is_some()
    }
}
223
/// A row in the DWARF line program.
#[derive(Debug)]
struct DwarfRow {
    /// Address at which this row starts.
    address: u64,
    /// Index of the file record this row refers to.
    file_index: u64,
    /// Line number of this row, if the line program provides one.
    line: Option<u64>,
    /// Distance to the address of the following row, once that row is known.
    size: Option<u64>,
}
232
/// A sequence in the DWARF line program.
#[derive(Debug)]
struct DwarfSequence {
    /// Address of the first row of the sequence.
    start: u64,
    /// Address at which the sequence ends; `get_rows` treats it as exclusive.
    end: u64,
    /// Rows of the sequence, in the order in which the line program emitted them.
    rows: Vec<DwarfRow>,
}
240
/// Helper that prepares a DwarfLineProgram for more efficient access.
#[derive(Debug)]
struct DwarfLineProgram<'d> {
    /// Header of the line program, kept for resolving file entries.
    header: LineNumberProgramHeader<'d>,
    /// All non-empty sequences of the program, sorted by their start address.
    sequences: Vec<DwarfSequence>,
}
247
248impl<'d> DwarfLineProgram<'d> {
249    fn prepare(program: IncompleteLineNumberProgram<'d>) -> Self {
250        let mut sequences = Vec::new();
251        let mut sequence_rows = Vec::<DwarfRow>::new();
252        let mut prev_address = 0;
253        let mut state_machine = program.rows();
254
255        while let Ok(Some((_, &program_row))) = state_machine.next_row() {
256            let address = program_row.address();
257
258            // we have seen rustc emit for WASM targets a bad sequence that spans from 0 to
259            // the end of the program.  https://github.com/rust-lang/rust/issues/79410
260            // We want to skip these bad sequences. Unfortunately, code in .o files can legitimately
261            // be located at address 0, so we incorrectly skip line sequences in that case, too.
262            // See https://github.com/getsentry/symbolic/issues/471 .
263            if address == 0 {
264                continue;
265            }
266
267            if let Some(last_row) = sequence_rows.last_mut() {
268                if address >= last_row.address {
269                    last_row.size = Some(address - last_row.address);
270                }
271            }
272
273            if program_row.end_sequence() {
274                // Theoretically, there could be multiple DW_LNE_end_sequence in a row. We're not
275                // interested in empty sequences, so we can skip them completely.
276                if !sequence_rows.is_empty() {
277                    sequences.push(DwarfSequence {
278                        start: sequence_rows[0].address,
279                        // Take a defensive approach and ensure that `high_address` always covers
280                        // the last encountered row, assuming a 1 byte instruction.
281                        end: if address < prev_address {
282                            prev_address + 1
283                        } else {
284                            address
285                        },
286                        rows: std::mem::take(&mut sequence_rows),
287                    });
288                }
289                prev_address = 0;
290            } else if address < prev_address {
291                // The standard says:
292                // "Within a sequence, addresses and operation pointers may only increase."
293                // So this row is invalid, we can ignore it.
294                //
295                // If we wanted to handle this, we could start a new sequence
296                // here, but let's wait until that is needed.
297            } else {
298                let file_index = program_row.file_index();
299                let line = program_row.line().map(|v| v.get());
300                let mut duplicate = false;
301                if let Some(last_row) = sequence_rows.last_mut() {
302                    if last_row.address == address {
303                        last_row.file_index = file_index;
304                        last_row.line = line;
305                        duplicate = true;
306                    }
307                }
308                if !duplicate {
309                    sequence_rows.push(DwarfRow {
310                        address,
311                        file_index,
312                        line,
313                        size: None,
314                    });
315                }
316                prev_address = address;
317            }
318        }
319
320        if !sequence_rows.is_empty() {
321            // A sequence without an end_sequence row.
322            // Let's assume the last row covered 1 byte.
323            let start = sequence_rows[0].address;
324            let end = prev_address + 1;
325            sequences.push(DwarfSequence {
326                start,
327                end,
328                rows: sequence_rows,
329            });
330        }
331
332        // Sequences are not guaranteed to be in order.
333        sequences.sort_by_key(|x| x.start);
334
335        DwarfLineProgram {
336            header: state_machine.header().clone(),
337            sequences,
338        }
339    }
340
341    /// Finds the first sequence which contains rows overlapping with the specified `range`.
342    ///
343    ///
344    /// ```text
345    ///       ┌─────────────────────────────────────────────────┐┌───────────────────────┐
346    ///       │┌─────────────────┐┌───┐┌──────────────────────┐ ││┌─────────────────────┐│
347    ///       ││Row 1            ││ 2 ││Row 3                 │ │││Row X                ││
348    ///       │└─────────────────┘└───┘└──────────────────────┘ ││└─────────────────────┘│
349    ///       └─────────────────────────────────────────────────┘└───────────────────────┘
350    ///
351    ///                             ├────────────────────────┘ Range A
352    ///                             └────────────────────────┬────────┘ Range B
353    /// └────────────────────────────────────────────────────┘ Range C
354    /// ```
355    ///
356    /// Range `A` and `B` yield rows 2 and 3, range `C` yields all rows of the sequence,
357    /// `1`, `2`, and `3`.
358    pub fn get_rows(&self, range: &Range) -> &[DwarfRow] {
359        for seq in &self.sequences {
360            if range.begin >= seq.end || seq.start >= range.end {
361                continue;
362            }
363
364            let from = match seq.rows.binary_search_by_key(&range.begin, |x| x.address) {
365                Ok(idx) => idx,
366                // If the range begins before the sequence, we clamp it to the beginning of the
367                // sequence as a range determined by the low and high pc may include a function
368                // prologue which is not mapped into the addr/line matrix.
369                //
370                // Since the sequences are ordered by address, this can only happen if the beginning
371                // of the range is not part of another sequence already.
372                Err(0) => 0,
373                Err(next_idx) => next_idx - 1,
374            };
375
376            let len = seq.rows[from..]
377                .binary_search_by_key(&range.end, |x| x.address)
378                // Similarly here, we clamp the end of the range to the end of the sequence.
379                //
380                // There is a potential argument to be made that a range can span multiple
381                // sequences, the spec isn't entirely clear about whether this is possible.
382                //
383                // For the moment we haven't observed any such case.
384                .unwrap_or_else(|e| e);
385
386            return &seq.rows[from..from + len];
387        }
388        &[]
389    }
390}
391
/// A slim wrapper around a DWARF unit.
///
/// It pairs a unit with the `DwarfInfo` it was obtained from and only holds references, so
/// it is cheap to copy.
#[derive(Clone, Copy, Debug)]
struct UnitRef<'d, 'a> {
    /// The debug information used to resolve strings and references to other units.
    info: &'a DwarfInfo<'d>,
    /// The unit this reference points to.
    unit: &'a Unit<'d>,
}
398
399impl<'d> UnitRef<'d, '_> {
400    /// Resolve the binary value of an attribute.
401    #[inline(always)]
402    fn slice_value(&self, value: AttributeValue<Slice<'d>>) -> Option<&'d [u8]> {
403        self.info
404            .attr_string(self.unit, value)
405            .map(|reader| reader.slice())
406            .ok()
407    }
408
409    /// Resolve the actual string value of an attribute.
410    #[inline(always)]
411    fn string_value(&self, value: AttributeValue<Slice<'d>>) -> Option<Cow<'d, str>> {
412        let slice = self.slice_value(value)?;
413        Some(String::from_utf8_lossy(slice))
414    }
415
416    /// Resolves an entry and if found invokes a function to transform it.
417    ///
418    /// As this might resolve into cached information the data borrowed from
419    /// abbrev can only be temporarily accessed in the callback.
420    fn resolve_reference<T, F>(&self, attr: Attribute<'d>, f: F) -> Result<Option<T>, DwarfError>
421    where
422        F: FnOnce(Self, &Die<'d>) -> Result<Option<T>, DwarfError>,
423    {
424        let (unit, offset) = match attr.value() {
425            AttributeValue::UnitRef(offset) => (*self, offset),
426            AttributeValue::DebugInfoRef(offset) => self.info.find_unit_offset(offset)?,
427            // TODO: There is probably more that can come back here.
428            _ => return Ok(None),
429        };
430
431        let mut entries = unit.unit.entries_at_offset(offset)?;
432        entries.next_entry()?;
433
434        if let Some(entry) = entries.current() {
435            f(unit, entry)
436        } else {
437            Ok(None)
438        }
439    }
440
441    /// Returns the offset of this unit within its section.
442    fn offset(&self) -> UnitSectionOffset {
443        self.unit.header.offset()
444    }
445
446    /// Returns the source language declared in the root DIE of this compilation unit.
447    fn language(&self) -> Result<Option<Language>, DwarfError> {
448        let mut entries = self.unit.entries();
449        let Some(root_entry) = entries.next_dfs()? else {
450            return Ok(None);
451        };
452        let Some(AttributeValue::Language(lang)) = root_entry.attr_value(constants::DW_AT_language)
453        else {
454            return Ok(None);
455        };
456        Ok(Some(language_from_dwarf(lang)))
457    }
458
459    /// Maximum recursion depth for following `DW_AT_abstract_origin` chains, matching the limit
460    /// used by elfutils `dwarf_attr_integrate`.
461    const MAX_ABSTRACT_ORIGIN_DEPTH: u8 = 16;
462
463    /// Resolves the source language for a DIE by following `DW_AT_abstract_origin` chains,
464    /// including across compilation unit boundaries. `depth` limits recursion to guard against
465    /// cycles or malformed DWARF.
466    fn resolve_entry_language(
467        &self,
468        entry: &Die<'d>,
469        depth: u8,
470    ) -> Result<Option<Language>, DwarfError> {
471        if depth == 0 {
472            return Ok(None);
473        }
474        if let Some(attr) = entry.attr(constants::DW_AT_abstract_origin) {
475            return self.resolve_reference(*attr, |ref_unit, ref_entry| {
476                // Recurse first to follow deeper chains.
477                if let Some(lang) = ref_unit.resolve_entry_language(ref_entry, depth - 1)? {
478                    return Ok(Some(lang));
479                }
480                // No deeper reference: use the CU language if this is a cross-unit ref.
481                if self.offset() != ref_unit.offset() {
482                    ref_unit.language()
483                } else {
484                    Ok(None)
485                }
486            });
487        }
488        Ok(None)
489    }
490
491    /// Resolves the function name of a debug entry.
492    fn resolve_function_name(
493        &self,
494        entry: &Die<'d>,
495        language: Language,
496        bcsymbolmap: Option<&'d BcSymbolMap<'d>>,
497        prior_offset: Option<UnitOffset>,
498    ) -> Result<Option<Name<'d>>, DwarfError> {
499        let mut fallback_name = None;
500        let mut reference_target = None;
501
502        for attr in entry.attrs() {
503            match attr.name() {
504                // Prioritize these. If we get them, take them.
505                constants::DW_AT_linkage_name | constants::DW_AT_MIPS_linkage_name => {
506                    return Ok(self
507                        .string_value(attr.value())
508                        .map(|n| resolve_cow_name(bcsymbolmap, n))
509                        .map(|n| Name::new(n, NameMangling::Mangled, language)));
510                }
511                constants::DW_AT_name => {
512                    fallback_name = Some(attr);
513                }
514                constants::DW_AT_abstract_origin | constants::DW_AT_specification => {
515                    reference_target = Some(attr);
516                }
517                _ => {}
518            }
519        }
520
521        if let Some(attr) = fallback_name {
522            return Ok(self
523                .string_value(attr.value())
524                .map(|n| resolve_cow_name(bcsymbolmap, n))
525                .map(|n| Name::new(n, NameMangling::Unmangled, language)));
526        }
527
528        if let Some(attr) = reference_target {
529            return self.resolve_reference(*attr, |ref_unit, ref_entry| {
530                // Self-references may have a layer of indirection. Avoid infinite recursion
531                // in this scenario.
532                if let Some(prior) = prior_offset {
533                    if self.offset() == ref_unit.offset() && prior == ref_entry.offset() {
534                        return Ok(None);
535                    }
536                }
537
538                if self.offset() != ref_unit.offset() || entry.offset() != ref_entry.offset() {
539                    ref_unit.resolve_function_name(
540                        ref_entry,
541                        language,
542                        bcsymbolmap,
543                        Some(entry.offset()),
544                    )
545                } else {
546                    Ok(None)
547                }
548            });
549        }
550
551        Ok(None)
552    }
553}
554
/// Wrapper around a DWARF Unit.
#[derive(Debug)]
struct DwarfUnit<'d, 'a> {
    /// The underlying unit together with the debug information it belongs to.
    inner: UnitRef<'d, 'a>,
    /// Optional symbol map passed to the name resolution helpers.
    bcsymbolmap: Option<&'d BcSymbolMap<'d>>,
    /// Language declared on the root DIE, or `Language::Unknown` if there is none.
    language: Language,
    /// The prepared line program of this unit, if it has one.
    line_program: Option<DwarfLineProgram<'d>>,
    /// Whether the unit's producer is the Dart VM, see `from_unit`.
    prefer_dwarf_names: bool,
}
564
565impl<'d, 'a> DwarfUnit<'d, 'a> {
566    /// Creates a DWARF unit from the gimli `Unit` type.
567    fn from_unit(
568        unit: &'a Unit<'d>,
569        info: &'a DwarfInfo<'d>,
570        bcsymbolmap: Option<&'d BcSymbolMap<'d>>,
571    ) -> Result<Option<Self>, DwarfError> {
572        let inner = UnitRef { info, unit };
573        let mut entries = unit.entries();
574        let entry = match entries.next_dfs()? {
575            Some(entry) => entry,
576            None => return Err(gimli::read::Error::MissingUnitDie.into()),
577        };
578
579        // Clang's LLD might eliminate an entire compilation unit and simply set the low_pc to zero
580        // and remove all range entries to indicate that it is missing. Skip such a unit, as it does
581        // not contain any code that can be executed. Special case relocatable objects, as here the
582        // range information has not been written yet and all units look like this.
583        if info.kind != ObjectKind::Relocatable
584            && unit.low_pc == 0
585            && entry.attr(constants::DW_AT_ranges).is_none()
586        {
587            return Ok(None);
588        }
589
590        let language = match entry.attr_value(constants::DW_AT_language) {
591            Some(AttributeValue::Language(lang)) => language_from_dwarf(lang),
592            _ => Language::Unknown,
593        };
594
595        let line_program = unit
596            .line_program
597            .as_ref()
598            .map(|program| DwarfLineProgram::prepare(program.clone()));
599
600        // The value of DW_AT_producer may be an in-place string or a
601        // reference into the debug_str section. We use `string_value`
602        // to resolve it correctly in either case.
603        let producer = entry
604            .attr_value(constants::DW_AT_producer)
605            .and_then(|av| av.string_value(&info.inner.debug_str));
606
607        // Trust the symbol table more to contain accurate mangled names. However, since Dart's name
608        // mangling is lossy, we need to load the demangled name instead.
609        let prefer_dwarf_names = producer.as_deref() == Some(b"Dart VM");
610
611        Ok(Some(DwarfUnit {
612            inner,
613            bcsymbolmap,
614            language,
615            line_program,
616            prefer_dwarf_names,
617        }))
618    }
619
620    /// The path of the compilation directory. File names are usually relative to this path.
621    fn compilation_dir(&self) -> &'d [u8] {
622        match self.inner.unit.comp_dir {
623            Some(ref dir) => resolve_byte_name(self.bcsymbolmap, dir.slice()),
624            None => &[],
625        }
626    }
627
628    /// Parses the call site and range lists of this Debugging Information Entry.
629    ///
630    /// This method consumes the attributes of the DIE. This means that the `entries` iterator must
631    /// be placed just before the attributes of the DIE. On return, the `entries` iterator is placed
632    /// after the attributes, ready to read the next DIE's abbrev.
633    fn parse_ranges<'r>(
634        &self,
635        entries: &mut EntriesRaw<'d, '_>,
636        abbrev: &gimli::Abbreviation,
637        range_buf: &'r mut Vec<Range>,
638    ) -> Result<(&'r mut Vec<Range>, CallLocation), DwarfError> {
639        range_buf.clear();
640
641        let mut call_line = None;
642        let mut call_file = None;
643        let mut low_pc = None;
644        let mut high_pc = None;
645        let mut high_pc_rel = None;
646
647        let kind = self.inner.info.kind;
648
649        for spec in abbrev.attributes() {
650            let attr = entries.read_attribute(*spec)?;
651            match attr.name() {
652                constants::DW_AT_low_pc => match attr.value() {
653                    AttributeValue::Addr(addr) => low_pc = Some(addr),
654                    AttributeValue::DebugAddrIndex(index) => {
655                        low_pc = Some(self.inner.info.address(self.inner.unit, index)?)
656                    }
657                    _ => return Err(GimliError::UnsupportedAttributeForm(attr.form()).into()),
658                },
659                constants::DW_AT_high_pc => match attr.value() {
660                    AttributeValue::Addr(addr) => high_pc = Some(addr),
661                    AttributeValue::DebugAddrIndex(index) => {
662                        high_pc = Some(self.inner.info.address(self.inner.unit, index)?)
663                    }
664                    AttributeValue::Udata(size) => high_pc_rel = Some(size),
665                    _ => return Err(GimliError::UnsupportedAttributeForm(attr.form()).into()),
666                },
667                constants::DW_AT_call_line => match attr.value() {
668                    AttributeValue::Udata(line) => call_line = Some(line),
669                    _ => return Err(GimliError::UnsupportedAttributeForm(attr.form()).into()),
670                },
671                constants::DW_AT_call_file => match attr.value() {
672                    AttributeValue::FileIndex(file) => call_file = Some(file),
673                    _ => return Err(GimliError::UnsupportedAttributeForm(attr.form()).into()),
674                },
675                constants::DW_AT_ranges
676                | constants::DW_AT_rnglists_base
677                | constants::DW_AT_start_scope => {
678                    match self.inner.info.attr_ranges(self.inner.unit, attr.value())? {
679                        Some(mut ranges) => {
680                            while let Some(range) = match ranges.next() {
681                                Ok(range) => range,
682                                // We have seen broken ranges for some WASM debug files generated by
683                                // emscripten. They mostly manifest themselves in these errors, which
684                                // are triggered by an inverted range (going high to low).
685                                // See a few more examples of broken ranges here:
686                                // https://github.com/emscripten-core/emscripten/issues/15552
687                                Err(gimli::Error::InvalidCfiSetLoc(_)) => None,
688                                Err(err) => {
689                                    return Err(err.into());
690                                }
691                            } {
692                                // A range that begins at 0 indicates code that was eliminated by
693                                // the linker, see below.
694                                if range.begin > 0 || kind == ObjectKind::Relocatable {
695                                    range_buf.push(range);
696                                }
697                            }
698                        }
699                        None => continue,
700                    }
701                }
702                _ => continue,
703            }
704        }
705
706        let call_location = CallLocation {
707            call_file,
708            call_line,
709        };
710
711        if range_buf.is_empty() {
712            if let Some(range) = Self::convert_pc_range(low_pc, high_pc, high_pc_rel, kind)? {
713                range_buf.push(range);
714            }
715        }
716
717        Ok((range_buf, call_location))
718    }
719
720    fn convert_pc_range(
721        low_pc: Option<u64>,
722        high_pc: Option<u64>,
723        high_pc_rel: Option<u64>,
724        kind: ObjectKind,
725    ) -> Result<Option<Range>, DwarfError> {
726        // To go by the logic in dwarf2read, a `low_pc` of 0 can indicate an
727        // eliminated duplicate when the GNU linker is used. In relocatable
728        // objects, all functions are at `0` since they have not been placed
729        // yet, so we want to retain them.
730        let low_pc = match low_pc {
731            Some(low_pc) if low_pc != 0 || kind == ObjectKind::Relocatable => low_pc,
732            _ => return Ok(None),
733        };
734
735        let high_pc = match (high_pc, high_pc_rel) {
736            (Some(high_pc), _) => high_pc,
737            (_, Some(high_pc_rel)) => low_pc.wrapping_add(high_pc_rel),
738            _ => return Ok(None),
739        };
740
741        if low_pc == high_pc {
742            // Most likely low_pc == high_pc means the DIE should be ignored.
743            // https://sourceware.org/ml/gdb-patches/2011-03/msg00739.html
744            return Ok(None);
745        }
746
747        if low_pc == u64::MAX || low_pc == u64::MAX - 1 {
748            // Similarly, u64::MAX/u64::MAX-1 may be used to indicate deleted code.
749            // See https://reviews.llvm.org/D59553
750            return Ok(None);
751        }
752
753        if low_pc > high_pc {
754            return Err(DwarfErrorKind::InvertedFunctionRange.into());
755        }
756
757        Ok(Some(Range {
758            begin: low_pc,
759            end: high_pc,
760        }))
761    }
762
763    /// Resolves file information from a line program.
764    fn file_info(
765        &self,
766        line_program: &LineNumberProgramHeader<'d>,
767        file: &LineProgramFileEntry<'d>,
768    ) -> FileInfo<'d> {
769        FileInfo::new(
770            Cow::Borrowed(resolve_byte_name(
771                self.bcsymbolmap,
772                file.directory(line_program)
773                    .and_then(|attr| self.inner.slice_value(attr))
774                    .unwrap_or_default(),
775            )),
776            Cow::Borrowed(resolve_byte_name(
777                self.bcsymbolmap,
778                self.inner.slice_value(file.path_name()).unwrap_or_default(),
779            )),
780        )
781    }
782
783    /// Resolves a file entry by its index.
784    fn resolve_file(&self, file_id: u64) -> Option<FileInfo<'d>> {
785        let line_program = match self.line_program {
786            Some(ref program) => &program.header,
787            None => return None,
788        };
789
790        line_program
791            .file(file_id)
792            .map(|file| self.file_info(line_program, file))
793    }
794
795    /// Resolves the name of a function from the symbol table.
796    fn resolve_symbol_name(&self, address: u64, language: Language) -> Option<Name<'d>> {
797        let symbol = self.inner.info.symbol_map.lookup_exact(address)?;
798        let name = resolve_cow_name(self.bcsymbolmap, symbol.name.clone()?);
799        Some(Name::new(name, NameMangling::Mangled, language))
800    }
801
802    /// Resolves the source language for a function by following `DW_AT_abstract_origin` to the
803    /// origin compilation unit when crossing unit boundaries.
804    ///
805    /// With LTO, the linker may create artificial compilation units whose `DW_AT_language`
806    /// does not reflect the original source language (e.g., a C++ CU containing functions
807    /// originally written in C). When such a CU's subprogram carries a cross-unit
808    /// `DW_AT_abstract_origin`, the referenced CU's language is more authoritative.
809    fn resolve_function_language(&self, entry: &Die<'d>, fallback_language: Language) -> Language {
810        self.inner
811            .resolve_entry_language(entry, UnitRef::MAX_ABSTRACT_ORIGIN_DEPTH)
812            .ok()
813            .flatten()
814            .unwrap_or(fallback_language)
815    }
816
817    /// Parses any DW_TAG_subprogram DIEs in the DIE subtree.
818    fn parse_functions(
819        &self,
820        depth: isize,
821        entries: &mut EntriesRaw<'d, '_>,
822        output: &mut FunctionsOutput<'_, 'd>,
823    ) -> Result<(), DwarfError> {
824        while !entries.is_empty() {
825            let dw_die_offset = entries.next_offset();
826            let next_depth = entries.next_depth();
827            if next_depth <= depth {
828                return Ok(());
829            }
830            if let Some(abbrev) = entries.read_abbreviation()? {
831                if abbrev.tag() == constants::DW_TAG_subprogram {
832                    self.parse_function(dw_die_offset, next_depth, entries, abbrev, output)?;
833                } else {
834                    entries.skip_attributes(abbrev.attributes())?;
835                }
836            }
837        }
838        Ok(())
839    }
840
    /// Parse a single function from a DWARF DIE subtree.
    ///
    /// The `entries` iterator must be placed after the abbrev / before the attributes of the
    /// function DIE.
    ///
    /// This method can call itself recursively if another DW_TAG_subprogram entry is encountered
    /// in the subtree.
    ///
    /// One function is created per address range that survives filtering; each finished
    /// function is appended to `output.functions`. Ranges whose `(address, size)` pair is
    /// already present in `output.seen_ranges` are dropped as duplicates.
    ///
    /// On return, the `entries` iterator is placed after the attributes of the last-read DIE.
    fn parse_function(
        &self,
        dw_die_offset: gimli::UnitOffset<usize>,
        depth: isize,
        entries: &mut EntriesRaw<'d, '_>,
        abbrev: &gimli::Abbreviation,
        output: &mut FunctionsOutput<'_, 'd>,
    ) -> Result<(), DwarfError> {
        // The second tuple element is not needed for top-level functions and is discarded.
        let (ranges, _) = self.parse_ranges(entries, abbrev, &mut output.range_buf)?;

        let seen_ranges = &mut *output.seen_ranges;
        ranges.retain(|range| {
            // Filter out reversed ranges. Note that a zero-length range (`begin == end`) passes
            // this check.
            if range.begin > range.end {
                return false;
            }

            // We have seen duplicate top-level function entries being yielded from the
            // [`DwarfFunctionIterator`], which combined with recursively walking its inlinees can
            // blow past symcache limits.
            // We suspect the reason is that the same top-level functions might be defined in
            // different compile units. We suspect this might be caused by link-time deduplication
            // which merges templated code that is being generated multiple times in each
            // compilation unit. We make sure to detect this here, so we can avoid creating these
            // duplicates as early as possible.
            let address = offset(range.begin, self.inner.info.address_offset);
            let size = range.end - range.begin;

            // `insert` returns `false` for a pair that is already in the set, which removes the
            // duplicate range from `ranges`.
            seen_ranges.insert((address, size))
        });

        // Ranges can be empty for three reasons: (1) the function is a no-op and does not
        // contain any code, (2) the function did contain eliminated dead code, or (3) some
        // tooling created garbage reversed ranges which we filtered out.
        // In the dead code case, a surrogate DIE remains with `DW_AT_low_pc(0)` and empty ranges.
        // That DIE might still contain inlined functions with actual ranges - these must be skipped.
        // However, non-inlined functions may be present in this subtree, so we must still descend
        // into it.
        if ranges.is_empty() {
            return self.parse_functions(depth, entries, output);
        }

        // Resolve functions in the symbol table first and fall back to debug information only
        // if there is no match. Sometimes, debug info contains a lesser quality of symbol
        // names.
        //
        // XXX: Maybe we should actually parse the ranges in the resolve function and always
        // look at the symbol table based on the start of the DIE range.

        let entry = self.inner.unit.entry(dw_die_offset)?;
        // With LTO the current CU may be an artificial unit with an incorrect language. Follow
        // DW_AT_abstract_origin cross-unit to find the true source language. The resolved
        // language is also propagated to all inlinees of this function.
        let language = self.resolve_function_language(&entry, self.language);

        let symbol_name = if self.prefer_dwarf_names {
            None
        } else {
            // `ranges` is known to be non-empty here (checked above), so `min()` yields a value.
            let first_range_begin = ranges.iter().map(|range| range.begin).min().unwrap();
            let function_address = offset(first_range_begin, self.inner.info.address_offset);
            self.resolve_symbol_name(function_address, language)
        };

        // Name precedence: symbol table, then DWARF, then an empty unmangled name. Errors from
        // the DWARF name lookup are ignored and treated like a missing name.
        let name = symbol_name
            .or_else(|| {
                self.inner
                    .resolve_function_name(&entry, language, self.bcsymbolmap, None)
                    .ok()
                    .flatten()
            })
            .unwrap_or_else(|| Name::new("", NameMangling::Unmangled, language));

        // Create one function per range. In the common case there is only one range, so
        // we usually only have one function builder here.
        let mut builders: Vec<(Range, FunctionBuilder)> = ranges
            .iter()
            .map(|range| {
                let address = offset(range.begin, self.inner.info.address_offset);
                let size = range.end - range.begin;
                (
                    *range,
                    FunctionBuilder::new(name.clone(), self.compilation_dir(), address, size),
                )
            })
            .collect();

        // Walk the children with an inline depth of 0; inlinees found there are added to the
        // builders, nested subprograms are pushed to `output` on their own.
        self.parse_function_children(depth, 0, entries, &mut builders, output, language)?;

        // Attach line records from the line program to each builder's range.
        if let Some(line_program) = &self.line_program {
            for (range, builder) in &mut builders {
                for row in line_program.get_rows(range) {
                    let address = offset(row.address, self.inner.info.address_offset);
                    let size = row.size;
                    // Unresolvable files fall back to the default file info, missing lines to 0.
                    let file = self.resolve_file(row.file_index).unwrap_or_default();
                    let line = row.line.unwrap_or(0);
                    builder.add_leaf_line(address, size, file, line);
                }
            }
        }

        for (_range, builder) in builders {
            output.functions.push(builder.finish());
        }

        Ok(())
    }
956
957    /// Traverses a subtree during function parsing.
958    fn parse_function_children(
959        &self,
960        depth: isize,
961        inline_depth: u32,
962        entries: &mut EntriesRaw<'d, '_>,
963        builders: &mut [(Range, FunctionBuilder<'d>)],
964        output: &mut FunctionsOutput<'_, 'd>,
965        language: Language,
966    ) -> Result<(), DwarfError> {
967        while !entries.is_empty() {
968            let dw_die_offset = entries.next_offset();
969            let next_depth = entries.next_depth();
970            if next_depth <= depth {
971                return Ok(());
972            }
973            let abbrev = match entries.read_abbreviation()? {
974                Some(abbrev) => abbrev,
975                None => continue,
976            };
977            match abbrev.tag() {
978                constants::DW_TAG_subprogram => {
979                    // Nested subprograms resolve their own language independently.
980                    self.parse_function(dw_die_offset, next_depth, entries, abbrev, output)?;
981                }
982                constants::DW_TAG_inlined_subroutine => {
983                    self.parse_inlinee(
984                        dw_die_offset,
985                        next_depth,
986                        inline_depth,
987                        entries,
988                        abbrev,
989                        builders,
990                        output,
991                        language,
992                    )?;
993                }
994                _ => {
995                    entries.skip_attributes(abbrev.attributes())?;
996                }
997            }
998        }
999        Ok(())
1000    }
1001
    /// Recursively parse the inlinees of a function from a DWARF DIE subtree.
    ///
    /// The `entries` iterator must be placed just before the attributes of the inline function DIE.
    ///
    /// This method calls itself recursively for other DW_TAG_inlined_subroutine entries in the
    /// subtree. It can also call `parse_function` if a `DW_TAG_subprogram` entry is encountered.
    ///
    /// For every non-empty range of the inlinee, an inlinee record at `inline_depth` is added to
    /// the builder in `builders` whose outer range contains the start of that range. Ranges
    /// that start outside of all outer ranges are ignored. `language` is the language of the
    /// enclosing function and serves as the fallback for this inlinee.
    ///
    /// On return, the `entries` iterator is placed after the attributes of the last-read DIE.
    // The traversal state is threaded through explicitly, hence the long parameter list.
    #[allow(clippy::too_many_arguments)]
    fn parse_inlinee(
        &self,
        dw_die_offset: gimli::UnitOffset<usize>,
        depth: isize,
        inline_depth: u32,
        entries: &mut EntriesRaw<'d, '_>,
        abbrev: &gimli::Abbreviation,
        builders: &mut [(Range, FunctionBuilder<'d>)],
        output: &mut FunctionsOutput<'_, 'd>,
        language: Language,
    ) -> Result<(), DwarfError> {
        let (ranges, call_location) = self.parse_ranges(entries, abbrev, &mut output.range_buf)?;

        // Drop both empty (`begin == end`) and reversed ranges.
        ranges.retain(|range| range.end > range.begin);

        // Ranges can be empty for three reasons: (1) the function is a no-op and does not
        // contain any code, (2) the function did contain eliminated dead code, or (3) some
        // tooling created garbage reversed ranges which we filtered out.
        // In the dead code case, a surrogate DIE remains with `DW_AT_low_pc(0)` and empty ranges.
        // That DIE might still contain inlined functions with actual ranges - these must be skipped.
        // However, non-inlined functions may be present in this subtree, so we must still descend
        // into it.
        if ranges.is_empty() {
            return self.parse_functions(depth, entries, output);
        }

        let entry = self.inner.unit.entry(dw_die_offset)?;
        // Resolve the language for this inlinee. If that yields nothing, fall back to the
        // language handed down from the enclosing function rather than to self.language.
        let language = self.resolve_function_language(&entry, language);

        // Errors from the name lookup are ignored; an inlinee without a resolvable name gets an
        // empty unmangled name.
        let name = self
            .inner
            .resolve_function_name(&entry, language, self.bcsymbolmap, None)
            .ok()
            .flatten()
            .unwrap_or_else(|| Name::new("", NameMangling::Unmangled, language));

        // A missing or unresolvable call file becomes the default file info, a missing call
        // line becomes 0.
        let call_file = call_location
            .call_file
            .and_then(|i| self.resolve_file(i))
            .unwrap_or_default();
        let call_line = call_location.call_line.unwrap_or(0);

        // Create a separate inlinee for each range.
        for range in ranges.iter() {
            // Find the builder for the outer function that covers this range. Usually there's only
            // one outer range, so only one builder.
            let builder = match builders.iter_mut().find(|(outer_range, _builder)| {
                range.begin >= outer_range.begin && range.begin < outer_range.end
            }) {
                Some((_outer_range, builder)) => builder,
                None => continue,
            };

            let address = offset(range.begin, self.inner.info.address_offset);
            let size = range.end - range.begin;
            builder.add_inlinee(
                inline_depth,
                name.clone(),
                address,
                size,
                call_file.clone(),
                call_line,
            );
        }

        // Children of this inlinee are one inline level deeper and inherit its language.
        self.parse_function_children(depth, inline_depth + 1, entries, builders, output, language)
    }
1081
1082    /// Collects all functions within this compilation unit.
1083    fn functions(
1084        &self,
1085        seen_ranges: &mut BTreeSet<(u64, u64)>,
1086    ) -> Result<Vec<Function<'d>>, DwarfError> {
1087        let mut entries = self.inner.unit.entries_raw(None)?;
1088        let mut output = FunctionsOutput::with_seen_ranges(seen_ranges);
1089        self.parse_functions(-1, &mut entries, &mut output)?;
1090        Ok(output.functions)
1091    }
1092}
1093
/// The state we pass around during function parsing.
///
/// `'a` is the borrow of the caller-owned set of seen ranges, `'d` the lifetime of the parsed
/// functions.
struct FunctionsOutput<'a, 'd> {
    /// The list of fully-parsed outer functions. Items are appended whenever we are done
    /// parsing an entire function.
    pub functions: Vec<Function<'d>>,
    /// A scratch buffer which avoids frequent allocations. It is handed to `parse_ranges` each
    /// time the ranges of a DIE are read.
    pub range_buf: Vec<Range>,
    /// The set of `(address, size)` ranges of the functions we've already parsed. It is
    /// borrowed from the caller rather than owned by this struct.
    pub seen_ranges: &'a mut BTreeSet<(u64, u64)>,
}
1104
1105impl<'a> FunctionsOutput<'a, '_> {
1106    pub fn with_seen_ranges(seen_ranges: &'a mut BTreeSet<(u64, u64)>) -> Self {
1107        Self {
1108            functions: Vec::new(),
1109            range_buf: Vec::new(),
1110            seen_ranges,
1111        }
1112    }
1113}
1114
/// For returning (partial) call location information from `parse_ranges`.
///
/// Either field may be absent; `Default` yields a location with both fields unset.
#[derive(Debug, Default, Clone, Copy)]
struct CallLocation {
    /// Index of the file containing the call, as accepted by `resolve_file`.
    pub call_file: Option<u64>,
    /// Line of the call. Consumers in this file substitute `0` when it is missing.
    pub call_line: Option<u64>,
}
1121
1122/// Converts a DWARF language number into our `Language` type.
1123fn language_from_dwarf(language: gimli::DwLang) -> Language {
1124    match language {
1125        constants::DW_LANG_C => Language::C,
1126        constants::DW_LANG_C11 => Language::C,
1127        constants::DW_LANG_C89 => Language::C,
1128        constants::DW_LANG_C99 => Language::C,
1129        constants::DW_LANG_C_plus_plus => Language::Cpp,
1130        constants::DW_LANG_C_plus_plus_03 => Language::Cpp,
1131        constants::DW_LANG_C_plus_plus_11 => Language::Cpp,
1132        constants::DW_LANG_C_plus_plus_14 => Language::Cpp,
1133        constants::DW_LANG_D => Language::D,
1134        constants::DW_LANG_Go => Language::Go,
1135        constants::DW_LANG_ObjC => Language::ObjC,
1136        constants::DW_LANG_ObjC_plus_plus => Language::ObjCpp,
1137        constants::DW_LANG_Rust => Language::Rust,
1138        constants::DW_LANG_Swift => Language::Swift,
1139        _ => Language::Unknown,
1140    }
1141}
1142
/// Data of a specific DWARF section.
///
/// The type parameter `S` names the gimli section type this data belongs to. It is only used
/// at the type level; see `to_gimli` for the conversion into `S`.
struct DwarfSectionData<'data, S> {
    /// The raw section bytes, either borrowed or owned.
    data: Cow<'data, [u8]>,
    /// The byte order used when creating the gimli reader for `data`.
    endianity: Endian,
    /// Ties this value to the section type `S` without storing one.
    _ph: PhantomData<S>,
}
1149
1150impl<'data, S> DwarfSectionData<'data, S>
1151where
1152    S: gimli::read::Section<Slice<'data>>,
1153{
1154    /// Loads data for this section from the object file.
1155    fn load<D>(dwarf: &D) -> Self
1156    where
1157        D: Dwarf<'data>,
1158    {
1159        DwarfSectionData {
1160            data: dwarf
1161                .section(&S::section_name()[1..])
1162                .map(|section| section.data)
1163                .unwrap_or_default(),
1164            endianity: dwarf.endianity(),
1165            _ph: PhantomData,
1166        }
1167    }
1168
1169    /// Creates a gimli dwarf section object from the loaded data.
1170    fn to_gimli(&'data self) -> S {
1171        S::from(Slice::new(&self.data, self.endianity))
1172    }
1173}
1174
1175impl<'d, S> fmt::Debug for DwarfSectionData<'d, S>
1176where
1177    S: gimli::read::Section<Slice<'d>>,
1178{
1179    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1180        let owned = match self.data {
1181            Cow::Owned(_) => true,
1182            Cow::Borrowed(_) => false,
1183        };
1184
1185        f.debug_struct("DwarfSectionData")
1186            .field("type", &S::section_name())
1187            .field("endianity", &self.endianity)
1188            .field("len()", &self.data.len())
1189            .field("owned()", &owned)
1190            .finish()
1191    }
1192}
1193
/// All DWARF sections that are needed by `DwarfDebugSession`.
///
/// Each field holds the data of the section with the same name, tagged with the matching
/// gimli section type.
struct DwarfSections<'data> {
    debug_abbrev: DwarfSectionData<'data, gimli::read::DebugAbbrev<Slice<'data>>>,
    debug_addr: DwarfSectionData<'data, gimli::read::DebugAddr<Slice<'data>>>,
    debug_aranges: DwarfSectionData<'data, gimli::read::DebugAranges<Slice<'data>>>,
    debug_info: DwarfSectionData<'data, gimli::read::DebugInfo<Slice<'data>>>,
    debug_line: DwarfSectionData<'data, gimli::read::DebugLine<Slice<'data>>>,
    debug_line_str: DwarfSectionData<'data, gimli::read::DebugLineStr<Slice<'data>>>,
    debug_names: DwarfSectionData<'data, gimli::read::DebugNames<Slice<'data>>>,
    debug_str: DwarfSectionData<'data, gimli::read::DebugStr<Slice<'data>>>,
    debug_str_offsets: DwarfSectionData<'data, gimli::read::DebugStrOffsets<Slice<'data>>>,
    // The two range list sections are later combined into a single `RangeLists` reader.
    debug_ranges: DwarfSectionData<'data, gimli::read::DebugRanges<Slice<'data>>>,
    debug_rnglists: DwarfSectionData<'data, gimli::read::DebugRngLists<Slice<'data>>>,
    debug_macinfo: DwarfSectionData<'data, gimli::read::DebugMacinfo<Slice<'data>>>,
    debug_macro: DwarfSectionData<'data, gimli::read::DebugMacro<Slice<'data>>>,
}
1210
1211impl<'data> DwarfSections<'data> {
1212    /// Loads all sections from a DWARF object.
1213    fn from_dwarf<D>(dwarf: &D) -> Self
1214    where
1215        D: Dwarf<'data>,
1216    {
1217        DwarfSections {
1218            debug_abbrev: DwarfSectionData::load(dwarf),
1219            debug_addr: DwarfSectionData::load(dwarf),
1220            debug_aranges: DwarfSectionData::load(dwarf),
1221            debug_info: DwarfSectionData::load(dwarf),
1222            debug_line: DwarfSectionData::load(dwarf),
1223            debug_line_str: DwarfSectionData::load(dwarf),
1224            debug_names: DwarfSectionData::load(dwarf),
1225            debug_str: DwarfSectionData::load(dwarf),
1226            debug_str_offsets: DwarfSectionData::load(dwarf),
1227            debug_ranges: DwarfSectionData::load(dwarf),
1228            debug_rnglists: DwarfSectionData::load(dwarf),
1229            debug_macinfo: DwarfSectionData::load(dwarf),
1230            debug_macro: DwarfSectionData::load(dwarf),
1231        }
1232    }
1233}
1234
/// Parsed DWARF information of an object, together with the data needed to resolve functions.
struct DwarfInfo<'data> {
    /// The gimli DWARF reader over all sections. Also reachable through `Deref`.
    inner: DwarfInner<'data>,
    /// The headers of all units, collected once when parsing.
    headers: Vec<UnitHeader<'data>>,
    /// One lazily filled slot per entry in `headers`. A slot holding `None` marks a unit that
    /// was skipped because its unit DIE is missing.
    units: Vec<OnceCell<Option<Unit<'data>>>>,
    /// The symbol table used to look up function names by address.
    symbol_map: SymbolMap<'data>,
    /// The offset that is passed to `offset(..)` along with every address read from DWARF.
    address_offset: i64,
    /// The kind of the object this information was read from.
    kind: ObjectKind,
}
1243
/// Gives direct access to the wrapped gimli reader, so that its methods can be called on a
/// `DwarfInfo`.
impl<'d> Deref for DwarfInfo<'d> {
    type Target = DwarfInner<'d>;

    fn deref(&self) -> &Self::Target {
        &self.inner
    }
}
1251
1252impl<'d> DwarfInfo<'d> {
1253    /// Parses DWARF information from its raw section data.
1254    pub fn parse(
1255        sections: &'d DwarfSections<'d>,
1256        symbol_map: SymbolMap<'d>,
1257        address_offset: i64,
1258        kind: ObjectKind,
1259    ) -> Result<Self, DwarfError> {
1260        let mut inner = gimli::read::Dwarf {
1261            abbreviations_cache: Default::default(),
1262            debug_abbrev: sections.debug_abbrev.to_gimli(),
1263            debug_addr: sections.debug_addr.to_gimli(),
1264            debug_aranges: sections.debug_aranges.to_gimli(),
1265            debug_info: sections.debug_info.to_gimli(),
1266            debug_line: sections.debug_line.to_gimli(),
1267            debug_line_str: sections.debug_line_str.to_gimli(),
1268            debug_names: sections.debug_names.to_gimli(),
1269            debug_str: sections.debug_str.to_gimli(),
1270            debug_str_offsets: sections.debug_str_offsets.to_gimli(),
1271            debug_types: Default::default(),
1272            debug_macinfo: sections.debug_macinfo.to_gimli(),
1273            debug_macro: sections.debug_macro.to_gimli(),
1274            locations: Default::default(),
1275            ranges: RangeLists::new(
1276                sections.debug_ranges.to_gimli(),
1277                sections.debug_rnglists.to_gimli(),
1278            ),
1279            file_type: DwarfFileType::Main,
1280            sup: Default::default(),
1281        };
1282        inner.populate_abbreviations_cache(AbbreviationsCacheStrategy::Duplicates);
1283
1284        // Prepare random access to unit headers.
1285        let headers = FallibleIterator::collect::<Vec<_>>(inner.units())?;
1286        let units = headers.iter().map(|_| OnceCell::new()).collect();
1287
1288        Ok(DwarfInfo {
1289            inner,
1290            headers,
1291            units,
1292            symbol_map,
1293            address_offset,
1294            kind,
1295        })
1296    }
1297
1298    /// Loads a compilation unit.
1299    fn get_unit(&self, index: usize) -> Result<Option<&Unit<'d>>, DwarfError> {
1300        // Silently ignore unit references out-of-bound
1301        let cell = match self.units.get(index) {
1302            Some(cell) => cell,
1303            None => return Ok(None),
1304        };
1305
1306        let unit_opt = cell.get_or_try_init(|| {
1307            // Parse the compilation unit from the header. This requires a top-level DIE that
1308            // describes the unit itself. For some older DWARF files, this DIE might be missing
1309            // which causes gimli to error out. We prefer to skip them silently as this simply marks
1310            // an empty unit for us.
1311            let header = self.headers[index];
1312            match self.inner.unit(header) {
1313                Ok(unit) => Ok(Some(unit)),
1314                Err(gimli::read::Error::MissingUnitDie) => Ok(None),
1315                Err(error) => Err(DwarfError::from(error)),
1316            }
1317        })?;
1318
1319        Ok(unit_opt.as_ref())
1320    }
1321
1322    /// Resolves an offset into a different compilation unit.
1323    fn find_unit_offset(
1324        &self,
1325        offset: DebugInfoOffset,
1326    ) -> Result<(UnitRef<'d, '_>, UnitOffset), DwarfError> {
1327        let section_offset = UnitSectionOffset(offset.0);
1328        let search_result = self
1329            .headers
1330            .binary_search_by_key(&section_offset, UnitHeader::offset);
1331
1332        let index = match search_result {
1333            Ok(index) => index,
1334            Err(0) => return Err(DwarfErrorKind::InvalidUnitRef(offset.0).into()),
1335            Err(next_index) => next_index - 1,
1336        };
1337
1338        if let Some(unit) = self.get_unit(index)? {
1339            if let Some(unit_offset) = section_offset.to_unit_offset(unit) {
1340                return Ok((UnitRef { unit, info: self }, unit_offset));
1341            }
1342        }
1343
1344        Err(DwarfErrorKind::InvalidUnitRef(offset.0).into())
1345    }
1346
1347    /// Returns an iterator over all compilation units.
1348    fn units(&'d self, bcsymbolmap: Option<&'d BcSymbolMap<'d>>) -> DwarfUnitIterator<'d> {
1349        DwarfUnitIterator {
1350            info: self,
1351            bcsymbolmap,
1352            index: 0,
1353        }
1354    }
1355}
1356
/// Allows a `DwarfInfo<'d>` to be viewed with the shorter lifetime `'slf`.
impl<'slf, 'd: 'slf> AsSelf<'slf> for DwarfInfo<'d> {
    type Ref = DwarfInfo<'slf>;

    fn as_self(&'slf self) -> &'slf Self::Ref {
        // The source and target types differ only in their lifetime parameter, and `'d`
        // outlives `'slf`.
        // NOTE(review): soundness presumably relies on `DwarfInfo` being safe to treat as
        // covariant in that lifetime — confirm against the `AsSelf` contract.
        unsafe { std::mem::transmute(self) }
    }
}
1364
1365impl fmt::Debug for DwarfInfo<'_> {
1366    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1367        f.debug_struct("DwarfInfo")
1368            .field("headers", &self.headers)
1369            .field("symbol_map", &self.symbol_map)
1370            .field("address_offset", &self.address_offset)
1371            .finish()
1372    }
1373}
1374
/// An iterator over compilation units in a DWARF object.
///
/// Units that are unavailable or that `DwarfUnit::from_unit` declines are skipped silently;
/// errors are yielded as items.
struct DwarfUnitIterator<'s> {
    /// The DWARF information whose units are iterated.
    info: &'s DwarfInfo<'s>,
    /// Optional symbol map that is handed to every created `DwarfUnit`.
    bcsymbolmap: Option<&'s BcSymbolMap<'s>>,
    /// Index of the unit to look at next.
    index: usize,
}
1381
1382impl<'s> Iterator for DwarfUnitIterator<'s> {
1383    type Item = Result<DwarfUnit<'s, 's>, DwarfError>;
1384
1385    fn next(&mut self) -> Option<Self::Item> {
1386        while self.index < self.info.headers.len() {
1387            let result = self.info.get_unit(self.index);
1388            self.index += 1;
1389
1390            let unit = match result {
1391                Ok(Some(unit)) => unit,
1392                Ok(None) => continue,
1393                Err(error) => return Some(Err(error)),
1394            };
1395
1396            match DwarfUnit::from_unit(unit, self.info, self.bcsymbolmap) {
1397                Ok(Some(unit)) => return Some(Ok(unit)),
1398                Ok(None) => continue,
1399                Err(error) => return Some(Err(error)),
1400            }
1401        }
1402
1403        None
1404    }
1405}
1406
// Once `index` has reached the number of unit headers, `next` returns `None` without changing
// any state, so the iterator stays exhausted.
impl std::iter::FusedIterator for DwarfUnitIterator<'_> {}
1408
/// A debugging session for DWARF debugging information.
pub struct DwarfDebugSession<'data> {
    /// The boxed section data together with the `DwarfInfo` that was parsed from it.
    cell: SelfCell<Box<DwarfSections<'data>>, DwarfInfo<'data>>,
    /// Optional symbol map that is passed to the unit iterators created by this session.
    /// It is `None` right after parsing.
    bcsymbolmap: Option<Arc<BcSymbolMap<'data>>>,
}
1414
1415impl<'data> DwarfDebugSession<'data> {
1416    /// Parses a dwarf debugging information from the given DWARF file.
1417    pub fn parse<D>(
1418        dwarf: &D,
1419        symbol_map: SymbolMap<'data>,
1420        address_offset: i64,
1421        kind: ObjectKind,
1422    ) -> Result<Self, DwarfError>
1423    where
1424        D: Dwarf<'data>,
1425    {
1426        let sections = DwarfSections::from_dwarf(dwarf);
1427        let cell = SelfCell::try_new(Box::new(sections), |sections| {
1428            DwarfInfo::parse(unsafe { &*sections }, symbol_map, address_offset, kind)
1429        })?;
1430
1431        Ok(DwarfDebugSession {
1432            cell,
1433            bcsymbolmap: None,
1434        })
1435    }
1436
1437    /// Loads the [`BcSymbolMap`] into this debug session.
1438    ///
1439    /// All the file and function names yielded by this debug session will be resolved using
1440    /// the provided symbol map.
1441    #[cfg(feature = "macho")]
1442    pub(crate) fn load_symbolmap(&mut self, symbolmap: Option<Arc<BcSymbolMap<'data>>>) {
1443        self.bcsymbolmap = symbolmap;
1444    }
1445
1446    /// Returns an iterator over all source files in this debug file.
1447    pub fn files(&self) -> DwarfFileIterator<'_> {
1448        DwarfFileIterator {
1449            units: self.cell.get().units(self.bcsymbolmap.as_deref()),
1450            files: DwarfUnitFileIterator::default(),
1451            finished: false,
1452        }
1453    }
1454
1455    /// Returns an iterator over all functions in this debug file.
1456    pub fn functions(&self) -> DwarfFunctionIterator<'_> {
1457        DwarfFunctionIterator {
1458            units: self.cell.get().units(self.bcsymbolmap.as_deref()),
1459            functions: Vec::new().into_iter(),
1460            seen_ranges: BTreeSet::new(),
1461            finished: false,
1462        }
1463    }
1464
1465    /// See [DebugSession::source_by_path] for more information.
1466    pub fn source_by_path(
1467        &self,
1468        _path: &str,
1469    ) -> Result<Option<SourceFileDescriptor<'_>>, DwarfError> {
1470        Ok(None)
1471    }
1472}
1473
1474impl<'session> DebugSession<'session> for DwarfDebugSession<'_> {
1475    type Error = DwarfError;
1476    type FunctionIterator = DwarfFunctionIterator<'session>;
1477    type FileIterator = DwarfFileIterator<'session>;
1478
1479    fn functions(&'session self) -> Self::FunctionIterator {
1480        self.functions()
1481    }
1482
1483    fn files(&'session self) -> Self::FileIterator {
1484        self.files()
1485    }
1486
1487    fn source_by_path(&self, path: &str) -> Result<Option<SourceFileDescriptor<'_>>, Self::Error> {
1488        self.source_by_path(path)
1489    }
1490}
1491
/// An iterator over the file entries of a single compilation unit.
///
/// The default value has no unit and therefore yields nothing.
#[derive(Debug, Default)]
struct DwarfUnitFileIterator<'s> {
    /// The unit whose line program files are iterated, if any.
    unit: Option<DwarfUnit<'s, 's>>,
    /// Position of the next file in the line program's list of file names.
    index: usize,
}
1497
1498impl<'s> Iterator for DwarfUnitFileIterator<'s> {
1499    type Item = FileEntry<'s>;
1500
1501    fn next(&mut self) -> Option<Self::Item> {
1502        let unit = self.unit.as_ref()?;
1503        let line_program = unit.line_program.as_ref().map(|p| &p.header)?;
1504        let file = line_program.file_names().get(self.index)?;
1505
1506        self.index += 1;
1507
1508        Some(FileEntry::new(
1509            Cow::Borrowed(unit.compilation_dir()),
1510            unit.file_info(line_program, file),
1511        ))
1512    }
1513}
1514
1515fn resolve_byte_name<'s>(bcsymbolmap: Option<&'s BcSymbolMap<'s>>, s: &'s [u8]) -> &'s [u8] {
1516    bcsymbolmap
1517        .and_then(|b| b.resolve_opt(s))
1518        .map(AsRef::as_ref)
1519        .unwrap_or(s)
1520}
1521
1522fn resolve_cow_name<'s>(bcsymbolmap: Option<&'s BcSymbolMap<'s>>, s: Cow<'s, str>) -> Cow<'s, str> {
1523    bcsymbolmap
1524        .and_then(|b| b.resolve_opt(s.as_bytes()))
1525        .map(Cow::Borrowed)
1526        .unwrap_or(s)
1527}
1528
/// An iterator over source files in a DWARF file.
///
/// Files are yielded unit by unit. An error while loading a unit is yielded as an item.
pub struct DwarfFileIterator<'s> {
    /// The remaining compilation units.
    units: DwarfUnitIterator<'s>,
    /// The files of the unit that is currently being drained.
    files: DwarfUnitFileIterator<'s>,
    /// Set once the units are exhausted; every later call to `next` returns `None`.
    finished: bool,
}
1535
1536impl<'s> Iterator for DwarfFileIterator<'s> {
1537    type Item = Result<FileEntry<'s>, DwarfError>;
1538
1539    fn next(&mut self) -> Option<Self::Item> {
1540        if self.finished {
1541            return None;
1542        }
1543
1544        loop {
1545            if let Some(file_entry) = self.files.next() {
1546                return Some(Ok(file_entry));
1547            }
1548
1549            let unit = match self.units.next() {
1550                Some(Ok(unit)) => unit,
1551                Some(Err(error)) => return Some(Err(error)),
1552                None => break,
1553            };
1554
1555            self.files = DwarfUnitFileIterator {
1556                unit: Some(unit),
1557                index: 0,
1558            };
1559        }
1560
1561        self.finished = true;
1562        None
1563    }
1564}
1565
1566/// An iterator over functions in a DWARF file.
1567pub struct DwarfFunctionIterator<'s> {
1568    units: DwarfUnitIterator<'s>,
1569    functions: std::vec::IntoIter<Function<'s>>,
1570    seen_ranges: BTreeSet<(u64, u64)>,
1571    finished: bool,
1572}
1573
1574impl<'s> Iterator for DwarfFunctionIterator<'s> {
1575    type Item = Result<Function<'s>, DwarfError>;
1576
1577    fn next(&mut self) -> Option<Self::Item> {
1578        if self.finished {
1579            return None;
1580        }
1581
1582        loop {
1583            if let Some(func) = self.functions.next() {
1584                return Some(Ok(func));
1585            }
1586
1587            let unit = match self.units.next() {
1588                Some(Ok(unit)) => unit,
1589                Some(Err(error)) => return Some(Err(error)),
1590                None => break,
1591            };
1592
1593            self.functions = match unit.functions(&mut self.seen_ranges) {
1594                Ok(functions) => functions.into_iter(),
1595                Err(error) => return Some(Err(error)),
1596            };
1597        }
1598
1599        self.finished = true;
1600        None
1601    }
1602}
1603
1604impl std::iter::FusedIterator for DwarfFunctionIterator<'_> {}
1605
#[cfg(test)]
mod tests {
    use super::*;

    // The only test that needs `MachObject` is gated on the `macho` feature, and this file
    // imports other `crate::macho` items only under that feature. Gate the import the same way
    // so the test module still compiles when the feature is disabled.
    #[cfg(feature = "macho")]
    use crate::macho::MachObject;

    /// Checks that `DwarfSections::from_dwarf` loads the `debug_str_offsets` section of the
    /// Mach-O fixture, by asserting the expected length of its data.
    #[cfg(feature = "macho")]
    #[test]
    fn test_loads_debug_str_offsets() {
        // File generated using dsymutil

        let data = std::fs::read("tests/fixtures/helloworld").unwrap();

        let obj = MachObject::parse(&data).unwrap();

        let sections = DwarfSections::from_dwarf(&obj);
        assert_eq!(sections.debug_str_offsets.data.len(), 48);
    }
}