addr2line/
unit.rs

1use alloc::boxed::Box;
2use alloc::sync::Arc;
3use alloc::vec::Vec;
4use core::cmp;
5
6use gimli::ReaderOffset;
7
8use crate::{
9    Context, DebugFile, Error, Function, Functions, LazyFunctions, LazyLines, LazyResult,
10    LineLocationRangeIter, Lines, Location, LookupContinuation, LookupResult, RangeAttributes,
11    SimpleLookup, SplitDwarfLoad,
12};
13
14pub(crate) struct UnitRange {
15    unit_id: usize,
16    min_begin: u64,
17    range: gimli::Range,
18}
19
20pub(crate) struct ResUnit<R: gimli::Reader> {
21    offset: gimli::DebugInfoOffset<R::Offset>,
22    dw_unit: gimli::Unit<R>,
23    pub(crate) lang: Option<gimli::DwLang>,
24    lines: LazyLines,
25    functions: LazyFunctions<R>,
26    dwo: LazyResult<Option<Box<DwoUnit<R>>>>,
27}
28
29type UnitRef<'unit, R> = (DebugFile, gimli::UnitRef<'unit, R>);
30
31impl<R: gimli::Reader> ResUnit<R> {
32    pub(crate) fn unit_ref<'a>(&'a self, sections: &'a gimli::Dwarf<R>) -> gimli::UnitRef<'a, R> {
33        gimli::UnitRef::new(sections, &self.dw_unit)
34    }
35
36    /// Returns the DWARF sections and the unit.
37    ///
38    /// Loads the DWO unit if necessary.
39    #[allow(clippy::type_complexity)]
40    pub(crate) fn dwarf_and_unit<'unit, 'ctx: 'unit>(
41        &'unit self,
42        ctx: &'ctx Context<R>,
43    ) -> LookupResult<
44        SimpleLookup<
45            Result<UnitRef<'unit, R>, Error>,
46            R,
47            impl FnOnce(Option<Arc<gimli::Dwarf<R>>>) -> Result<UnitRef<'unit, R>, Error>,
48        >,
49    > {
50        let map_dwo = move |dwo: &'unit Result<Option<Box<DwoUnit<R>>>, Error>| match dwo {
51            Ok(Some(dwo)) => Ok((DebugFile::Dwo, dwo.unit_ref())),
52            Ok(None) => Ok((DebugFile::Primary, self.unit_ref(&*ctx.sections))),
53            Err(e) => Err(*e),
54        };
55        let complete = |dwo| SimpleLookup::new_complete(map_dwo(dwo));
56
57        if let Some(dwo) = self.dwo.get() {
58            return complete(dwo);
59        }
60
61        let dwo_id = match self.dw_unit.dwo_id {
62            None => {
63                return complete(self.dwo.get_or_init(|| Ok(None)));
64            }
65            Some(dwo_id) => dwo_id,
66        };
67
68        let comp_dir = self.dw_unit.comp_dir.clone();
69
70        let dwo_name = self.dw_unit.dwo_name().and_then(|s| {
71            if let Some(s) = s {
72                Ok(Some(ctx.sections.attr_string(&self.dw_unit, s)?))
73            } else {
74                Ok(None)
75            }
76        });
77
78        let path = match dwo_name {
79            Ok(v) => v,
80            Err(e) => {
81                return complete(self.dwo.get_or_init(|| Err(e)));
82            }
83        };
84
85        let process_dwo = move |dwo_dwarf: Option<Arc<gimli::Dwarf<R>>>| {
86            let dwo_dwarf = match dwo_dwarf {
87                None => return Ok(None),
88                Some(dwo_dwarf) => dwo_dwarf,
89            };
90            let mut dwo_units = dwo_dwarf.units();
91            let dwo_header = match dwo_units.next()? {
92                Some(dwo_header) => dwo_header,
93                None => return Ok(None),
94            };
95
96            let mut dwo_unit = dwo_dwarf.unit(dwo_header)?;
97            dwo_unit.copy_relocated_attributes(&self.dw_unit);
98            Ok(Some(Box::new(DwoUnit {
99                sections: dwo_dwarf,
100                dw_unit: dwo_unit,
101            })))
102        };
103
104        SimpleLookup::new_needs_load(
105            SplitDwarfLoad {
106                dwo_id,
107                comp_dir,
108                path,
109                parent: ctx.sections.clone(),
110            },
111            move |dwo_dwarf| map_dwo(self.dwo.get_or_init(|| process_dwo(dwo_dwarf))),
112        )
113    }
114
115    pub(crate) fn parse_lines(&self, sections: &gimli::Dwarf<R>) -> Result<Option<&Lines>, Error> {
116        // NB: line information is always stored in the main debug file so this does not need
117        // to handle DWOs.
118        let ilnp = match self.dw_unit.line_program {
119            Some(ref ilnp) => ilnp,
120            None => return Ok(None),
121        };
122        self.lines.borrow(self.unit_ref(sections), ilnp).map(Some)
123    }
124
125    pub(crate) fn parse_functions<'unit, 'ctx: 'unit>(
126        &'unit self,
127        ctx: &'ctx Context<R>,
128    ) -> LookupResult<impl LookupContinuation<Output = Result<&'unit Functions<R>, Error>, Buf = R>>
129    {
130        self.dwarf_and_unit(ctx).map(move |r| {
131            let (_file, unit) = r?;
132            self.functions.borrow(unit)
133        })
134    }
135
136    pub(crate) fn parse_inlined_functions<'unit, 'ctx: 'unit>(
137        &'unit self,
138        ctx: &'ctx Context<R>,
139    ) -> LookupResult<impl LookupContinuation<Output = Result<(), Error>, Buf = R> + 'unit> {
140        self.dwarf_and_unit(ctx).map(move |r| {
141            let (file, unit) = r?;
142            self.functions
143                .borrow(unit)?
144                .parse_inlined_functions(file, unit, ctx)
145        })
146    }
147
148    pub(crate) fn find_location(
149        &self,
150        probe: u64,
151        sections: &gimli::Dwarf<R>,
152    ) -> Result<Option<Location<'_>>, Error> {
153        let Some(lines) = self.parse_lines(sections)? else {
154            return Ok(None);
155        };
156        lines.find_location(probe)
157    }
158
159    #[inline]
160    pub(crate) fn find_location_range(
161        &self,
162        probe_low: u64,
163        probe_high: u64,
164        sections: &gimli::Dwarf<R>,
165    ) -> Result<Option<LineLocationRangeIter<'_>>, Error> {
166        let Some(lines) = self.parse_lines(sections)? else {
167            return Ok(None);
168        };
169        lines.find_location_range(probe_low, probe_high).map(Some)
170    }
171
172    #[allow(clippy::type_complexity)]
173    pub(crate) fn find_function_or_location<'unit, 'ctx: 'unit>(
174        &'unit self,
175        probe: u64,
176        ctx: &'ctx Context<R>,
177    ) -> LookupResult<
178        impl LookupContinuation<
179            Output = Result<(Option<&'unit Function<R>>, Option<Location<'unit>>), Error>,
180            Buf = R,
181        >,
182    > {
183        self.dwarf_and_unit(ctx).map(move |r| {
184            let (file, unit) = r?;
185            let functions = self.functions.borrow(unit)?;
186            let function = match functions.find_address(probe) {
187                Some(address) => {
188                    let function_index = functions.addresses[address].function;
189                    let function = &functions.functions[function_index];
190                    Some(function.borrow(file, unit, ctx)?)
191                }
192                None => None,
193            };
194            let location = self.find_location(probe, &ctx.sections)?;
195            Ok((function, location))
196        })
197    }
198}
199
200pub(crate) struct ResUnits<R: gimli::Reader> {
201    ranges: Box<[UnitRange]>,
202    units: Box<[ResUnit<R>]>,
203}
204
205impl<R: gimli::Reader> ResUnits<R> {
206    pub(crate) fn parse(sections: &gimli::Dwarf<R>) -> Result<Self, Error> {
207        // Find all the references to compilation units in .debug_aranges.
208        // Note that we always also iterate through all of .debug_info to
209        // find compilation units, because .debug_aranges may be missing some.
210        let mut aranges = Vec::new();
211        let mut headers = sections.debug_aranges.headers();
212        while let Some(header) = headers.next()? {
213            aranges.push((header.debug_info_offset(), header.offset()));
214        }
215        aranges.sort_unstable_by_key(|i| i.0);
216
217        let mut unit_ranges = Vec::new();
218        let mut res_units = Vec::new();
219        let mut units = sections.units();
220        while let Some(header) = units.next()? {
221            let unit_id = res_units.len();
222            let offset = match header.debug_info_offset() {
223                Some(offset) => offset,
224                None => continue,
225            };
226            // We mainly want compile units, but we may need to follow references to entries
227            // within other units for function names.  We don't need anything from type units.
228            let mut need_unit_range = match header.type_() {
229                gimli::UnitType::Type { .. } | gimli::UnitType::SplitType { .. } => continue,
230                gimli::UnitType::Partial => {
231                    // Partial units are only needed for references from other units.
232                    // They shouldn't have any address ranges.
233                    false
234                }
235                _ => true,
236            };
237            let dw_unit = match sections.unit(header) {
238                Ok(dw_unit) => dw_unit,
239                Err(_) => continue,
240            };
241            let dw_unit_ref = gimli::UnitRef::new(sections, &dw_unit);
242
243            let mut lang = None;
244            if need_unit_range {
245                let mut entries = dw_unit_ref.entries_raw(None)?;
246
247                let abbrev = match entries.read_abbreviation()? {
248                    Some(abbrev) => abbrev,
249                    None => continue,
250                };
251
252                let mut ranges = RangeAttributes::default();
253                for spec in abbrev.attributes() {
254                    let attr = entries.read_attribute(*spec)?;
255                    match attr.name() {
256                        gimli::DW_AT_low_pc => match attr.value() {
257                            gimli::AttributeValue::Addr(val) => ranges.low_pc = Some(val),
258                            gimli::AttributeValue::DebugAddrIndex(index) => {
259                                ranges.low_pc = Some(dw_unit_ref.address(index)?);
260                            }
261                            _ => {}
262                        },
263                        gimli::DW_AT_high_pc => match attr.value() {
264                            gimli::AttributeValue::Addr(val) => ranges.high_pc = Some(val),
265                            gimli::AttributeValue::DebugAddrIndex(index) => {
266                                ranges.high_pc = Some(dw_unit_ref.address(index)?);
267                            }
268                            gimli::AttributeValue::Udata(val) => ranges.size = Some(val),
269                            _ => {}
270                        },
271                        gimli::DW_AT_ranges => {
272                            ranges.ranges_offset = dw_unit_ref.attr_ranges_offset(attr.value())?;
273                        }
274                        gimli::DW_AT_language => {
275                            if let gimli::AttributeValue::Language(val) = attr.value() {
276                                lang = Some(val);
277                            }
278                        }
279                        _ => {}
280                    }
281                }
282
283                // Find the address ranges for the CU, using in order of preference:
284                // - DW_AT_ranges
285                // - .debug_aranges
286                // - DW_AT_low_pc/DW_AT_high_pc
287                //
288                // Using DW_AT_ranges before .debug_aranges is possibly an arbitrary choice,
289                // but the feeling is that DW_AT_ranges is more likely to be reliable or complete
290                // if it is present.
291                //
292                // .debug_aranges must be used before DW_AT_low_pc/DW_AT_high_pc because
293                // it has been observed on macOS that DW_AT_ranges was not emitted even for
294                // discontiguous CUs.
295                let i = match ranges.ranges_offset {
296                    Some(_) => None,
297                    None => aranges.binary_search_by_key(&offset, |x| x.0).ok(),
298                };
299                if let Some(mut i) = i {
300                    // There should be only one set per CU, but in practice multiple
301                    // sets have been observed. This is probably a compiler bug, but
302                    // either way we need to handle it.
303                    while i > 0 && aranges[i - 1].0 == offset {
304                        i -= 1;
305                    }
306                    for (_, aranges_offset) in aranges[i..].iter().take_while(|x| x.0 == offset) {
307                        let aranges_header = sections.debug_aranges.header(*aranges_offset)?;
308                        let mut aranges = aranges_header.entries();
309                        while let Some(arange) = aranges.next().transpose() {
310                            let Ok(arange) = arange else {
311                                // Ignore errors. In particular, this will ignore address overflow.
312                                // This has been seen for a unit that had a single variable
313                                // with rustc 1.89.0.
314                                //
315                                // This relies on `ArangeEntryIter::next` fusing for errors that
316                                // can't be ignored.
317                                continue;
318                            };
319                            if arange.length() != 0 {
320                                unit_ranges.push(UnitRange {
321                                    range: arange.range(),
322                                    unit_id,
323                                    min_begin: 0,
324                                });
325                                need_unit_range = false;
326                            }
327                        }
328                    }
329                }
330                if need_unit_range {
331                    need_unit_range = !ranges.for_each_range(dw_unit_ref, |range| {
332                        unit_ranges.push(UnitRange {
333                            range,
334                            unit_id,
335                            min_begin: 0,
336                        });
337                    })?;
338                }
339            }
340
341            let lines = LazyLines::new();
342            if need_unit_range {
343                // The unit did not declare any ranges.
344                // Try to get some ranges from the line program sequences.
345                if let Some(ref ilnp) = dw_unit_ref.line_program {
346                    if let Ok(lines) = lines.borrow(dw_unit_ref, ilnp) {
347                        for range in lines.ranges() {
348                            unit_ranges.push(UnitRange {
349                                range,
350                                unit_id,
351                                min_begin: 0,
352                            })
353                        }
354                    }
355                }
356            }
357
358            res_units.push(ResUnit {
359                offset,
360                dw_unit,
361                lang,
362                lines,
363                functions: LazyFunctions::new(),
364                dwo: LazyResult::new(),
365            });
366        }
367
368        // Sort this for faster lookup in `Self::find_range`.
369        unit_ranges.sort_unstable_by_key(|i| (i.range.end, i.unit_id));
370
371        // Calculate the `min_begin` field now that we've determined the order of
372        // CUs.
373        let mut min = !0;
374        for i in unit_ranges.iter_mut().rev() {
375            min = min.min(i.range.begin);
376            i.min_begin = min;
377        }
378
379        Ok(ResUnits {
380            ranges: unit_ranges.into_boxed_slice(),
381            units: res_units.into_boxed_slice(),
382        })
383    }
384
385    pub(crate) fn iter(&self) -> impl Iterator<Item = &ResUnit<R>> {
386        self.units.iter()
387    }
388
389    pub(crate) fn find_offset(
390        &self,
391        offset: gimli::DebugInfoOffset<R::Offset>,
392    ) -> Result<&gimli::Unit<R>, Error> {
393        match self
394            .units
395            .binary_search_by_key(&offset.0, |unit| unit.offset.0)
396        {
397            // There is never a DIE at the unit offset or before the first unit.
398            Ok(_) | Err(0) => Err(gimli::Error::NoEntryAtGivenOffset(offset.0.into_u64())),
399            Err(i) => Ok(&self.units[i - 1].dw_unit),
400        }
401    }
402
403    /// Finds the CUs for the function address given.
404    ///
405    /// There might be multiple CUs whose range contains this address.
406    /// Weak symbols have shown up in the wild which cause this to happen
407    /// but otherwise this can happen if the CU has non-contiguous functions
408    /// but only reports a single range.
409    ///
410    /// Consequently we return an iterator for all CUs which may contain the
411    /// address, and the caller must check if there is actually a function or
412    /// location in the CU for that address.
413    pub(crate) fn find(&self, probe: u64) -> impl Iterator<Item = &ResUnit<R>> {
414        self.find_range(probe, probe + 1).map(|(unit, _range)| unit)
415    }
416
417    /// Finds the CUs covering the range of addresses given.
418    ///
419    /// The range is [low, high) (ie, the upper bound is exclusive). This can return multiple
420    /// ranges for the same unit.
421    #[inline]
422    pub(crate) fn find_range(
423        &self,
424        probe_low: u64,
425        probe_high: u64,
426    ) -> impl Iterator<Item = (&ResUnit<R>, &gimli::Range)> {
427        // Find the position of the next range after a range which
428        // ends at `probe_low` or lower.
429        let pos = match self
430            .ranges
431            .binary_search_by_key(&probe_low, |i| i.range.end)
432        {
433            Ok(i) => i + 1, // Range `i` ends at exactly `probe_low`.
434            Err(i) => i,    // Range `i - 1` ends at a lower address.
435        };
436
437        // Iterate from that position to find matching CUs.
438        self.ranges[pos..]
439            .iter()
440            .take_while(move |i| {
441                // We know that this CU's end is at least `probe_low` because
442                // of our sorted array.
443                debug_assert!(i.range.end >= probe_low);
444
445                // Each entry keeps track of the minimum begin address for the
446                // remainder of the array of unit ranges. If our probe is before
447                // the minimum range begin of this entry, then it's guaranteed
448                // to not fit in any subsequent entries, so we break out.
449                probe_high > i.min_begin
450            })
451            .filter_map(move |i| {
452                // If this CU doesn't actually contain this address, move to the
453                // next CU.
454                if probe_low >= i.range.end || probe_high <= i.range.begin {
455                    return None;
456                }
457                Some((&self.units[i.unit_id], &i.range))
458            })
459    }
460
461    pub(crate) fn find_location_range<'a>(
462        &'a self,
463        probe_low: u64,
464        probe_high: u64,
465        sections: &'a gimli::Dwarf<R>,
466    ) -> Result<LocationRangeIter<'a, R>, Error> {
467        let unit_iter = Box::new(self.find_range(probe_low, probe_high));
468        Ok(LocationRangeIter {
469            unit_iter,
470            iter: None,
471            probe_low,
472            probe_high,
473            sections,
474        })
475    }
476}
477
478/// A DWO unit has its own DWARF sections.
479struct DwoUnit<R: gimli::Reader> {
480    sections: Arc<gimli::Dwarf<R>>,
481    dw_unit: gimli::Unit<R>,
482}
483
484impl<R: gimli::Reader> DwoUnit<R> {
485    fn unit_ref(&self) -> gimli::UnitRef<'_, R> {
486        gimli::UnitRef::new(&self.sections, &self.dw_unit)
487    }
488}
489
490pub(crate) struct SupUnit<R: gimli::Reader> {
491    offset: gimli::DebugInfoOffset<R::Offset>,
492    dw_unit: gimli::Unit<R>,
493}
494
495pub(crate) struct SupUnits<R: gimli::Reader> {
496    units: Box<[SupUnit<R>]>,
497}
498
499impl<R: gimli::Reader> Default for SupUnits<R> {
500    fn default() -> Self {
501        SupUnits {
502            units: Box::default(),
503        }
504    }
505}
506
507impl<R: gimli::Reader> SupUnits<R> {
508    pub(crate) fn parse(sections: &gimli::Dwarf<R>) -> Result<Self, Error> {
509        let mut sup_units = Vec::new();
510        let mut units = sections.units();
511        while let Some(header) = units.next()? {
512            let offset = match header.debug_info_offset() {
513                Some(offset) => offset,
514                None => continue,
515            };
516            let dw_unit = match sections.unit(header) {
517                Ok(dw_unit) => dw_unit,
518                Err(_) => continue,
519            };
520            sup_units.push(SupUnit { dw_unit, offset });
521        }
522        Ok(SupUnits {
523            units: sup_units.into_boxed_slice(),
524        })
525    }
526
527    pub(crate) fn find_offset(
528        &self,
529        offset: gimli::DebugInfoOffset<R::Offset>,
530    ) -> Result<&gimli::Unit<R>, Error> {
531        match self
532            .units
533            .binary_search_by_key(&offset.0, |unit| unit.offset.0)
534        {
535            // There is never a DIE at the unit offset or before the first unit.
536            Ok(_) | Err(0) => Err(gimli::Error::NoEntryAtGivenOffset(offset.0.into_u64())),
537            Err(i) => Ok(&self.units[i - 1].dw_unit),
538        }
539    }
540}
541
542/// Iterator over `Location`s in a range of addresses, returned by `Context::find_location_range`.
543pub struct LocationRangeIter<'ctx, R: gimli::Reader> {
544    unit_iter: Box<dyn Iterator<Item = (&'ctx ResUnit<R>, &'ctx gimli::Range)> + 'ctx>,
545    iter: Option<LineLocationRangeIter<'ctx>>,
546
547    probe_low: u64,
548    probe_high: u64,
549    sections: &'ctx gimli::Dwarf<R>,
550}
551
552impl<'ctx, R: gimli::Reader> LocationRangeIter<'ctx, R> {
553    fn next_loc(&mut self) -> Result<Option<(u64, u64, Location<'ctx>)>, Error> {
554        loop {
555            let iter = self.iter.take();
556            match iter {
557                None => match self.unit_iter.next() {
558                    Some((unit, range)) => {
559                        self.iter = unit.find_location_range(
560                            cmp::max(self.probe_low, range.begin),
561                            cmp::min(self.probe_high, range.end),
562                            self.sections,
563                        )?;
564                    }
565                    None => return Ok(None),
566                },
567                Some(mut iter) => {
568                    if let item @ Some(_) = iter.next() {
569                        self.iter = Some(iter);
570                        return Ok(item);
571                    }
572                }
573            }
574        }
575    }
576}
577
578impl<'ctx, R> Iterator for LocationRangeIter<'ctx, R>
579where
580    R: gimli::Reader + 'ctx,
581{
582    type Item = (u64, u64, Location<'ctx>);
583
584    #[inline]
585    fn next(&mut self) -> Option<Self::Item> {
586        self.next_loc().unwrap_or_default()
587    }
588}
589
590#[cfg(feature = "fallible-iterator")]
591impl<'ctx, R> fallible_iterator::FallibleIterator for LocationRangeIter<'ctx, R>
592where
593    R: gimli::Reader + 'ctx,
594{
595    type Item = (u64, u64, Location<'ctx>);
596    type Error = Error;
597
598    #[inline]
599    fn next(&mut self) -> Result<Option<Self::Item>, Self::Error> {
600        self.next_loc()
601    }
602}