blazesym/dwarf/
resolver.rs

1use std::borrow::Cow;
2#[cfg(test)]
3use std::env;
4use std::ffi::OsStr;
5use std::fmt::Debug;
6use std::fmt::Formatter;
7use std::fmt::Result as FmtResult;
8use std::mem;
9use std::mem::swap;
10use std::ops::ControlFlow;
11use std::ops::Deref as _;
12use std::path::Path;
13use std::path::PathBuf;
14use std::rc::Rc;
15
16use gimli::AbbreviationsCacheStrategy;
17use gimli::Dwarf;
18
19use crate::dwarf::reader::Endianess;
20use crate::dwarf::reader::R;
21use crate::elf::ElfParser;
22#[cfg(test)]
23use crate::elf::DEFAULT_DEBUG_DIRS;
24use crate::error::IntoCowStr;
25use crate::inspect::FindAddrOpts;
26use crate::inspect::ForEachFn;
27use crate::inspect::Inspect;
28use crate::inspect::SymInfo;
29use crate::log;
30use crate::log::debug;
31use crate::log::warn;
32use crate::symbolize::CodeInfo;
33use crate::symbolize::FindSymOpts;
34use crate::symbolize::InlinedFn;
35use crate::symbolize::Reason;
36use crate::symbolize::ResolvedSym;
37use crate::symbolize::SrcLang;
38use crate::symbolize::Symbolize;
39use crate::Addr;
40use crate::Error;
41use crate::ErrorExt;
42use crate::ErrorKind;
43use crate::Mmap;
44use crate::Result;
45use crate::SymType;
46
47use super::debug_link::debug_link_crc32;
48use super::debug_link::read_debug_link;
49use super::debug_link::DebugFileIter;
50use super::function::Function;
51use super::location::Location;
52use super::reader;
53use super::unit::Unit;
54use super::units::Units;
55
56
57impl ErrorExt for gimli::Error {
58    type Output = Error;
59
60    fn context<C>(self, context: C) -> Self::Output
61    where
62        C: IntoCowStr,
63    {
64        Error::from(self).context(context)
65    }
66
67    fn with_context<C, F>(self, f: F) -> Self::Output
68    where
69        C: IntoCowStr,
70        F: FnOnce() -> C,
71    {
72        Error::from(self).with_context(f)
73    }
74}
75
76
77impl From<Option<gimli::DwLang>> for SrcLang {
78    fn from(other: Option<gimli::DwLang>) -> Self {
79        match other {
80            Some(gimli::DW_LANG_Rust) => SrcLang::Rust,
81            Some(
82                gimli::DW_LANG_C_plus_plus
83                | gimli::DW_LANG_C_plus_plus_03
84                | gimli::DW_LANG_C_plus_plus_11
85                | gimli::DW_LANG_C_plus_plus_14
86                | gimli::DW_LANG_C_plus_plus_17
87                | gimli::DW_LANG_C_plus_plus_20,
88            ) => SrcLang::Cpp,
89            _ => SrcLang::Unknown,
90        }
91    }
92}
93
94
95/// Find a debug file in a list of default directories.
96///
97/// `linker` is the path to the file containing the debug link. This function
98/// searches a couple of "well-known" locations and then others constructed
99/// based on the canonicalized path of `linker`.
100///
101/// # Notes
102/// This function ignores any errors encountered.
103fn find_debug_file(file: &OsStr, linker: Option<&Path>, debug_dirs: &[PathBuf]) -> Option<PathBuf> {
104    let canonical_linker = linker.and_then(|linker| linker.canonicalize().ok());
105    let it = DebugFileIter::new(debug_dirs, canonical_linker.as_deref(), file);
106
107    for path in it {
108        if path.exists() {
109            debug!("found debug info at `{}`", path.display());
110            return Some(path)
111        }
112    }
113    warn!(
114        "debug link references destination `{}` which was not found in any known location",
115        Path::new(file).display(),
116    );
117    None
118}
119
120
121// TODO: We really should have a cache of debug link targets as well, to
122//       avoid potentially re-parsing the same files over and over
123//       again.
124fn try_deref_debug_link(
125    parser: &ElfParser,
126    debug_dirs: &[PathBuf],
127) -> Result<Option<Rc<ElfParser>>> {
128    if let Some((file, checksum)) = read_debug_link(parser)? {
129        // TODO: Usage of the module here is fishy, as it may not
130        //       represent an actual path. However, even using the
131        //       actual path is not necessarily correct. Consider if the
132        //       `ElfParser` references a map_files file.
133        let linker = parser.module().map(OsStr::as_ref);
134        match find_debug_file(file, linker, debug_dirs) {
135            Some(path) => {
136                let mmap = Mmap::builder().open(&path).with_context(|| {
137                    format!("failed to open debug link destination `{}`", path.display())
138                })?;
139                let crc = debug_link_crc32(&mmap);
140                if crc != checksum {
141                    return Err(Error::with_invalid_data(format!(
142                        "debug link destination `{}` checksum does not match \
143                         expected one: {crc:x} (actual) != {checksum:x} (expected)",
144                        path.display()
145                    )))
146                }
147
148                let module = path.into_os_string();
149                let dst_parser = Rc::new(ElfParser::from_mmap(mmap, Some(module)));
150                Ok(Some(dst_parser))
151            }
152            None => Ok(None),
153        }
154    } else {
155        Ok(None)
156    }
157}
158
159
160/// Try to find a DWARF package (`.dwp`) "belonging" to the file
161/// referenced by the given [`ElfParser`].
162fn try_find_dwp(parser: &ElfParser) -> Result<Option<Rc<ElfParser>>> {
163    if let Some(path) = parser.module() {
164        let mut dwp_path = path.to_os_string();
165        let () = dwp_path.push(".dwp");
166        let dwp_path = PathBuf::from(dwp_path);
167
168        match ElfParser::open(&dwp_path) {
169            Ok(parser) => {
170                log::debug!("using DWARF package `{}`", dwp_path.display());
171                Ok(Some(Rc::new(parser)))
172            }
173            Err(err) if err.kind() == ErrorKind::NotFound => Ok(None),
174            Err(err) => Err(err),
175        }
176    } else {
177        Ok(None)
178    }
179}
180
181
/// `DwarfResolver` provides abilities to query DWARF information of binaries.
///
/// The order of the members is significant; see the `SAFETY` note on
/// `units`.
pub(crate) struct DwarfResolver {
    /// The lazily parsed compilation units of the DWARF file.
    // SAFETY: We must not hand out references with a 'static lifetime to
    //         this member. Rather, they should never outlive `self`.
    //         Furthermore, this member has to be listed before `parser`
    //         and `linkee_parser` to make sure we never end up with a
    //         dangling reference. The same holds for `_dwp_parser`,
    //         which `from_parser` also hands to `Units::parse` with a
    //         conjured 'static lifetime.
    units: Units<'static>,
    /// The parser for the file this resolver was created for.
    parser: Rc<ElfParser>,
    /// If the source file contains a valid debug link, this parser
    /// represents it.
    linkee_parser: Option<Rc<ElfParser>>,
    /// If there exists an associated DWARF Package (*.dwp), this parser
    /// represents it.
    // Not read anywhere in this file; it is retained so that the data
    // handed to `units` stays alive.
    _dwp_parser: Option<Rc<ElfParser>>,
}
199
200impl DwarfResolver {
201    /// Retrieve the resolver's underlying `ElfParser`.
202    pub(crate) fn parser(&self) -> &Rc<ElfParser> {
203        &self.parser
204    }
205
206    pub(crate) fn from_parser(parser: Rc<ElfParser>, debug_dirs: &[PathBuf]) -> Result<Self> {
207        let linkee_parser = try_deref_debug_link(&parser, debug_dirs)?;
208        let dwp_parser = try_find_dwp(&parser)?;
209
210        // SAFETY: We own the `ElfParser` and make sure that it stays
211        //         around while the `Units` object uses it. As such, it
212        //         is fine to conjure a 'static lifetime here.
213        let static_linkee_parser = unsafe {
214            mem::transmute::<&ElfParser, &'static ElfParser>(
215                linkee_parser.as_ref().unwrap_or(&parser).deref(),
216            )
217        };
218        let mut load_section = |section| reader::load_section(static_linkee_parser, section);
219        let mut dwarf = Dwarf::load(&mut load_section)?;
220        // Cache abbreviations (which will cause them to be
221        // automatically reused across compilation units), which can
222        // speed up parsing of debug information potentially
223        // dramatically, depending on debug information layout and how
224        // much effort the linker spent on optimizing it.
225        let () = dwarf.populate_abbreviations_cache(AbbreviationsCacheStrategy::Duplicates);
226
227        let dwp = dwp_parser
228            .as_deref()
229            .map(|dwp_parser| {
230                let empty = R::new(&[], Endianess::default());
231                // SAFETY: We own the `ElfParser` and make sure that it
232                //         stays around while the `Units` object uses
233                //         it. As such, it is fine to conjure a 'static
234                //         lifetime here.
235                let static_dwp_parser =
236                    unsafe { mem::transmute::<&ElfParser, &'static ElfParser>(dwp_parser) };
237                let load_dwo_section =
238                    |section| reader::load_dwo_section(static_dwp_parser, section);
239                let dwp = gimli::DwarfPackage::load(load_dwo_section, empty)?;
240                Result::<_, Error>::Ok(dwp)
241            })
242            .transpose()?;
243
244        let units = Units::parse(dwarf, dwp)?;
245        let slf = Self {
246            units,
247            parser,
248            linkee_parser,
249            _dwp_parser: dwp_parser,
250        };
251        Ok(slf)
252    }
253
254    /// Open a file to load DWARF debug information.
255    #[cfg(test)]
256    fn open(path: &Path) -> Result<Self> {
257        let parser = ElfParser::open(path)?;
258        let debug_dirs = DEFAULT_DEBUG_DIRS
259            .iter()
260            .map(PathBuf::from)
261            .collect::<Vec<_>>();
262        Self::from_parser(Rc::new(parser), debug_dirs.as_slice())
263    }
264
265    /// Try converting a `Function` into a `SymInfo`.
266    ///
267    /// # Notes
268    /// This method only returns `None` if `function` does not have the `name`
269    /// attribute set.
270    fn function_to_sym_info<'slf>(
271        &'slf self,
272        function: &'slf Function,
273        offset_in_file: bool,
274    ) -> Result<Option<SymInfo<'slf>>> {
275        let name = if let Some(name) = function.name {
276            name.to_string().unwrap()
277        } else {
278            return Ok(None)
279        };
280        let addr = function
281            .range
282            .as_ref()
283            .map(|range| range.begin)
284            .unwrap_or(0);
285        let size = function
286            .range
287            .as_ref()
288            .and_then(|range| range.end.checked_sub(range.begin))
289            .map(|size| usize::try_from(size).unwrap_or(usize::MAX))
290            .unwrap_or(0);
291        let info = SymInfo {
292            name: Cow::Borrowed(name),
293            addr,
294            size: Some(size),
295            sym_type: SymType::Function,
296            file_offset: offset_in_file
297                .then(|| self.parser.find_file_offset(addr, size))
298                .transpose()?
299                .flatten(),
300            module: self.parser.module().map(Cow::Borrowed),
301            _non_exhaustive: (),
302        };
303        Ok(Some(info))
304    }
305}
306
307impl Symbolize for DwarfResolver {
308    fn find_sym(&self, addr: Addr, opts: &FindSymOpts) -> Result<Result<ResolvedSym<'_>, Reason>> {
309        let data = self.units.find_function(addr)?;
310        let mut sym = if let Some((function, unit)) = data {
311            let name = function
312                .name
313                .map(|name| name.to_string())
314                .transpose()?
315                .unwrap_or("");
316            let fn_addr = function.range.map(|range| range.begin).unwrap_or(0);
317            let size = function
318                .range
319                .map(|range| usize::try_from(range.end - range.begin).unwrap_or(usize::MAX));
320            ResolvedSym {
321                name,
322                module: self.parser.module(),
323                addr: fn_addr,
324                size,
325                lang: unit.language().into(),
326                code_info: None,
327                inlined: Box::new([]),
328                _non_exhaustive: (),
329            }
330        } else {
331            // Fall back to checking ELF for the symbol corresponding to
332            // the address. This is to mimic behavior of various tools
333            // (e.g., `addr2line`). Basically, what can happen is that a
334            // symbol is not present in DWARF, but source code
335            // information for the address actually is. By checking ELF
336            // as a fall back we support cases where ELF *does* contain
337            // symbol, and we amend its information with the source code
338            // information from DWARF.
339            let parser = self.linkee_parser.as_ref().unwrap_or(&self.parser).deref();
340            match parser.find_sym(addr, opts)? {
341                Ok(sym) => sym,
342                Err(reason) => return Ok(Err(reason)),
343            }
344        };
345
346        let () = self.units.fill_code_info(&mut sym, addr, opts, data)?;
347
348        Ok(Ok(sym))
349    }
350}
351
352impl Inspect for DwarfResolver {
353    /// Find information about a symbol given its name.
354    ///
355    /// # Notes
356    /// - lookup of variables is not currently supported
357    fn find_addr<'slf>(&'slf self, name: &str, opts: &FindAddrOpts) -> Result<Vec<SymInfo<'slf>>> {
358        if let SymType::Variable = opts.sym_type {
359            return Err(Error::with_unsupported("not implemented"))
360        }
361
362        let syms = self
363            .units
364            .find_name(name)
365            .map(|result| {
366                match result {
367                    Ok(function) => {
368                        // SANITY: We found the function by name, so it must have the
369                        //         name attribute set. `function_to_sym_info`
370                        //         only returns `None` if no name is present.
371                        let info = self
372                            .function_to_sym_info(function, opts.file_offset)?
373                            .unwrap();
374                        Ok(info)
375                    }
376                    Err(err) => Err(Error::from(err)),
377                }
378            })
379            .collect::<Result<Vec<_>>>()?;
380
381        if syms.is_empty() {
382            let parser = self.linkee_parser.as_ref().unwrap_or(&self.parser).deref();
383            parser.find_addr(name, opts)
384        } else {
385            Ok(syms)
386        }
387    }
388
389    fn for_each(&self, opts: &FindAddrOpts, f: &mut ForEachFn<'_>) -> Result<()> {
390        if let SymType::Variable = opts.sym_type {
391            return Err(Error::with_unsupported("not implemented"))
392        }
393
394        let mut overall_result = Ok(());
395        let () = self.units.for_each_function(|func| {
396            let result = self.function_to_sym_info(func, opts.file_offset);
397            match result {
398                Ok(Some(sym_info)) => f(&sym_info),
399                Ok(None) => ControlFlow::Continue(()),
400                Err(err) => {
401                    overall_result = Err(err);
402                    ControlFlow::Break(())
403                }
404            }
405        })?;
406        overall_result
407    }
408}
409
410impl Debug for DwarfResolver {
411    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
412        let module = self
413            .parser()
414            .module()
415            .unwrap_or_else(|| OsStr::new("<unknown>"));
416        write!(f, "DwarfResolver({module:?})")
417    }
418}
419
420
421// Conceptually this block belongs to the `DwarfResolver` type, but because it
422// uses a `Units` object with 'static lifetime we have to impl on `Units`
423// directly.
424impl<'dwarf> Units<'dwarf> {
425    /// Fill in source code information for an address to the provided
426    /// `ResolvedSym`.
427    ///
428    /// `addr` is a normalized address.
429    fn fill_code_info<'slf>(
430        &'slf self,
431        sym: &mut ResolvedSym<'slf>,
432        addr: Addr,
433        opts: &FindSymOpts,
434        data: Option<(&'slf Function<'dwarf>, &'slf Unit<'dwarf>)>,
435    ) -> Result<()> {
436        if !opts.code_info() {
437            return Ok(())
438        }
439
440        let direct_location = if let Some(direct_location) = self.find_location(addr)? {
441            direct_location
442        } else {
443            return Ok(())
444        };
445
446        let Location {
447            dir,
448            file,
449            line,
450            column,
451        } = direct_location;
452
453        let mut direct_code_info = CodeInfo {
454            dir: Some(Cow::Borrowed(dir)),
455            file: Cow::Borrowed(file),
456            line,
457            column: column.map(|col| col.try_into().unwrap_or(u16::MAX)),
458            _non_exhaustive: (),
459        };
460
461        let inlined = if opts.inlined_fns() {
462            if let Some((function, unit)) = data {
463                if let Some(inline_stack) = self.find_inlined_functions(addr, function, unit)? {
464                    let mut inlined = Vec::<InlinedFn>::with_capacity(inline_stack.len());
465                    for result in inline_stack {
466                        let (name, location) = result?;
467                        let mut code_info = location.map(|location| {
468                            let Location {
469                                dir,
470                                file,
471                                line,
472                                column,
473                            } = location;
474
475                            CodeInfo {
476                                dir: Some(Cow::Borrowed(dir)),
477                                file: Cow::Borrowed(file),
478                                line,
479                                column: column.map(|col| col.try_into().unwrap_or(u16::MAX)),
480                                _non_exhaustive: (),
481                            }
482                        });
483
484                        // For each frame we need to move the code information
485                        // up by one layer.
486                        if let Some(ref mut last_code_info) =
487                            inlined.last_mut().map(|f| &mut f.code_info)
488                        {
489                            let () = swap(&mut code_info, last_code_info);
490                        } else if let Some(code_info) = &mut code_info {
491                            let () = swap(code_info, &mut direct_code_info);
492                        }
493
494                        let inlined_fn = InlinedFn {
495                            name: Cow::Borrowed(name),
496                            code_info,
497                            _non_exhaustive: (),
498                        };
499                        let () = inlined.push(inlined_fn);
500                    }
501                    inlined
502                } else {
503                    Vec::new()
504                }
505            } else {
506                Vec::new()
507            }
508        } else {
509            Vec::new()
510        };
511
512        sym.code_info = Some(Box::new(direct_code_info));
513        sym.inlined = inlined.into_boxed_slice();
514
515        Ok(())
516    }
517}
518
519
#[cfg(test)]
mod tests {
    use super::*;

    use std::env::current_exe;
    use std::ffi::OsStr;
    use std::ops::ControlFlow;
    use std::path::PathBuf;

    use test_log::test;

    use crate::ErrorKind;


    /// Assemble the path to a file inside the crate's `data` directory.
    fn data_file(name: &str) -> PathBuf {
        Path::new(&env!("CARGO_MANIFEST_DIR"))
            .join("data")
            .join(name)
    }

    /// Exercise the `Debug` representation of various types.
    #[test]
    fn debug_repr() {
        let exe = current_exe().unwrap();
        let resolver = DwarfResolver::open(&exe).unwrap();
        let repr = format!("{resolver:?}");
        assert_ne!(repr, "");
    }

    /// Check that we can convert a `gimli::Error` into our own error type.
    #[test]
    fn error_conversion() {
        let inner = gimli::Error::Io;
        let expected = format!("failed to read: {inner}");

        let err = Result::<(), _>::Err(inner)
            .context("failed to read")
            .unwrap_err();
        assert_eq!(format!("{err:#}"), expected);

        let err = Result::<(), _>::Err(inner)
            .with_context(|| "failed to read")
            .unwrap_err();
        assert_eq!(format!("{err:#}"), expected);
    }

    /// Check that we resolve debug links correctly.
    #[test]
    fn debug_link_resolution() {
        let linker_path = data_file("test-stable-addrs-stripped-with-link.bin");
        let resolver = DwarfResolver::open(&linker_path).unwrap();
        assert!(resolver.linkee_parser.is_some());

        let linkee_path = data_file("test-stable-addrs-dwarf-only.dbg");
        let linkee_parser = resolver.linkee_parser.as_ref().unwrap();
        assert_eq!(linkee_parser.module(), Some(linkee_path.as_os_str()));
    }

    /// Check that we can discover DWARF packages as expected.
    #[test]
    fn dwp_discovery() {
        // A binary with an accompanying package...
        let with_dwp = ElfParser::open(&data_file("test-rs-split-dwarf.bin")).unwrap();
        assert!(try_find_dwp(&with_dwp).unwrap().is_some());

        // ...and one without.
        let without_dwp = ElfParser::open(&data_file("test-rs.bin")).unwrap();
        assert!(try_find_dwp(&without_dwp).unwrap().is_none());
    }

    /// Check that we can find the source code location of an address.
    #[test]
    fn source_location_finding() {
        let resolver = DwarfResolver::open(&data_file("test-stable-addrs.bin")).unwrap();

        let sym = resolver
            .find_sym(0x2000200, &FindSymOpts::CodeInfo)
            .unwrap()
            .unwrap();
        let info = sym.code_info.unwrap();
        assert_ne!(info.dir, Some(Cow::Owned(PathBuf::new())));
        assert_eq!(info.file, OsStr::new("test-stable-addrs.c"));
        assert_eq!(info.line, Some(10));
        assert!(info.column.is_some());
    }

    /// Check that we can look up a symbol in DWARF debug information.
    #[test]
    fn lookup_symbol() {
        let resolver =
            DwarfResolver::open(&data_file("test-stable-addrs-stripped-elf-with-dwarf.bin"))
                .unwrap();
        let opts = FindAddrOpts {
            file_offset: false,
            sym_type: SymType::Function,
        };

        let symbols = resolver.find_addr("factorial", &opts).unwrap();
        assert_eq!(symbols.len(), 1);

        // `factorial` resides at address 0x2000200.
        let symbol = symbols.first().unwrap();
        assert_eq!(symbol.addr, 0x2000200);
    }

    /// Check that we fail to look up variables.
    #[test]
    fn unsupported_ops() {
        let resolver =
            DwarfResolver::open(&data_file("test-stable-addrs-stripped-elf-with-dwarf.bin"))
                .unwrap();
        let opts = FindAddrOpts {
            file_offset: false,
            sym_type: SymType::Variable,
        };

        let err = resolver.find_addr("factorial", &opts).unwrap_err();
        assert_eq!(err.kind(), ErrorKind::Unsupported);

        let err = resolver
            .for_each(&opts, &mut |_| ControlFlow::Continue(()))
            .unwrap_err();
        assert_eq!(err.kind(), ErrorKind::Unsupported);
    }
}