blazesym/symbolize/
symbolizer.rs

1use std::borrow::Cow;
2use std::cell::OnceCell;
3use std::cell::RefCell;
4use std::collections::HashMap;
5use std::ffi::OsStr;
6use std::fmt::Debug;
7use std::fmt::Formatter;
8use std::fmt::Result as FmtResult;
9use std::fs::File;
10use std::mem::take;
11use std::ops::Deref as _;
12use std::ops::Range;
13use std::path::Path;
14use std::path::PathBuf;
15use std::rc::Rc;
16
17#[cfg(feature = "apk")]
18use crate::apk::create_apk_elf_path;
19#[cfg(feature = "breakpad")]
20use crate::breakpad::BreakpadResolver;
21use crate::elf::ElfParser;
22use crate::elf::ElfResolver;
23use crate::elf::ElfResolverData;
24use crate::elf::StaticMem;
25#[cfg(feature = "dwarf")]
26use crate::elf::DEFAULT_DEBUG_DIRS;
27use crate::file_cache::FileCache;
28#[cfg(feature = "gsym")]
29use crate::gsym::GsymResolver;
30use crate::insert_map::InsertMap;
31use crate::kernel::KernelResolver;
32use crate::kernel::KsymResolver;
33use crate::kernel::KALLSYMS;
34use crate::log;
35use crate::maps;
36use crate::maps::EntryPath;
37use crate::maps::MapsEntry;
38use crate::maps::PathName;
39use crate::mmap::Mmap;
40use crate::normalize;
41use crate::normalize::normalize_sorted_user_addrs_with_entries;
42use crate::normalize::Handler as _;
43#[cfg(feature = "apk")]
44use crate::pathlike::PathLike;
45use crate::perf_map::PerfMap;
46use crate::symbolize::Resolve;
47use crate::symbolize::TranslateFileOffset;
48use crate::util;
49#[cfg(linux)]
50use crate::util::uname_release;
51use crate::util::Dbg;
52#[cfg(feature = "tracing")]
53use crate::util::Hexify;
54use crate::util::OnceCellExt as _;
55use crate::vdso::create_vdso_parser;
56use crate::vdso::VDSO_MAPS_COMPONENT;
57#[cfg(feature = "apk")]
58use crate::zip;
59use crate::Addr;
60use crate::Error;
61use crate::ErrorExt as _;
62use crate::ErrorKind;
63use crate::IntoError as _;
64use crate::Pid;
65use crate::Result;
66
67use super::cache;
68use super::cache::Cache;
69#[cfg(feature = "apk")]
70use super::source::Apk;
71#[cfg(feature = "breakpad")]
72use super::source::Breakpad;
73use super::source::Elf;
74#[cfg(feature = "gsym")]
75use super::source::Gsym;
76#[cfg(feature = "gsym")]
77use super::source::GsymData;
78#[cfg(feature = "gsym")]
79use super::source::GsymFile;
80use super::source::Kernel;
81use super::source::Process;
82use super::source::Source;
83use super::FindSymOpts;
84use super::Input;
85use super::Reason;
86use super::ResolvedSym;
87use super::SrcLang;
88use super::Sym;
89use super::Symbolize;
90use super::Symbolized;
91
92
/// A thin wrapper around a [`MapsEntry`] reference that provides a
/// condensed [`Debug`] representation of the entry.
#[cfg(feature = "tracing")]
struct DebugMapsEntry<'entry>(&'entry MapsEntry);

#[cfg(feature = "tracing")]
impl Debug for DebugMapsEntry<'_> {
    /// Format the entry's address range, offset, and path, rendering
    /// numeric values in hexadecimal.
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        let entry = self.0;

        // We report the symbolic path, because it is the easiest one
        // to reason about when debugging. Note that it is not
        // necessarily the path that ends up being used during
        // symbolization.
        let path = match &entry.path_name {
            Some(PathName::Path(entry_path)) => &entry_path.symbolic_path,
            Some(PathName::Component(component)) => Path::new(component),
            None => Path::new("<no-path>"),
        };

        f.debug_struct("MapsEntry")
            .field("range", &format_args!("{:#x?}", entry.range))
            .field("offset", &format_args!("{:#x?}", entry.offset))
            .field("path", &path.display())
            .finish()
    }
}
123
124
125/// Demangle a symbol name using the demangling scheme for the given language.
126#[cfg(feature = "demangle")]
127fn maybe_demangle_impl(name: Cow<'_, str>, language: SrcLang) -> Cow<'_, str> {
128    match language {
129        SrcLang::Rust => rustc_demangle::try_demangle(name.as_ref())
130            .ok()
131            .as_ref()
132            .map(|x| Cow::Owned(format!("{x:#}"))),
133        SrcLang::Cpp => cpp_demangle::Symbol::new(name.as_ref())
134            .ok()
135            .and_then(|x| x.demangle(&Default::default()).ok().map(Cow::Owned)),
136        SrcLang::Unknown => rustc_demangle::try_demangle(name.as_ref())
137            .map(|x| Cow::Owned(format!("{x:#}")))
138            .ok()
139            .or_else(|| {
140                cpp_demangle::Symbol::new(name.as_ref())
141                    .ok()
142                    .and_then(|sym| sym.demangle(&Default::default()).ok().map(Cow::Owned))
143            }),
144    }
145    .unwrap_or(name)
146}
147
148#[cfg(not(feature = "demangle"))]
149fn maybe_demangle_impl(name: Cow<'_, str>, _language: SrcLang) -> Cow<'_, str> {
150    // Demangling is disabled.
151    name
152}
153
154/// Demangle the provided symbol if asked for and possible.
155fn maybe_demangle(symbol: Cow<'_, str>, language: SrcLang, demangle: bool) -> Cow<'_, str> {
156    if demangle {
157        maybe_demangle_impl(symbol, language)
158    } else {
159        symbol
160    }
161}
162
163
164/// Symbolize an address using the provided [`Resolver`].
165pub(crate) fn symbolize_with_resolver<'slf>(
166    addr: Addr,
167    resolver: &Resolver<'_, 'slf>,
168    find_sym_opts: &FindSymOpts,
169    demangle: bool,
170) -> Result<Symbolized<'slf>> {
171    /// Convert a `ResolvedSym` into a `Sym`, potentially performing
172    /// symbol name demangling in the process.
173    fn convert_sym<'sym>(addr: Addr, sym: ResolvedSym<'sym>, demangle: bool) -> Sym<'sym> {
174        let ResolvedSym {
175            name,
176            module,
177            addr: sym_addr,
178            size,
179            lang,
180            code_info,
181            mut inlined,
182            _non_exhaustive: (),
183        } = sym;
184
185        let () = inlined.iter_mut().for_each(|inlined_fn| {
186            let name = take(&mut inlined_fn.name);
187            inlined_fn.name = maybe_demangle(name, lang, demangle);
188        });
189
190        let sym = Sym {
191            name: maybe_demangle(Cow::Borrowed(name), lang, demangle),
192            module: module.map(Cow::Borrowed),
193            addr: sym_addr,
194            offset: (addr - sym_addr) as usize,
195            size,
196            code_info,
197            inlined,
198            _non_exhaustive: (),
199        };
200        sym
201    }
202
203    let sym = match resolver {
204        Resolver::Uncached(resolver) => match resolver.find_sym(addr, find_sym_opts)? {
205            Ok(sym) => convert_sym(addr, sym, demangle).into_owned(),
206            Err(reason) => return Ok(Symbolized::Unknown(reason)),
207        },
208        Resolver::Cached(resolver) => match resolver.find_sym(addr, find_sym_opts)? {
209            Ok(sym) => convert_sym(addr, sym, demangle),
210            Err(reason) => return Ok(Symbolized::Unknown(reason)),
211        },
212    };
213
214    Ok(Symbolized::Sym(sym))
215}
216
/// Information about a member inside an APK.
///
/// This type is used in conjunction with the APK "dispatcher" infrastructure;
/// see [`Builder::set_apk_dispatcher`]. An object of this type is what
/// an [`ApkDispatch`] function receives as its sole argument.
#[cfg(feature = "apk")]
#[derive(Clone, Debug)]
pub struct ApkMemberInfo<'dat> {
    /// The path to the APK itself.
    pub apk_path: &'dat Path,
    /// The path to the member inside the APK.
    pub member_path: &'dat Path,
    /// The memory mapped member data.
    ///
    /// The mapping is constrained to the data region of the member
    /// inside the archive.
    pub member_mmap: Mmap,
    /// The struct is non-exhaustive and open to extension.
    #[doc(hidden)]
    pub _non_exhaustive: (),
}
234
235
/// The signature of a dispatcher function for APK symbolization.
///
/// This type is used in conjunction with the APK "dispatcher" infrastructure;
/// see [`Builder::set_apk_dispatcher`].
///
/// If this function returns `Some` resolver, this resolver will be used
/// for addresses belonging to the represented archive member. If `None`
/// is returned, the default dispatcher will be used instead. An error
/// returned from the function is propagated and no resolver is created
/// for the member.
// TODO: Use a trait alias once stable.
#[cfg(feature = "apk")]
pub trait ApkDispatch: Fn(ApkMemberInfo<'_>) -> Result<Option<Box<dyn Resolve>>> {}

// Blanket implementation making every function or closure with the
// matching signature usable as an `ApkDispatch`.
#[cfg(feature = "apk")]
impl<F> ApkDispatch for F where F: Fn(ApkMemberInfo<'_>) -> Result<Option<Box<dyn Resolve>>> {}
250
251
/// The signature of a dispatcher function for process symbolization.
///
/// This type is used in conjunction with the process "dispatcher"
/// infrastructure; see [`Builder::set_process_dispatcher`].
///
/// If this function returns `Some` resolver, this resolver will be used
/// for addresses belonging to the represented process member. If `None`
/// is returned, the default dispatcher will be used instead.
pub trait ProcessDispatch: Fn(ProcessMemberInfo<'_>) -> Result<Option<Box<dyn Resolve>>> {}

// Blanket implementation making every function or closure with the
// matching signature usable as a `ProcessDispatch`.
impl<F> ProcessDispatch for F where F: Fn(ProcessMemberInfo<'_>) -> Result<Option<Box<dyn Resolve>>> {}
263
264
/// The APK dispatcher used by default: it treats the APK member as an
/// ELF file and creates an [`ElfResolver`] for it.
#[cfg(feature = "apk")]
fn default_apk_dispatcher(
    apk_path: &dyn PathLike,
    info: ApkMemberInfo<'_>,
    debug_dirs: Option<&[PathBuf]>,
) -> Result<Box<dyn Resolve>> {
    let ApkMemberInfo {
        member_path,
        member_mmap,
        ..
    } = info;

    // For reporting purposes we use an Android-style binary-in-APK
    // path.
    let reported_path = create_apk_elf_path(apk_path.represented_path(), member_path);
    let parser = ElfParser::from_mmap(member_mmap, Some(reported_path.into_os_string()));
    let resolver = ElfResolver::from_parser(Rc::new(parser), debug_dirs)?;
    Ok(Box::new(resolver))
}
282
283
/// Information about an address space member of a process.
///
/// An object of this type is what a [`ProcessDispatch`] function
/// receives as its sole argument.
#[derive(Clone, Debug)]
pub struct ProcessMemberInfo<'dat> {
    /// The virtual address range covered by this member.
    pub range: Range<Addr>,
    /// The "pathname" component in a `/proc/[pid]/maps` entry. See
    /// `proc(5)` section `/proc/[pid]/maps`.
    pub member_entry: &'dat PathName,
    /// The struct is non-exhaustive and open to extension.
    #[doc(hidden)]
    pub _non_exhaustive: (),
}
296
297
/// A builder for configurable construction of [`Symbolizer`] objects.
///
/// By default all features are enabled. No custom dispatcher is
/// installed by default.
#[derive(Debug)]
pub struct Builder {
    /// Whether or not to automatically reload file system based
    /// symbolization sources that were updated since the last
    /// symbolization operation.
    auto_reload: bool,
    /// Whether to attempt to gather source code location information.
    code_info: bool,
    /// Whether to report inlined functions as part of symbolization.
    ///
    /// The flag is ignored (with a warning being logged) if `code_info`
    /// is `false`.
    inlined_fns: bool,
    /// Whether or not to transparently demangle symbols.
    ///
    /// Demangling happens on a best-effort basis. Currently supported
    /// languages are Rust and C++ and the flag will have no effect if
    /// the underlying language does not mangle symbols (such as C).
    demangle: bool,
    /// List of additional directories in which split debug information
    /// is looked for.
    #[cfg(feature = "dwarf")]
    debug_dirs: Vec<PathBuf>,
    /// The "dispatch" function to use when symbolizing addresses
    /// mapping to members of an APK.
    #[cfg(feature = "apk")]
    apk_dispatch: Option<Dbg<Box<dyn ApkDispatch>>>,
    /// The "dispatch" function to use when symbolizing addresses
    /// mapping to members of a process.
    process_dispatch: Option<Dbg<Box<dyn ProcessDispatch>>>,
}
329
330impl Builder {
331    /// Enable/disable auto reloading of symbolization sources in the
332    /// presence of updates.
333    pub fn enable_auto_reload(mut self, enable: bool) -> Self {
334        self.auto_reload = enable;
335        self
336    }
337
338    /// Enable/disable source code location information (line numbers,
339    /// file names etc.).
340    ///
341    /// This option only has an effect if `debug_syms` of the particular
342    /// symbol source is set to `true`. Furthermore, it is a necessary
343    /// prerequisite for retrieving inlined function information (see
344    /// [`Self::enable_inlined_fns`]).
345    pub fn enable_code_info(mut self, enable: bool) -> Self {
346        self.code_info = enable;
347        self
348    }
349
350    /// Enable/disable inlined function reporting.
351    ///
352    /// This option only has an effect if `code_info` is `true`.
353    pub fn enable_inlined_fns(mut self, enable: bool) -> Self {
354        self.inlined_fns = enable;
355        self
356    }
357
358    /// Enable/disable transparent demangling of symbol names.
359    ///
360    /// Demangling happens on a best-effort basis. Currently supported languages
361    /// are Rust and C++ and the flag will have no effect if the underlying
362    /// language does not mangle symbols (such as C).
363    pub fn enable_demangling(mut self, enable: bool) -> Self {
364        self.demangle = enable;
365        self
366    }
367
368    /// Set debug directories to search for split debug information.
369    ///
370    /// These directories will be consulted (in given order) when resolving
371    /// debug links in binaries. By default `/usr/lib/debug` and `/lib/debug/`
372    /// will be searched. Setting a list here will overwrite these defaults, so
373    /// make sure to include these directories as desired.
374    ///
375    /// Note that the directory containing a symbolization source is always an
376    /// implicit candidate target directory of the highest precedence.
377    ///
378    /// A value of `None` reverts to using the default set of directories.
379    #[cfg(feature = "dwarf")]
380    #[cfg_attr(docsrs, doc(cfg(feature = "dwarf")))]
381    pub fn set_debug_dirs<D, P>(mut self, debug_dirs: Option<D>) -> Self
382    where
383        D: IntoIterator<Item = P>,
384        P: AsRef<Path>,
385    {
386        if let Some(debug_dirs) = debug_dirs {
387            self.debug_dirs = debug_dirs
388                .into_iter()
389                .map(|p| p.as_ref().to_path_buf())
390                .collect();
391        } else {
392            self.debug_dirs = DEFAULT_DEBUG_DIRS
393                .iter()
394                .map(PathBuf::from)
395                .collect::<Vec<_>>();
396        }
397        self
398    }
399
400    /// Set the "dispatch" function to use when symbolizing addresses
401    /// mapping to members of an APK.
402    #[cfg(feature = "apk")]
403    #[cfg_attr(docsrs, doc(cfg(feature = "apk")))]
404    pub fn set_apk_dispatcher<D>(mut self, apk_dispatch: D) -> Self
405    where
406        D: ApkDispatch + 'static,
407    {
408        self.apk_dispatch = Some(Dbg(Box::new(apk_dispatch)));
409        self
410    }
411
412    /// Set the "dispatch" function to use when symbolizing addresses
413    /// mapping to members of a process.
414    pub fn set_process_dispatcher<D>(mut self, process_dispatch: D) -> Self
415    where
416        D: ProcessDispatch + 'static,
417    {
418        self.process_dispatch = Some(Dbg(Box::new(process_dispatch)));
419        self
420    }
421
422    /// Create the [`Symbolizer`] object.
423    pub fn build(self) -> Symbolizer {
424        let Self {
425            auto_reload,
426            code_info,
427            inlined_fns,
428            demangle,
429            #[cfg(feature = "dwarf")]
430            debug_dirs,
431            #[cfg(feature = "apk")]
432            apk_dispatch,
433            process_dispatch,
434        } = self;
435
436        let find_sym_opts = match (code_info, inlined_fns) {
437            (false, inlined_fns) => {
438                if inlined_fns {
439                    log::warn!(
440                        "inlined function reporting asked for but more general code information inquiry is disabled; flag is being ignored"
441                    );
442                }
443                FindSymOpts::Basic
444            }
445            (true, false) => FindSymOpts::CodeInfo,
446            (true, true) => FindSymOpts::CodeInfoAndInlined,
447        };
448
449        Symbolizer {
450            #[cfg(feature = "apk")]
451            apk_cache: FileCache::builder().enable_auto_reload(auto_reload).build(),
452            #[cfg(feature = "breakpad")]
453            breakpad_cache: FileCache::builder().enable_auto_reload(auto_reload).build(),
454            elf_cache: FileCache::builder().enable_auto_reload(auto_reload).build(),
455            #[cfg(feature = "gsym")]
456            gsym_cache: FileCache::builder().enable_auto_reload(auto_reload).build(),
457            ksym_cache: FileCache::builder().enable_auto_reload(auto_reload).build(),
458            perf_map_cache: FileCache::builder().enable_auto_reload(auto_reload).build(),
459            process_vma_cache: RefCell::new(HashMap::new()),
460            process_cache: InsertMap::new(),
461            vdso_parser: OnceCell::new(),
462            find_sym_opts,
463            demangle,
464            #[cfg(feature = "dwarf")]
465            debug_dirs,
466            #[cfg(feature = "apk")]
467            apk_dispatch,
468            process_dispatch,
469        }
470    }
471}
472
473impl Default for Builder {
474    fn default() -> Self {
475        Self {
476            auto_reload: true,
477            code_info: true,
478            inlined_fns: true,
479            demangle: true,
480            #[cfg(feature = "dwarf")]
481            debug_dirs: DEFAULT_DEBUG_DIRS
482                .iter()
483                .map(PathBuf::from)
484                .collect::<Vec<_>>(),
485            #[cfg(feature = "apk")]
486            apk_dispatch: None,
487            process_dispatch: None,
488        }
489    }
490}
491
492
/// The handler used for symbolizing addresses in a process.
///
/// It is invoked for each address (along with the maps entry the
/// address belongs to, if any) and collects one [`Symbolized`] object
/// per handled address in `all_symbols`.
struct SymbolizeHandler<'sym> {
    /// The "outer" `Symbolizer` instance.
    symbolizer: &'sym Symbolizer,
    /// The PID of the process in which we symbolize.
    pid: Pid,
    /// Whether or not to consult debug symbols to satisfy the request
    /// (if present).
    debug_syms: bool,
    /// Whether or not to consult the process' perf map (if any) to
    /// satisfy the request.
    perf_map: bool,
    /// Whether to work with `/proc/<pid>/map_files/` entries or with
    /// symbolic paths mentioned in `/proc/<pid>/maps` instead.
    map_files: bool,
    /// Whether or not to symbolize addresses in a vDSO (virtual dynamic
    /// shared object).
    vdso: bool,
    /// Symbols representing the symbolized addresses.
    all_symbols: Vec<Symbolized<'sym>>,
}
513
514impl SymbolizeHandler<'_> {
515    #[cfg(feature = "apk")]
516    fn handle_apk_addr(&mut self, addr: Addr, file_off: u64, entry_path: &EntryPath) -> Result<()> {
517        let result = if self.map_files {
518            self.symbolizer
519                .apk_resolver(entry_path, file_off, self.debug_syms)?
520        } else {
521            let path = &entry_path.symbolic_path;
522            self.symbolizer
523                .apk_resolver(path, file_off, self.debug_syms)?
524        };
525
526        match result {
527            Some((elf_resolver, elf_addr)) => {
528                let symbol = self.symbolizer.symbolize_with_resolver(
529                    elf_addr,
530                    &Resolver::Cached(elf_resolver.as_symbolize()),
531                )?;
532                let () = self.all_symbols.push(symbol);
533            }
534            None => self.handle_unknown_addr(addr, Reason::InvalidFileOffset),
535        }
536        Ok(())
537    }
538
539    fn handle_elf_addr(&mut self, addr: Addr, file_off: u64, entry_path: &EntryPath) -> Result<()> {
540        let resolver = if self.map_files {
541            self.symbolizer.elf_cache.elf_resolver(
542                entry_path,
543                self.symbolizer.maybe_debug_dirs(self.debug_syms),
544            )
545        } else {
546            let path = &entry_path.symbolic_path;
547            self.symbolizer
548                .elf_cache
549                .elf_resolver(path, self.symbolizer.maybe_debug_dirs(self.debug_syms))
550        }?;
551
552
553        match resolver.file_offset_to_virt_offset(file_off)? {
554            Some(addr) => {
555                let symbol = self
556                    .symbolizer
557                    .symbolize_with_resolver(addr, &Resolver::Cached(resolver.deref()))?;
558                let () = self.all_symbols.push(symbol);
559            }
560            None => self.handle_unknown_addr(addr, Reason::InvalidFileOffset),
561        }
562        Ok(())
563    }
564
565    fn handle_perf_map_addr(&mut self, addr: Addr) -> Result<()> {
566        if let Some(perf_map) = self.symbolizer.perf_map_resolver(self.pid)? {
567            let symbolized = self
568                .symbolizer
569                .symbolize_with_resolver(addr, &Resolver::Cached(perf_map))?;
570            let () = self.all_symbols.push(symbolized);
571        } else {
572            let () = self.handle_unknown_addr(addr, Reason::UnknownAddr);
573        }
574        Ok(())
575    }
576
577    fn handle_vdso_addr(
578        &mut self,
579        addr: Addr,
580        file_off: u64,
581        vdso_range: &Range<Addr>,
582    ) -> Result<()> {
583        let parser = self.symbolizer.vdso_parser(self.pid, vdso_range)?;
584        match parser.file_offset_to_virt_offset(file_off)? {
585            Some(addr) => {
586                let symbol = self
587                    .symbolizer
588                    .symbolize_with_resolver(addr, &Resolver::Cached(parser))?;
589                let () = self.all_symbols.push(symbol);
590            }
591            None => self.handle_unknown_addr(addr, Reason::InvalidFileOffset),
592        }
593        Ok(())
594    }
595}
596
597impl normalize::Handler<Reason> for SymbolizeHandler<'_> {
598    #[cfg_attr(feature = "tracing", crate::log::instrument(skip_all, fields(addr = format_args!("{_addr:#x}"), ?reason)))]
599    fn handle_unknown_addr(&mut self, _addr: Addr, reason: Reason) {
600        let () = self.all_symbols.push(Symbolized::Unknown(reason));
601    }
602
603    #[cfg_attr(feature = "tracing", crate::log::instrument(skip_all, fields(addr = format_args!("{addr:#x}"), entry = ?DebugMapsEntry(entry))))]
604    fn handle_entry_addr(&mut self, addr: Addr, entry: &MapsEntry) -> Result<()> {
605        let file_off = addr - entry.range.start + entry.offset;
606
607        if let Some(path_name) = &entry.path_name {
608            if let Some(resolver) = self
609                .symbolizer
610                .process_dispatch_resolver(entry.range.clone(), path_name)?
611            {
612                let () = match resolver.file_offset_to_virt_offset(file_off)? {
613                    Some(addr) => {
614                        let symbol = self.symbolizer.symbolize_with_resolver(
615                            addr,
616                            &Resolver::Cached(resolver.as_symbolize()),
617                        )?;
618                        let () = self.all_symbols.push(symbol);
619                    }
620                    None => self.handle_unknown_addr(addr, Reason::InvalidFileOffset),
621                };
622                return Ok(())
623            }
624
625            // If there is no process dispatcher installed or it did
626            // not return a resolver for the entry, we use our
627            // default handling scheme.
628        }
629
630        match &entry.path_name {
631            Some(PathName::Path(entry_path)) => {
632                let ext = entry_path
633                    .symbolic_path
634                    .extension()
635                    .unwrap_or_else(|| OsStr::new(""));
636                match ext.to_str() {
637                    #[cfg(feature = "apk")]
638                    Some("apk") | Some("zip") => self.handle_apk_addr(addr, file_off, entry_path),
639                    _ => self.handle_elf_addr(addr, file_off, entry_path),
640                }
641            }
642            Some(PathName::Component(component)) => {
643                match component.as_str() {
644                    component if self.vdso && component == VDSO_MAPS_COMPONENT => {
645                        let () = self.handle_vdso_addr(addr, file_off, &entry.range)?;
646                    }
647                    _ => {
648                        let () = self.handle_unknown_addr(addr, Reason::Unsupported);
649                    }
650                }
651                Ok(())
652            }
653            // If there is no path associated with this entry, we don't
654            // really have any idea what the address may belong to. But
655            // there is a chance that the address is part of the perf
656            // map, so check that.
657            // TODO: It's not entirely clear if a perf map could also
658            //       cover addresses belonging to entries with a path.
659            None if self.perf_map => self.handle_perf_map_addr(addr),
660            None => {
661                let () = self.handle_unknown_addr(addr, Reason::UnknownAddr);
662                Ok(())
663            }
664        }
665    }
666}
667
668
669/// An enumeration helping us to differentiate between cached and uncached
670/// symbol resolvers.
671///
672/// An "uncached" resolver is one that is created on the spot. We do so for
673/// cases when we cannot keep the input data, for example (e.g., when we
674/// have no control over its lifetime).
675/// A "cached" resolver is one that ultimately lives in one of our internal
676/// caches. These caches have the same lifetime as the `Symbolizer` object
677/// itself (represented here as `'slf`).
678///
679/// Objects of this type are at the core of our logic determining whether to
680/// heap allocate certain data such as paths or symbol names or whether to just
681/// hand out references to mmap'ed (or potentially static) data.
682#[derive(Debug)]
683pub(crate) enum Resolver<'tmp, 'slf> {
684    Uncached(&'tmp (dyn Symbolize + 'tmp)),
685    Cached(&'slf dyn Symbolize),
686}
687
688#[cfg(feature = "tracing")]
689impl<'tmp, 'slf: 'tmp> Resolver<'tmp, 'slf> {
690    fn inner(&self) -> &(dyn Symbolize + '_) {
691        match self {
692            Self::Uncached(symbolize) | Self::Cached(symbolize) => *symbolize,
693        }
694    }
695}
696
697
/// A helper type that allows for "collecting" an iterator known to
/// yield exactly one element into that very element, via generic means.
#[repr(transparent)]
struct Single<T>(T);

impl<A> FromIterator<A> for Single<A> {
    /// Wrap the first (and only) element of the iterator.
    ///
    /// # Panics
    /// Panics if the iterator is empty. In debug builds, the presence
    /// of more than one element triggers an assertion failure as well.
    fn from_iter<I>(i: I) -> Self
    where
        I: IntoIterator<Item = A>,
    {
        let mut elements = i.into_iter();
        let first = elements.next().unwrap();
        debug_assert!(elements.next().is_none());
        Self(first)
    }
}
714
715
/// A helper trait used for abstracting over input cardinality while
/// only heap allocating as necessary.
trait Addrs: AsRef<[Addr]> {
    /// The type that the per-address symbolization results are
    /// collected into.
    type OutTy<'slf>: FromIterator<Result<Symbolized<'slf>>>;
}

// A slice of addresses yields a heap allocated vector of results.
impl Addrs for &[Addr] {
    type OutTy<'slf> = Result<Vec<Symbolized<'slf>>>;
}

// A single address yields a single result, with no vector involved.
impl Addrs for [Addr; 1] {
    type OutTy<'slf> = Single<Result<Symbolized<'slf>>>;
}
729
730
/// Symbolizer provides an interface to symbolize addresses.
///
/// An instance of this type is the unit at which symbolization inputs are
/// cached. That is to say, source files (DWARF, ELF, ...) and the parsed data
/// structures may be kept around in memory for the lifetime of this object to
/// speed up future symbolization requests. If you are working with large input
/// sources and/or do not intend to perform multiple symbolization requests
/// (i.e., [`symbolize`][Symbolizer::symbolize] or
/// [`symbolize_single`][Symbolizer::symbolize_single] calls) for the same
/// symbolization source, you may want to consider creating a new `Symbolizer`
/// instance regularly.
///
/// # Notes
/// Please note that demangling results are not cached.
#[derive(Debug)]
pub struct Symbolizer {
    /// Cache of APK archives, each paired with a map from a data range
    /// inside the archive to a resolver.
    // NOTE(review): presumably the range is that of an archive member's
    //               data; confirm against the APK resolver logic.
    #[cfg(feature = "apk")]
    apk_cache: FileCache<(zip::Archive, InsertMap<Range<u64>, Box<dyn Resolve>>)>,
    /// Cache of Breakpad resolvers.
    #[cfg(feature = "breakpad")]
    breakpad_cache: FileCache<BreakpadResolver>,
    /// Cache of ELF resolver data.
    elf_cache: FileCache<ElfResolverData>,
    /// Cache of Gsym resolvers for Gsym files.
    #[cfg(feature = "gsym")]
    gsym_cache: FileCache<GsymResolver<'static>>,
    /// Cache of kernel symbol resolvers.
    ksym_cache: FileCache<Rc<KsymResolver>>,
    /// Cache of perf maps.
    perf_map_cache: FileCache<PerfMap>,
    /// Cache of VMA data on per-process basis.
    ///
    /// This member is only populated by explicit requests for caching
    /// data by the user.
    process_vma_cache: RefCell<HashMap<Pid, Box<[maps::MapsEntry]>>>,
    /// Map from the path name of a process member to an optional
    /// resolver.
    // NOTE(review): presumably this holds the results of the process
    //               dispatcher; confirm against its usage.
    process_cache: InsertMap<PathName, Option<Box<dyn Resolve>>>,
    /// The ELF parser used for the system-wide vDSO.
    vdso_parser: OnceCell<Box<ElfParser<StaticMem>>>,
    /// The options to use when asking a resolver to find a symbol.
    find_sym_opts: FindSymOpts,
    /// Whether or not to demangle symbol names.
    demangle: bool,
    /// The directories in which split debug information is looked for.
    #[cfg(feature = "dwarf")]
    debug_dirs: Vec<PathBuf>,
    /// The custom APK "dispatch" function, if any.
    #[cfg(feature = "apk")]
    apk_dispatch: Option<Dbg<Box<dyn ApkDispatch>>>,
    /// The custom process "dispatch" function, if any.
    process_dispatch: Option<Dbg<Box<dyn ProcessDispatch>>>,
}
772
773impl Symbolizer {
774    /// Create a new [`Symbolizer`].
775    pub fn new() -> Self {
776        Builder::default().build()
777    }
778
779    /// Retrieve a [`Builder`] object for configurable construction of a
780    /// [`Symbolizer`].
781    pub fn builder() -> Builder {
782        Builder::default()
783    }
784
785    /// Register an [`ElfResolver`] to use for subsequent symbolization
786    /// requests.
787    ///
788    /// Register an existing externally managed [`ElfResolver`] object
789    /// to use in subsequent symbolization requests involving `path`.
790    /// Doing so allows for reuse of already parsed ELF data.
791    ///
792    /// This method will fail if a cached [`ElfResolver`] is already
793    /// present for the given path.
794    pub fn register_elf_resolver(
795        &mut self,
796        path: &Path,
797        elf_resolver: Rc<ElfResolver>,
798    ) -> Result<()> {
799        self.elf_cache.register(path, elf_resolver)
800    }
801
802    /// Symbolize an address using the provided [`Resolver`].
803    #[cfg_attr(feature = "tracing", crate::log::instrument(skip_all, fields(addr = format_args!("{addr:#x}"), resolver = ?resolver.inner())))]
804    fn symbolize_with_resolver<'slf>(
805        &'slf self,
806        addr: Addr,
807        resolver: &Resolver<'_, 'slf>,
808    ) -> Result<Symbolized<'slf>> {
809        symbolize_with_resolver(addr, resolver, &self.find_sym_opts, self.demangle)
810    }
811
812    #[cfg(feature = "gsym")]
813    fn create_gsym_resolver(&self, path: &Path, file: &File) -> Result<GsymResolver<'static>> {
814        let resolver = GsymResolver::from_file(path.to_path_buf(), file)?;
815        Ok(resolver)
816    }
817
818    #[cfg(feature = "gsym")]
819    fn gsym_resolver<'slf>(&'slf self, path: &Path) -> Result<&'slf GsymResolver<'static>> {
820        let (file, cell) = self.gsym_cache.entry(path)?;
821        let resolver = cell.get_or_try_init_(|| self.create_gsym_resolver(path, file))?;
822        Ok(resolver)
823    }
824
825    #[cfg(feature = "apk")]
826    fn create_apk_resolver<'slf>(
827        &'slf self,
828        apk: &zip::Archive,
829        apk_path: &dyn PathLike,
830        file_off: u64,
831        debug_dirs: Option<&[PathBuf]>,
832        resolver_map: &'slf InsertMap<Range<u64>, Box<dyn Resolve>>,
833    ) -> Result<Option<(&'slf dyn Resolve, Addr)>> {
834        let actual_path = apk_path.actual_path();
835        // Find the APK entry covering the calculated file offset.
836        for apk_entry in apk.entries() {
837            let apk_entry = apk_entry.with_context(|| {
838                format!("failed to iterate `{}` members", actual_path.display())
839            })?;
840            let bounds = apk_entry.data_offset..apk_entry.data_offset + apk_entry.data.len() as u64;
841
842            if bounds.contains(&file_off) {
843                let resolver = resolver_map.get_or_try_insert(bounds.clone(), || {
844                    let mmap = apk
845                        .mmap()
846                        .constrain(bounds.clone())
847                        .ok_or_invalid_input(|| {
848                            format!(
849                                "invalid APK entry data bounds ({bounds:?}) in {}",
850                                actual_path.display()
851                            )
852                        })?;
853                    let info = ApkMemberInfo {
854                        apk_path: actual_path,
855                        member_path: apk_entry.path,
856                        member_mmap: mmap,
857                        _non_exhaustive: (),
858                    };
859
860                    let resolver = if let Some(Dbg(apk_dispatch)) = &self.apk_dispatch {
861                        if let Some(resolver) = (apk_dispatch)(info.clone())? {
862                            resolver
863                        } else {
864                            default_apk_dispatcher(apk_path, info, debug_dirs)?
865                        }
866                    } else {
867                        default_apk_dispatcher(apk_path, info, debug_dirs)?
868                    };
869
870                    Ok(resolver)
871                })?;
872
873                let elf_off = file_off - apk_entry.data_offset;
874                if let Some(addr) = resolver.file_offset_to_virt_offset(elf_off)? {
875                    return Ok(Some((resolver.deref(), addr)))
876                }
877                break
878            }
879        }
880
881        Ok(None)
882    }
883
884    #[cfg(feature = "apk")]
885    fn apk_resolver<'slf>(
886        &'slf self,
887        path: &dyn PathLike,
888        file_off: u64,
889        debug_syms: bool,
890    ) -> Result<Option<(&'slf dyn Resolve, Addr)>> {
891        let actual_path = path.actual_path();
892        let (file, cell) = self.apk_cache.entry(actual_path)?;
893        let (apk, resolvers) = cell.get_or_try_init_(|| {
894            let mmap = Mmap::builder()
895                .map(file)
896                .with_context(|| format!("failed to memory map `{}`", actual_path.display()))?;
897            let apk = zip::Archive::with_mmap(mmap)
898                .with_context(|| format!("failed to open zip file `{}`", actual_path.display()))?;
899            let resolvers = InsertMap::new();
900            Result::<_, Error>::Ok((apk, resolvers))
901        })?;
902
903        let debug_dirs = self.maybe_debug_dirs(debug_syms);
904        let result = self.create_apk_resolver(apk, path, file_off, debug_dirs, resolvers);
905        result
906    }
907
908    #[cfg(feature = "breakpad")]
909    fn create_breakpad_resolver(&self, path: &Path, file: &File) -> Result<BreakpadResolver> {
910        let resolver = BreakpadResolver::from_file(path.to_path_buf(), file)?;
911        Ok(resolver)
912    }
913
914    #[cfg(feature = "breakpad")]
915    fn breakpad_resolver<'slf>(&'slf self, path: &Path) -> Result<&'slf BreakpadResolver> {
916        let (file, cell) = self.breakpad_cache.entry(path)?;
917        let resolver = cell.get_or_try_init_(|| self.create_breakpad_resolver(path, file))?;
918        Ok(resolver)
919    }
920
921    fn create_perf_map_resolver(&self, path: &Path, file: &File) -> Result<PerfMap> {
922        let perf_map = PerfMap::from_file(path, file)?;
923        Ok(perf_map)
924    }
925
926    fn perf_map_resolver(&self, pid: Pid) -> Result<Option<&PerfMap>> {
927        let path = PerfMap::path(pid);
928
929        match self.perf_map_cache.entry(&path) {
930            Ok((file, cell)) => {
931                let perf_map =
932                    cell.get_or_try_init_(|| self.create_perf_map_resolver(&path, file))?;
933                Ok(Some(perf_map))
934            }
935            Err(err) if err.kind() == ErrorKind::NotFound => Ok(None),
936            Err(err) => {
937                Err(err).with_context(|| format!("failed to open perf map `{}`", path.display()))
938            }
939        }
940    }
941
942    fn vdso_parser<'slf>(
943        &'slf self,
944        pid: Pid,
945        range: &Range<Addr>,
946    ) -> Result<&'slf ElfParser<StaticMem>> {
947        let parser = self.vdso_parser.get_or_try_init_(|| {
948            let parser = create_vdso_parser(pid, range)?;
949            Result::<_, Error>::Ok(Box::new(parser))
950        })?;
951        Ok(parser)
952    }
953
954    fn process_dispatch_resolver<'slf>(
955        &'slf self,
956        range: Range<Addr>,
957        path_name: &PathName,
958    ) -> Result<Option<&'slf dyn Resolve>> {
959        if let Some(Dbg(process_dispatch)) = &self.process_dispatch {
960            let resolver = self
961                .process_cache
962                .get_or_try_insert(path_name.clone(), || {
963                    let info = ProcessMemberInfo {
964                        range,
965                        member_entry: path_name,
966                        _non_exhaustive: (),
967                    };
968                    (process_dispatch)(info)
969                })?;
970            Ok(resolver.as_deref())
971        } else {
972            Ok(None)
973        }
974    }
975
976    /// Symbolize the given list of user space addresses in the provided
977    /// process.
978    fn symbolize_user_addrs(
979        &self,
980        addrs: &[Addr],
981        pid: Pid,
982        debug_syms: bool,
983        perf_map: bool,
984        map_files: bool,
985        vdso: bool,
986    ) -> Result<Vec<Symbolized<'_>>> {
987        let mut handler = SymbolizeHandler {
988            symbolizer: self,
989            pid,
990            debug_syms,
991            perf_map,
992            map_files,
993            vdso,
994            all_symbols: Vec::with_capacity(addrs.len()),
995        };
996
997        let handler = util::with_ordered_elems(
998            addrs,
999            |handler: &mut SymbolizeHandler<'_>| handler.all_symbols.as_mut_slice(),
1000            |sorted_addrs| -> Result<SymbolizeHandler<'_>> {
1001                if let Some(cached) = self.process_vma_cache.borrow().get(&pid) {
1002                    let mut entry_iter = cached.iter().map(Ok);
1003                    let entries = |_addr| entry_iter.next();
1004
1005                    let () = normalize_sorted_user_addrs_with_entries(
1006                        sorted_addrs,
1007                        entries,
1008                        &mut handler,
1009                    )?;
1010                    Ok(handler)
1011                } else {
1012                    let mut entry_iter = maps::parse_filtered(pid)?;
1013                    let entries = |_addr| entry_iter.next();
1014
1015                    let () = normalize_sorted_user_addrs_with_entries(
1016                        sorted_addrs,
1017                        entries,
1018                        &mut handler,
1019                    )?;
1020                    Ok(handler)
1021                }
1022            },
1023        )?;
1024        Ok(handler.all_symbols)
1025    }
1026
1027    fn create_ksym_resolver(&self, path: &Path, file: &File) -> Result<Rc<KsymResolver>> {
1028        let resolver = KsymResolver::load_from_reader(file, path)?;
1029        let resolver = Rc::new(resolver);
1030        Ok(resolver)
1031    }
1032
1033    fn ksym_resolver<'slf>(&'slf self, path: &Path) -> Result<&'slf Rc<KsymResolver>> {
1034        let (file, cell) = self.ksym_cache.entry(path)?;
1035        let resolver = cell.get_or_try_init_(|| self.create_ksym_resolver(path, file))?;
1036        Ok(resolver)
1037    }
1038
1039    #[cfg(linux)]
1040    fn create_kernel_resolver(&self, src: &Kernel) -> Result<KernelResolver> {
1041        use crate::util::bytes_to_os_str;
1042        use crate::MaybeDefault;
1043
1044        let Kernel {
1045            kallsyms,
1046            vmlinux,
1047            kaslr_offset,
1048            debug_syms,
1049            _non_exhaustive: (),
1050        } = src;
1051
1052        let ksym_resolver = match kallsyms {
1053            MaybeDefault::Some(kallsyms) => {
1054                let ksym_resolver = self.ksym_resolver(kallsyms)?;
1055                Some(ksym_resolver)
1056            }
1057            MaybeDefault::Default => {
1058                let kallsyms = Path::new(KALLSYMS);
1059                let result = self.ksym_resolver(kallsyms);
1060                match result {
1061                    Ok(resolver) => Some(resolver),
1062                    Err(err) => {
1063                        log::warn!(
1064                            "failed to load kallsyms from {}: {err}; ignoring...",
1065                            kallsyms.display()
1066                        );
1067                        None
1068                    }
1069                }
1070            }
1071            MaybeDefault::None => None,
1072        };
1073
1074        let elf_resolver = match vmlinux {
1075            MaybeDefault::Some(vmlinux) => {
1076                let resolver = self
1077                    .elf_cache
1078                    .elf_resolver(vmlinux, self.maybe_debug_dirs(*debug_syms))?;
1079                Some(resolver)
1080            }
1081            MaybeDefault::Default => {
1082                let release = uname_release()?;
1083                let release = bytes_to_os_str(release.as_bytes())?;
1084                let basename = OsStr::new("vmlinux-");
1085                let dirs = [Path::new("/boot/"), Path::new("/usr/lib/debug/boot/")];
1086                let vmlinux = dirs.iter().find_map(|dir| {
1087                    let mut file = basename.to_os_string();
1088                    let () = file.push(release);
1089                    let path = dir.join(file);
1090                    path.exists().then_some(path)
1091                });
1092
1093                if let Some(vmlinux) = vmlinux {
1094                    let result = self
1095                        .elf_cache
1096                        .elf_resolver(&vmlinux, self.maybe_debug_dirs(*debug_syms));
1097                    match result {
1098                        Ok(resolver) => {
1099                            log::debug!("found suitable vmlinux file `{}`", vmlinux.display());
1100                            Some(resolver)
1101                        }
1102                        Err(err) => {
1103                            log::warn!(
1104                                "failed to load vmlinux `{}`: {err}; ignoring...",
1105                                vmlinux.display()
1106                            );
1107                            None
1108                        }
1109                    }
1110                } else {
1111                    None
1112                }
1113            }
1114            MaybeDefault::None => None,
1115        };
1116
1117        KernelResolver::new(ksym_resolver.cloned(), elf_resolver.cloned(), *kaslr_offset)
1118    }
1119
1120    #[cfg(not(linux))]
1121    fn create_kernel_resolver(&self, _src: &Kernel) -> Result<KernelResolver> {
1122        Err(Error::with_unsupported(
1123            "kernel address symbolization is unsupported on operating systems other than Linux",
1124        ))
1125    }
1126
1127    /// Cache some or all information associated with a symbolization
1128    /// source.
1129    ///
1130    /// Symbolization data is generally being cached when symbolization
1131    /// is performed. However, sometimes it is necessary to cache data
1132    /// early, for example to make subsequent symbolization requests as
1133    /// fast running as possible. In rare instances it can also be a
1134    /// matter of correctness. Process metadata such as VMAs and their
1135    /// offsets can be cached so that even after the processes exited
1136    /// symbolization requests can still be satisfied.
1137    ///
1138    /// If this method fails, any previously cached data is left
1139    /// untouched and will be used subsequently as if no failure
1140    /// occurred. Put differently, this method is only effectful on the
1141    /// happy path.
1142    #[cfg_attr(feature = "tracing", crate::log::instrument(skip_all, fields(cache = ?cache), err))]
1143    pub fn cache(&self, cache: &Cache) -> Result<()> {
1144        match cache {
1145            Cache::Elf(cache::Elf {
1146                path,
1147                _non_exhaustive: (),
1148            }) => {
1149                let _unpinned = self.elf_cache.unpin(path);
1150                let result = self
1151                    .elf_cache
1152                    .elf_resolver(path, self.maybe_debug_dirs(false));
1153                // Make sure to always pin the entry, even if bailing
1154                // due to a retrieval error. Basically, the semantics we
1155                // want to have is that if caching new data fails the
1156                // previously cached data is still present.
1157                let _pinned = self.elf_cache.pin(path);
1158                let resolver = result?;
1159
1160                let () = resolver.cache()?;
1161            }
1162            Cache::Process(cache::Process {
1163                pid,
1164                cache_vmas,
1165                _non_exhaustive: (),
1166            }) => {
1167                if *cache_vmas {
1168                    let parsed = maps::parse_filtered(*pid)?.collect::<Result<Box<_>>>()?;
1169                    let _prev = self.process_vma_cache.borrow_mut().insert(*pid, parsed);
1170                }
1171            }
1172        }
1173        Ok(())
1174    }
1175
    /// Symbolize the addresses or offsets contained in `input` using
    /// the symbolization source `src`.
    ///
    /// Every source kind accepts only certain [`Input`] variants; for
    /// all others an "unsupported" error is reported. The result is
    /// collected into `A::OutTy`, the output type associated with the
    /// address container `A`.
    fn symbolize_impl<'in_, 'slf, A>(
        &'slf self,
        src: &Source,
        input: Input<A>,
    ) -> Result<A::OutTy<'slf>>
    where
        A: Copy + Addrs + 'in_,
    {
        match src {
            // APKs accept file offsets only.
            #[cfg(feature = "apk")]
            Source::Apk(Apk {
                path,
                debug_syms,
                _non_exhaustive: (),
            }) => {
                let addrs = match input {
                    Input::VirtOffset(..) => {
                        return Err(Error::with_unsupported(
                            "APK symbolization does not support virtual offset inputs",
                        ))
                    }
                    Input::AbsAddr(..) => {
                        return Err(Error::with_unsupported(
                            "APK symbolization does not support absolute address inputs",
                        ))
                    }
                    Input::FileOffset(offsets) => offsets,
                };

                // Each offset is resolved to an APK member resolver and
                // an address inside that member individually; offsets
                // for which that is not possible are reported as
                // unknown.
                let symbols = addrs
                    .as_ref()
                    .iter()
                    .copied()
                    .map(
                        |offset| match self.apk_resolver(path, offset, *debug_syms)? {
                            Some((elf_resolver, elf_addr)) => self.symbolize_with_resolver(
                                elf_addr,
                                &Resolver::Cached(elf_resolver.as_symbolize()),
                            ),
                            None => Ok(Symbolized::Unknown(Reason::InvalidFileOffset)),
                        },
                    )
                    .collect();
                Ok(symbols)
            }
            // Breakpad files accept file offsets only.
            #[cfg(feature = "breakpad")]
            Source::Breakpad(Breakpad {
                path,
                _non_exhaustive: (),
            }) => {
                let addrs = match input {
                    Input::VirtOffset(..) => {
                        return Err(Error::with_unsupported(
                            "Breakpad symbolization does not support virtual offset inputs",
                        ))
                    }
                    Input::AbsAddr(..) => {
                        return Err(Error::with_unsupported(
                            "Breakpad symbolization does not support absolute address inputs",
                        ))
                    }
                    Input::FileOffset(addrs) => addrs,
                };

                let resolver = self.breakpad_resolver(path)?;
                let symbols = addrs
                    .as_ref()
                    .iter()
                    .copied()
                    .map(|addr| self.symbolize_with_resolver(addr, &Resolver::Cached(resolver)))
                    .collect();
                Ok(symbols)
            }
            // ELF files accept virtual offsets as well as file offsets.
            Source::Elf(Elf {
                path,
                debug_syms,
                _non_exhaustive: (),
            }) => {
                let resolver = self
                    .elf_cache
                    .elf_resolver(path, self.maybe_debug_dirs(*debug_syms))?;
                match input {
                    Input::VirtOffset(addrs) => {
                        let symbols = addrs
                            .as_ref()
                            .iter()
                            .copied()
                            .map(|addr| {
                                self.symbolize_with_resolver(
                                    addr,
                                    &Resolver::Cached(resolver.deref()),
                                )
                            })
                            .collect();
                        Ok(symbols)
                    }
                    Input::AbsAddr(..) => Err(Error::with_unsupported(
                        "ELF symbolization does not support absolute address inputs",
                    )),
                    Input::FileOffset(offsets) => {
                        // File offsets are translated to virtual
                        // offsets first; those that cannot be
                        // translated are reported as unknown.
                        let symbols = offsets
                            .as_ref()
                            .iter()
                            .copied()
                            .map(
                                |offset| match resolver.file_offset_to_virt_offset(offset)? {
                                    Some(addr) => self.symbolize_with_resolver(
                                        addr,
                                        &Resolver::Cached(resolver.deref()),
                                    ),
                                    None => Ok(Symbolized::Unknown(Reason::InvalidFileOffset)),
                                },
                            )
                            .collect();
                        Ok(symbols)
                    }
                }
            }
            // The kernel accepts absolute addresses only.
            Source::Kernel(kernel) => {
                let addrs = match input {
                    Input::AbsAddr(addrs) => addrs,
                    Input::VirtOffset(..) => {
                        return Err(Error::with_unsupported(
                            "kernel symbolization does not support virtual offset inputs",
                        ))
                    }
                    Input::FileOffset(..) => {
                        return Err(Error::with_unsupported(
                            "kernel symbolization does not support file offset inputs",
                        ))
                    }
                };

                // A `KernelResolver` object is constructed on every
                // invocation and handed out as `Resolver::Uncached`.
                let resolver = Rc::new(self.create_kernel_resolver(kernel)?);
                let symbols = addrs
                    .as_ref()
                    .iter()
                    .copied()
                    .map(|addr| {
                        self.symbolize_with_resolver(addr, &Resolver::Uncached(resolver.deref()))
                    })
                    .collect();
                Ok(symbols)
            }
            // Processes accept absolute addresses only.
            Source::Process(Process {
                pid,
                debug_syms,
                perf_map,
                map_files,
                vdso,
                _non_exhaustive: (),
            }) => {
                let addrs = match input {
                    Input::AbsAddr(addrs) => addrs,
                    Input::VirtOffset(..) => {
                        return Err(Error::with_unsupported(
                            "process symbolization does not support virtual offset inputs",
                        ))
                    }
                    Input::FileOffset(..) => {
                        return Err(Error::with_unsupported(
                            "process symbolization does not support file offset inputs",
                        ))
                    }
                };

                let symbols = self.symbolize_user_addrs(
                    addrs.as_ref(),
                    *pid,
                    *debug_syms,
                    *perf_map,
                    *map_files,
                    *vdso,
                )?;
                // All addresses were symbolized in one go above; wrap
                // each result in `Ok` so that it can be collected into
                // the output type the same way as in the other arms.
                Ok(symbols.into_iter().map(Ok).collect())
            }
            // In-memory Gsym data accepts virtual offsets only.
            #[cfg(feature = "gsym")]
            Source::Gsym(Gsym::Data(GsymData {
                data,
                _non_exhaustive: (),
            })) => {
                let addrs = match input {
                    Input::VirtOffset(addrs) => addrs,
                    Input::AbsAddr(..) => {
                        return Err(Error::with_unsupported(
                            "Gsym symbolization does not support absolute address inputs",
                        ))
                    }
                    Input::FileOffset(..) => {
                        return Err(Error::with_unsupported(
                            "Gsym symbolization does not support file offset inputs",
                        ))
                    }
                };

                // The resolver is created from the provided data on
                // every invocation and handed out as
                // `Resolver::Uncached`.
                let resolver = Rc::new(GsymResolver::with_data(data)?);
                let symbols = addrs
                    .as_ref()
                    .iter()
                    .copied()
                    .map(|addr| {
                        self.symbolize_with_resolver(addr, &Resolver::Uncached(resolver.deref()))
                    })
                    .collect();
                Ok(symbols)
            }
            // Gsym files accept virtual offsets only.
            #[cfg(feature = "gsym")]
            Source::Gsym(Gsym::File(GsymFile {
                path,
                _non_exhaustive: (),
            })) => {
                let addrs = match input {
                    Input::VirtOffset(addrs) => addrs,
                    Input::AbsAddr(..) => {
                        return Err(Error::with_unsupported(
                            "Gsym symbolization does not support absolute address inputs",
                        ))
                    }
                    Input::FileOffset(..) => {
                        return Err(Error::with_unsupported(
                            "Gsym symbolization does not support file offset inputs",
                        ))
                    }
                };

                let resolver = self.gsym_resolver(path)?;
                let symbols = addrs
                    .as_ref()
                    .iter()
                    .copied()
                    .map(|addr| self.symbolize_with_resolver(addr, &Resolver::Cached(resolver)))
                    .collect();
                Ok(symbols)
            }
            // NOTE(review): presumably a placeholder variant that is
            //               never constructed -- confirm against the
            //               `Source` definition.
            Source::Phantom(()) => unreachable!(),
        }
    }
1413
1414    /// Symbolize a list of addresses.
1415    ///
1416    /// Symbolize a list of addresses using the provided symbolization
1417    /// [`Source`].
1418    ///
1419    /// This function returns exactly one [`Symbolized`] object for each input
1420    /// address, in the order of input addresses.
1421    ///
1422    /// The following table lists which features the various formats
1423    /// (represented by the [`Source`] argument) support. If a feature is not
1424    /// supported, the corresponding data in the [`Sym`] result will not be
1425    /// populated.
1426    ///
1427    /// | Format      | Feature                          | Supported by format? | Supported by blazesym? |
1428    /// |-------------|----------------------------------|:--------------------:|:----------------------:|
1429    /// | Breakpad    | symbol size                      | yes                  | yes                    |
1430    /// |             | source code location information | yes                  | yes                    |
1431    /// |             | inlined function information     | yes                  | yes                    |
1432    /// | ELF         | symbol size                      | yes                  | yes                    |
1433    /// |             | source code location information | no                   | N/A                    |
1434    /// |             | inlined function information     | no                   | N/A                    |
1435    /// | DWARF       | symbol size                      | yes                  | yes                    |
1436    /// |             | source code location information | yes                  | yes                    |
1437    /// |             | inlined function information     | yes                  | yes                    |
1438    /// | Gsym        | symbol size                      | yes                  | yes                    |
1439    /// |             | source code location information | yes                  | yes                    |
1440    /// |             | inlined function information     | yes                  | yes                    |
1441    /// | Ksym        | symbol size                      | no                   | N/A                    |
1442    /// |             | source code location information | no                   | N/A                    |
1443    /// |             | inlined function information     | no                   | N/A                    |
1444    /// | BPF program | symbol size                      | no (?)               | no                     |
1445    /// |             | source code location information | yes                  | yes                    |
1446    /// |             | inlined function information     | no                   | no                     |
1447    #[cfg_attr(feature = "tracing", crate::log::instrument(skip_all, fields(src = ?src, addrs = ?input.map(Hexify)), err))]
1448    pub fn symbolize<'slf>(
1449        &'slf self,
1450        src: &Source,
1451        input: Input<&[u64]>,
1452    ) -> Result<Vec<Symbolized<'slf>>> {
1453        // TODO: Use `Result::flatten` once our MSRV is 1.89.
1454        self.symbolize_impl(src, input).and_then(|result| result)
1455    }
1456
1457    /// Symbolize a single input address/offset.
1458    ///
1459    /// In general, it is more performant to symbolize addresses in batches
1460    /// using [`symbolize`][Self::symbolize]. However, in cases where only a
1461    /// single address is available, this method provides a more convenient API.
1462    #[cfg_attr(feature = "tracing", crate::log::instrument(skip_all, fields(src = ?src, input = format_args!("{input:#x?}")), err))]
1463    pub fn symbolize_single<'slf>(
1464        &'slf self,
1465        src: &Source,
1466        input: Input<u64>,
1467    ) -> Result<Symbolized<'slf>> {
1468        let input = input.map(|addr| [addr; 1]);
1469        self.symbolize_impl(src, input)?.0
1470    }
1471
    /// Determine the debug directories to use, if any.
    ///
    /// `None` is reported if `debug_syms` is `false`. Otherwise the
    /// result is the symbolizer's `debug_dirs` when the `dwarf` feature
    /// is enabled and an empty slice when it is not.
    fn maybe_debug_dirs(&self, debug_syms: bool) -> Option<&[PathBuf]> {
        // With DWARF support compiled in, use the directories stored
        // in the symbolizer.
        #[cfg(feature = "dwarf")]
        let debug_dirs = &self.debug_dirs;
        // Without DWARF support fall back to an empty list.
        #[cfg(not(feature = "dwarf"))]
        let debug_dirs = &[];
        debug_syms.then_some(debug_dirs)
    }
1479}
1480
1481impl Default for Symbolizer {
1482    fn default() -> Self {
1483        Self::new()
1484    }
1485}
1486
1487
1488#[cfg(test)]
1489#[allow(clippy::missing_transmute_annotations)]
1490mod tests {
1491    use super::*;
1492
1493    use std::env::current_exe;
1494
1495    use test_fork::fork;
1496    use test_log::test;
1497
1498    use crate::maps::Perm;
1499    use crate::symbolize::CodeInfo;
1500
1501
1502    /// Exercise the `Debug` representation of various types.
1503    #[test]
1504    fn debug_repr() {
1505        let builder = Symbolizer::builder();
1506        assert_ne!(format!("{builder:?}"), "");
1507
1508        let symbolizer = builder.build();
1509        assert_ne!(format!("{symbolizer:?}"), "");
1510
1511        let test_elf = Path::new(&env!("CARGO_MANIFEST_DIR"))
1512            .join("data")
1513            .join("test-stable-addrs.bin");
1514        let parser = Rc::new(ElfParser::open(test_elf.as_path()).unwrap());
1515        let resolver = ElfResolver::from_parser(parser, None).unwrap();
1516        let resolver = Resolver::Cached(&resolver);
1517        assert_ne!(format!("{resolver:?}"), "");
1518        assert_ne!(format!("{:?}", resolver.inner()), "");
1519
1520        let entries = maps::parse(Pid::Slf).unwrap();
1521        let () = entries.for_each(|entry| {
1522            assert_ne!(format!("{:?}", DebugMapsEntry(&entry.unwrap())), "");
1523        });
1524    }
1525
1526    /// Test forcing a double check of all `Symbolizer` size changes.
1527    #[cfg(target_pointer_width = "64")]
1528    #[test]
1529    fn symbolizer_size() {
1530        // TODO: This size is rather large and we should look into
1531        //       minimizing it.
1532        assert_eq!(size_of::<Symbolizer>(), 1008);
1533    }
1534
1535    /// Check that we can correctly construct the source code path to a symbol.
1536    #[test]
1537    fn symbol_source_code_path() {
1538        let mut info = CodeInfo {
1539            dir: None,
1540            file: Cow::Borrowed(OsStr::new("source.c")),
1541            line: Some(1),
1542            column: Some(2),
1543            _non_exhaustive: (),
1544        };
1545        assert_eq!(info.to_path(), Path::new("source.c"));
1546
1547        info.dir = Some(Cow::Borrowed(Path::new("/foobar")));
1548        assert_eq!(info.to_path(), Path::new("/foobar/source.c"));
1549    }
1550
1551    /// Make sure that we can demangle symbols.
1552    #[test]
1553    fn demangle() {
1554        let symbol = Cow::Borrowed("_ZN4core9panicking9panic_fmt17h5f1a6fd39197ad62E");
1555        let name = maybe_demangle_impl(symbol, SrcLang::Rust);
1556        assert_eq!(name, "core::panicking::panic_fmt");
1557
1558        let symbol = Cow::Borrowed("_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc");
1559        let name = maybe_demangle_impl(symbol, SrcLang::Cpp);
1560        assert_eq!(
1561            name,
1562            "std::basic_ostream<char, std::char_traits<char> >& std::operator<< <std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*)"
1563        );
1564    }
1565
/// Ensure that symbolization fails with `ErrorKind::Unsupported` for
/// the source/input combinations listed below, both when going
/// through `symbolize` and through `symbolize_single`.
#[test]
fn unsupported_inputs() {
    // All test artifacts live in the crate's `data` directory.
    let data_dir = Path::new(&env!("CARGO_MANIFEST_DIR")).join("data");
    let test_elf = data_dir.join("test-stable-addrs.bin");
    let test_gsym = data_dir.join("test-stable-addrs.gsym");
    let test_sym = data_dir.join("test-stable-addrs.sym");
    let test_zip = data_dir.join("test.zip");

    // Each source is paired with the input kinds it is expected to
    // reject.
    let unsupported = [
        (
            Source::Apk(Apk::new(test_zip)),
            &[
                Input::VirtOffset([40].as_slice()),
                Input::AbsAddr([41].as_slice()),
            ][..],
        ),
        (
            Source::Breakpad(Breakpad::new(test_sym)),
            &[
                Input::VirtOffset([50].as_slice()),
                Input::AbsAddr([51].as_slice()),
            ][..],
        ),
        (
            Source::Process(Process::new(Pid::Slf)),
            &[
                Input::VirtOffset([42].as_slice()),
                Input::FileOffset([43].as_slice()),
            ][..],
        ),
        (
            Source::Kernel(Kernel::default()),
            &[
                Input::VirtOffset([44].as_slice()),
                Input::FileOffset([45].as_slice()),
            ][..],
        ),
        (
            Source::Elf(Elf::new(test_elf)),
            &[Input::AbsAddr([46].as_slice())][..],
        ),
        (
            Source::Gsym(Gsym::File(GsymFile::new(test_gsym))),
            &[
                Input::AbsAddr([48].as_slice()),
                Input::FileOffset([49].as_slice()),
            ][..],
        ),
    ];

    let symbolizer = Symbolizer::new();
    for (src, inputs) in unsupported {
        for input in inputs.iter().copied() {
            // Batch symbolization has to reject the input...
            let batch_err = symbolizer.symbolize(&src, input).unwrap_err();
            assert_eq!(batch_err.kind(), ErrorKind::Unsupported);

            // ...and so does symbolization of a single address.
            let single = input.try_to_single().unwrap();
            let single_err = symbolizer.symbolize_single(&src, single).unwrap_err();
            assert_eq!(single_err.kind(), ErrorKind::Unsupported);
        }
    }
}
1637
/// Feed the symbolization handler two addresses, one covered by a
/// mapping that names a "component" (as opposed to a file) and one
/// covered by a mapping without any path, and check that a result is
/// reported for each, with the component address being flagged as
/// unsupported.
#[test]
fn symbolize_entry_various() {
    let addrs = [0x10000, 0x30000];

    let component_entry = MapsEntry {
        range: 0x10000..0x20000,
        perm: Perm::default(),
        offset: 0,
        path_name: Some(PathName::Component("a-component".to_string())),
        build_id: None,
    };
    let pathless_entry = MapsEntry {
        range: 0x30000..0x40000,
        perm: Perm::default(),
        offset: 0,
        path_name: None,
        build_id: None,
    };

    // Entries are handed out one after the other, irrespective of the
    // address being asked for.
    let mut remaining = [Ok(component_entry), Ok(pathless_entry)].into_iter();
    let entries = |_addr| remaining.next();

    let symbolizer = Symbolizer::new();
    let mut handler = SymbolizeHandler {
        symbolizer: &symbolizer,
        pid: Pid::Slf,
        debug_syms: false,
        perf_map: false,
        map_files: false,
        vdso: false,
        all_symbols: Vec::new(),
    };
    let () =
        normalize_sorted_user_addrs_with_entries(addrs.iter().copied(), entries, &mut handler)
            .unwrap();

    let syms = handler.all_symbols;
    assert_eq!(syms.len(), 2);

    let first = &syms[0];
    assert!(
        matches!(first, Symbolized::Unknown(Reason::Unsupported)),
        "{:?}",
        first
    );
}
1688
1689    /// Check that we instantiate only a minimal number of resolvers
1690    /// when using process symbolization with `map_files` (i.e., going
1691    /// through symbolic links).
1692    ///
1693    /// Effectively, this is an integration test that makes sure that we
1694    /// dereference symbolic links properly and not duplicate binary
1695    /// parsing over and over, but it peeks at implementation details.
1696    // Run in separate process to make sure that VMAs are not influenced
1697    // by tests running concurrently.
1698    #[fork]
1699    #[test]
1700    fn resolver_instantiation() {
1701        let exe = current_exe().unwrap();
1702        let addrs = maps::parse(Pid::Slf)
1703            .unwrap()
1704            .filter_map(|result| {
1705                let entry = result.unwrap();
1706                let path = entry.path_name.and_then(|path_name| {
1707                    path_name.as_path().map(|path| path.symbolic_path.clone())
1708                });
1709                if path == Some(exe.clone()) {
1710                    Some(entry.range.start)
1711                } else {
1712                    None
1713                }
1714            })
1715            .collect::<Box<[_]>>();
1716
1717        assert!(addrs.len() > 1, "{:x?}", addrs.as_ref());
1718
1719        let src = Source::Process(Process::new(Pid::Slf));
1720        let symbolizer = Symbolizer::new();
1721        // We don't really care whether we could symbolize the addresses
1722        // (unlikely), just that there was no error.
1723        let _result = symbolizer.symbolize(&src, Input::AbsAddr(&addrs)).unwrap();
1724
1725        assert_eq!(symbolizer.elf_cache.entry_count(), 1);
1726    }
1727}