breakpad_symbols/
lib.rs

1// Copyright 2015 Ted Mielczarek. See the COPYRIGHT
2// file at the top-level directory of this distribution.
3
4//! A library for working with [Google Breakpad][breakpad]'s
5//! text-format [symbol files][symbolfiles].
6//!
7//! See the [walker][] module for documentation on CFI evaluation.
8//!
9//! The highest-level API provided by this crate is to use the
10//! [`Symbolizer`][symbolizer] struct.
11//!
12//! [breakpad]: https://chromium.googlesource.com/breakpad/breakpad/+/master/
13//! [symbolfiles]: https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md
14//! [symbolizer]: struct.Symbolizer.html
15//!
16//! # Examples
17//!
18//! ```
//! // std::env::set_current_dir(env!("CARGO_MANIFEST_DIR"));
20//! use breakpad_symbols::{SimpleSymbolSupplier, Symbolizer, SimpleFrame, SimpleModule};
21//! use debugid::DebugId;
22//! use std::path::PathBuf;
23//! use std::str::FromStr;
24//!
25//! #[tokio::main]
26//! async fn main() {
27//!     let paths = vec!(PathBuf::from("../testdata/symbols/"));
28//!     let supplier = SimpleSymbolSupplier::new(paths);
29//!     let symbolizer = Symbolizer::new(supplier);
30//!
31//!     // Simple function name lookup with debug file, debug id, address.
32//!     let debug_id = DebugId::from_str("5A9832E5287241C1838ED98914E9B7FF1").unwrap();
33//!     assert_eq!(symbolizer.get_symbol_at_address("test_app.pdb", debug_id, 0x1010)
34//!         .await
35//!         .unwrap(),
36//!         "vswprintf");
37//! }
38//! ```
39
40use async_trait::async_trait;
41use cachemap2::CacheMap;
42use debugid::{CodeId, DebugId};
43use futures_util::lock::Mutex as FutMutex;
44use tracing::trace;
45
46use std::collections::HashMap;
47use std::fs;
48use std::path::PathBuf;
49use std::sync::Mutex;
50use std::{borrow::Cow, sync::Arc};
51
52pub use minidump_common::{traits::Module, utils::basename};
53pub use sym_file::walker;
54
55pub use crate::sym_file::{CfiRules, SymbolFile};
56
57#[cfg(feature = "http")]
58pub mod http;
59mod sym_file;
60
61#[cfg(feature = "http")]
62pub use http::*;
63
// Re-exports for the purposes of the cfi_eval fuzzer. Not public API.
//
// The module is only compiled when the `fuzz` feature is enabled and is
// hidden from rustdoc.
#[doc(hidden)]
#[cfg(feature = "fuzz")]
pub mod fuzzing_private_exports {
    pub use crate::sym_file::walker::{eval_win_expr_for_fuzzer, walk_with_stack_cfi};
    pub use crate::sym_file::{StackInfoWin, WinStackThing};
}
71
/// Statistics on the symbols of a module.
///
/// [`Symbolizer::stats`] returns a map of these.
#[derive(Default, Debug, Clone)]
pub struct SymbolStats {
    /// If the module's symbols were downloaded, this is the URL used.
    pub symbol_url: Option<String>,
    /// Whether the symbols were found and loaded into memory.
    pub loaded_symbols: bool,
    /// Whether we tried to parse the symbols, but failed.
    pub corrupt_symbols: bool,
    /// If the module's debug info had to be looked up, this is the debug info used.
    pub extra_debug_info: Option<DebugInfoResult>,
}
84
/// Statistics on pending symbols.
///
/// Fetched with [`Symbolizer::pending_stats`], which hands out a snapshot
/// (a clone) of the current counters.
#[derive(Default, Debug, Clone)]
pub struct PendingSymbolStats {
    /// The number of symbols we have finished processing
    /// (could be either successful or not, either way is fine).
    pub symbols_processed: u64,
    /// The number of symbols we have been asked to process.
    pub symbols_requested: u64,
}
96
/// A `Module` implementation that holds arbitrary data.
///
/// This can be useful for getting symbols for a module when you
/// have a debug id and filename but not an actual minidump. If you have a
/// minidump, you should be using [`MinidumpModule`][minidumpmodule].
///
/// Every field is optional; the [`Module`] implementation substitutes a
/// fallback (`0`, an empty string or `None`) for fields that are unset.
///
/// [minidumpmodule]: ../minidump/struct.MinidumpModule.html
#[derive(Default)]
pub struct SimpleModule {
    /// Base address of the module; reported as `0` when unset.
    pub base_address: Option<u64>,
    /// Size of the module; reported as `0` when unset.
    pub size: Option<u64>,
    /// Path or name of the code file; reported as `""` when unset.
    pub code_file: Option<String>,
    /// Identifier of the code file, if known.
    pub code_identifier: Option<CodeId>,
    /// Path or name of the debug file, if known.
    pub debug_file: Option<String>,
    /// Identifier of the debug file, if known.
    pub debug_id: Option<DebugId>,
    /// Version string of the module, if known.
    pub version: Option<String>,
}
114
115impl SimpleModule {
116    /// Create a `SimpleModule` with the given `debug_file` and `debug_id`.
117    ///
118    /// Uses `default` for the remaining fields.
119    pub fn new(debug_file: &str, debug_id: DebugId) -> SimpleModule {
120        SimpleModule {
121            debug_file: Some(String::from(debug_file)),
122            debug_id: Some(debug_id),
123            ..SimpleModule::default()
124        }
125    }
126
127    /// Create a `SimpleModule` with `debug_file`, `debug_id`, `code_file`, and `code_identifier`.
128    ///
129    /// Uses `default` for the remaining fields.
130    pub fn from_basic_info(
131        debug_file: Option<String>,
132        debug_id: Option<DebugId>,
133        code_file: Option<String>,
134        code_identifier: Option<CodeId>,
135    ) -> SimpleModule {
136        SimpleModule {
137            debug_file,
138            debug_id,
139            code_file,
140            code_identifier,
141            ..SimpleModule::default()
142        }
143    }
144}
145
146impl Module for SimpleModule {
147    fn base_address(&self) -> u64 {
148        self.base_address.unwrap_or(0)
149    }
150    fn size(&self) -> u64 {
151        self.size.unwrap_or(0)
152    }
153    fn code_file(&self) -> Cow<'_, str> {
154        self.code_file
155            .as_ref()
156            .map_or(Cow::from(""), |s| Cow::Borrowed(&s[..]))
157    }
158    fn code_identifier(&self) -> Option<CodeId> {
159        self.code_identifier.as_ref().cloned()
160    }
161    fn debug_file(&self) -> Option<Cow<'_, str>> {
162        self.debug_file.as_ref().map(|s| Cow::Borrowed(&s[..]))
163    }
164    fn debug_identifier(&self) -> Option<DebugId> {
165        self.debug_id
166    }
167    fn version(&self) -> Option<Cow<'_, str>> {
168        self.version.as_ref().map(|s| Cow::Borrowed(&s[..]))
169    }
170}
171
/// Return the last component of `path`, treating both `/` and `\` as
/// separators so Windows and POSIX-style paths work regardless of host.
///
/// A path without any separator is returned unchanged; a path ending in a
/// separator yields an empty string.
fn leafname(path: &str) -> &str {
    match path.rfind(['/', '\\']) {
        // Both separators are one byte wide, so `sep + 1` is a char boundary.
        Some(sep) => &path[sep + 1..],
        None => path,
    }
}
176
/// Swap a trailing `match_extension` for `new_extension`, or append
/// `new_extension` when `filename` does not end with `match_extension`.
///
/// The existing extension is lowercased before being compared with
/// `match_extension`, so `match_extension` itself should be lowercase.
fn replace_or_add_extension(filename: &str, match_extension: &str, new_extension: &str) -> String {
    let stem = match filename.rsplit_once('.') {
        Some((stem, ext)) if ext.to_lowercase() == match_extension => stem,
        _ => filename,
    };
    format!("{}.{}", stem, new_extension)
}
190
/// A lookup we would like to perform for some file (sym, exe, pdb, dll, ...)
///
/// Built by [`breakpad_sym_lookup`], [`extra_debuginfo_lookup`] and
/// [`binary_lookup`]; the exact form of each field depends on which of those
/// produced it.
#[derive(Debug, Clone)]
pub struct FileLookup {
    /// String form of the module's debug identifier.
    ///
    /// [`breakpad_sym_lookup`] stores the Breakpad-formatted id here; the other
    /// builders store the `DebugId`'s own `to_string()` output.
    pub debug_id: String,
    /// Name of the file associated with the lookup (for Breakpad symbol
    /// lookups this is the `.sym` file name).
    pub debug_file: String,
    /// Relative path of the file; `SimpleSymbolSupplier` joins it onto each
    /// search directory.
    pub cache_rel: String,
    /// Relative path of the file on a symbol server.
    // NOTE(review): presumably appended to a server base URL by the `http`
    // module, which is not visible here; confirm.
    pub server_rel: String,
}
199
200/// Get a relative symbol path at which to locate symbols for `module`.
201///
202/// Symbols are generally stored in the layout used by Microsoft's symbol
203/// server and associated tools:
204/// `<debug filename>/<debug identifier>/<debug filename>.sym`. If
205/// `debug filename` ends with *.pdb* the leaf filename will have that
206/// removed.
207/// `extension` is the expected extension for the symbol filename, generally
208/// *sym* if Breakpad text format symbols are expected.
209///
210/// The debug filename and debug identifier can be found in the
211/// [first line][module_line] of the symbol file output by the dump_syms tool.
212/// You can use [this script][packagesymbols] to run dump_syms and put the
213/// resulting symbol files in the proper directory structure.
214///
215/// [module_line]: https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#MODULE-records
216/// [packagesymbols]: https://gist.github.com/luser/2ad32d290f224782fcfc#file-packagesymbols-py
217pub fn breakpad_sym_lookup(module: &(dyn Module + Sync)) -> Option<FileLookup> {
218    let debug_file = module.debug_file()?;
219    let debug_id = module.debug_identifier()?;
220
221    let leaf = leafname(&debug_file);
222    let filename = replace_or_add_extension(leaf, "pdb", "sym");
223    let rel_path = [leaf, &debug_id.breakpad().to_string(), &filename[..]].join("/");
224    Some(FileLookup {
225        cache_rel: rel_path.clone(),
226        server_rel: rel_path,
227        debug_id: debug_id.breakpad().to_string(),
228        debug_file: filename,
229    })
230}
231
232/// Get a relative symbol path at which to locate symbols for `module` using
233/// the code file and code identifier. This is helpful for Microsoft modules
234/// where we don't have a valid debug filename and debug id to retrieve the
235/// symbol file with and the symbol server supports looking up debug filename
236/// and debug id using the code file and code id.
237///
238/// If `code file` ends with *.dll* the leaf filename will have that removed.
239/// `extension` is the expected extension for the symbol filename, generally
240/// *sym* if Breakpad text format symbols are expected.
241///
242/// `<code file>/<code identifier>/<code file>.sym`
243pub fn code_info_breakpad_sym_lookup(module: &(dyn Module + Sync)) -> Option<String> {
244    let code_file = module.code_file();
245    let code_identifier = module.code_identifier()?;
246
247    if code_file.is_empty() {
248        return None;
249    }
250    let leaf = leafname(&code_file);
251    let filename = replace_or_add_extension(leaf, "dll", "sym");
252    let rel_path = [
253        leaf,
254        &code_identifier.to_string().to_uppercase(),
255        &filename[..],
256    ]
257    .join("/");
258
259    Some(rel_path)
260}
261
262/// Returns a lookup for this module's extra debuginfo (pdb)
263pub fn extra_debuginfo_lookup(module: &(dyn Module + Sync)) -> Option<FileLookup> {
264    let debug_file = module.debug_file()?;
265    let debug_id = module.debug_identifier()?;
266
267    let leaf = leafname(&debug_file);
268    let rel_path = [leaf, &debug_id.breakpad().to_string(), leaf].join("/");
269    Some(FileLookup {
270        cache_rel: rel_path.clone(),
271        server_rel: rel_path,
272        debug_id: debug_id.to_string(),
273        debug_file: leaf.to_string(),
274    })
275}
276
277/// Returns a lookup for this module's binary (exe, dll, so, dylib, ...)
278pub fn binary_lookup(module: &(dyn Module + Sync)) -> Option<FileLookup> {
279    // NOTE: to make dump_syms happy we're currently moving the bin
280    // to be next to the pdb. This changes where we would naively put it,
281    // hence the two different paths!
282
283    let code_file = module.code_file();
284    let code_id = module.code_identifier()?;
285    let debug_file = module.debug_file()?;
286    let debug_id = module.debug_identifier()?;
287
288    let bin_leaf = leafname(&code_file);
289    let debug_leaf = leafname(&debug_file);
290
291    Some(FileLookup {
292        cache_rel: [debug_leaf, &debug_id.breakpad().to_string(), bin_leaf].join("/"),
293        server_rel: [bin_leaf, code_id.as_ref(), bin_leaf].join("/"),
294        debug_id: debug_id.to_string(),
295        debug_file: debug_file.to_string(),
296    })
297}
298
299/// Mangles a lookup to mozilla's format where the last char is replaced by an underscore
300/// (and the file is wrapped in a CAB, but dump_syms handles that transparently).
301pub fn moz_lookup(mut lookup: FileLookup) -> FileLookup {
302    lookup.server_rel.pop().unwrap();
303    lookup.server_rel.push('_');
304    lookup
305}
306
307pub fn lookup(module: &(dyn Module + Sync), file_kind: FileKind) -> Option<FileLookup> {
308    match file_kind {
309        FileKind::BreakpadSym => breakpad_sym_lookup(module),
310        FileKind::Binary => binary_lookup(module),
311        FileKind::ExtraDebugInfo => extra_debuginfo_lookup(module),
312    }
313}
314
/// Possible results of locating symbols for a module.
///
/// Because symbols may be found from different sources, symbol providers
/// are usually configured to "cascade" into the next one whenever they report
/// `NotFound`.
///
/// Cascading currently assumes that if any provider finds symbols for
/// a module, all other providers will find the same symbols (if any).
/// Therefore cascading will not be applied if a LoadError or ParseError
/// occurs (because presumably, all the other sources will also fail to
/// load/parse.)
///
/// In theory we could do some interesting things where we attempt to
/// be more robust and actually merge together the symbols from multiple
/// sources, but that would make it difficult to cache symbol files, and
/// would rarely actually improve results.
///
/// Since symbol files can be on the order of a gigabyte(!) and downloaded
/// from the network, aggressive caching is pretty important. The current
/// approach is a nice balance of simple and effective.
#[derive(Debug, thiserror::Error)]
pub enum SymbolError {
    /// Symbol file could not be found.
    ///
    /// In this case other symbol providers may still be able to find it!
    #[error("symbol file not found")]
    NotFound,
    /// The module was lacking either the debug file or debug id, as such the
    /// path of the symbol could not be generated.
    #[error("the debug file or id were missing")]
    MissingDebugFileOrId,
    /// Symbol file could not be loaded into memory.
    ///
    /// Wraps the underlying I/O error; any `std::io::Error` converts into this
    /// variant via `From`.
    #[error("couldn't read input stream")]
    LoadError(#[from] std::io::Error),
    /// Symbol file was too corrupt to be parsed at all.
    ///
    /// Because symbol files are pretty modular, many corruptions/ambiguities
    /// can be either repaired or discarded at a fairly granular level
    /// (e.g. a bad STACK WIN line can be discarded without affecting anything
    /// else). But sometimes we can't make any sense of the symbol file, and
    /// you find yourself here.
    ///
    /// The fields are the error description and the line number it refers to,
    /// in that order (see the display format).
    #[error("parse error: {0} at line {1}")]
    ParseError(&'static str, u64),
}
359
/// Errors produced by [`SymbolSupplier::locate_file`].
#[derive(Clone, Debug, thiserror::Error)]
pub enum FileError {
    /// The requested file could not be found.
    #[error("file not found")]
    NotFound,
}
365
/// An error produced by fill_symbol.
///
/// Carries no data: it only signals that no symbols were available for the
/// module.
#[derive(Debug)]
pub struct FillSymbolError {
    // We don't want to yield a full SymbolError for fill_symbol
    // as this would involve cloning bulky Error strings every time
    // someone requested symbols for a missing module.
    //
    // As it turns out there's currently no reason to care about *why*
    // fill_symbol failed, so for now this is just a dummy type until we have
    // something to put here.
    //
    // The only reason fill_symbol *can* produce an Err is so that
    // the caller can distinguish between "we had symbols, but this address
    // didn't map to a function name" and "we had no symbols for that module"
    // (this is used as a heuristic for stack scanning).
}
382
383impl PartialEq for SymbolError {
384    fn eq(&self, other: &SymbolError) -> bool {
385        matches!(
386            (self, other),
387            (SymbolError::NotFound, SymbolError::NotFound)
388                | (SymbolError::LoadError(_), SymbolError::LoadError(_))
389                | (SymbolError::ParseError(..), SymbolError::ParseError(..))
390        )
391    }
392}
393
/// The result of a lookup by code_file/code_identifier against a symbol
/// server.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DebugInfoResult {
    /// The debug file name that the lookup resolved to.
    pub debug_file: String,
    /// The debug identifier that the lookup resolved to.
    pub debug_identifier: DebugId,
}
401
/// The result of locating symbols, with debug info if it had to be looked up.
///
/// This is the success value of [`SymbolSupplier::locate_symbols`].
#[derive(Debug, PartialEq, Eq)]
pub struct LocateSymbolsResult {
    /// The parsed symbol file.
    pub symbols: SymbolFile,
    /// The debug info that was looked up for the module, if a lookup happened.
    pub extra_debug_info: Option<DebugInfoResult>,
}
408
/// A trait for things that can locate symbols for a given module.
#[async_trait]
pub trait SymbolSupplier {
    /// Locate and load a symbol file for `module`.
    ///
    /// Implementations may use any strategy for locating and loading
    /// symbols.
    ///
    /// On success the parsed symbols are returned together with any debug
    /// info that had to be looked up; failures are reported as a
    /// [`SymbolError`].
    async fn locate_symbols(
        &self,
        module: &(dyn Module + Sync),
    ) -> Result<LocateSymbolsResult, SymbolError>;

    /// Locate a specific file associated with a `module`.
    ///
    /// `file_kind` selects which of the module's files is wanted. On success
    /// a path to the file is returned; implementations may use any strategy
    /// for locating it.
    async fn locate_file(
        &self,
        module: &(dyn Module + Sync),
        file_kind: FileKind,
    ) -> Result<PathBuf, FileError>;
}
431
/// An implementation of `SymbolSupplier` that loads Breakpad text-format symbols from local disk
/// paths.
///
/// See [`breakpad_sym_lookup`] for details on how paths are searched.
pub struct SimpleSymbolSupplier {
    /// Local disk paths in which to search for symbols.
    ///
    /// An entry may be a directory, which is searched using the lookup's
    /// relative path, or a single file, which is only considered when a
    /// Breakpad symbol file is requested.
    paths: Vec<PathBuf>,
}
440
441impl SimpleSymbolSupplier {
442    /// Instantiate a new `SimpleSymbolSupplier` that will search in `paths`.
443    pub fn new(paths: Vec<PathBuf>) -> SimpleSymbolSupplier {
444        SimpleSymbolSupplier { paths }
445    }
446}
447
448#[async_trait]
449impl SymbolSupplier for SimpleSymbolSupplier {
450    #[tracing::instrument(name = "symbols", level = "trace", skip_all, fields(module = crate::basename(&module.code_file())))]
451    async fn locate_symbols(
452        &self,
453        module: &(dyn Module + Sync),
454    ) -> Result<LocateSymbolsResult, SymbolError> {
455        let file_path = self
456            .locate_file(module, FileKind::BreakpadSym)
457            .await
458            .map_err(|_| SymbolError::NotFound)?;
459        let symbols = SymbolFile::from_file(&file_path).map_err(|e| {
460            trace!("SimpleSymbolSupplier failed: {}", e);
461            e
462        })?;
463        trace!("SimpleSymbolSupplier parsed file!");
464        Ok(LocateSymbolsResult {
465            symbols,
466            extra_debug_info: None,
467        })
468    }
469
470    #[tracing::instrument(level = "trace", skip(self, module), fields(module = crate::basename(&module.code_file())))]
471    async fn locate_file(
472        &self,
473        module: &(dyn Module + Sync),
474        file_kind: FileKind,
475    ) -> Result<PathBuf, FileError> {
476        trace!("SimpleSymbolSupplier search");
477        if let Some(lookup) = lookup(module, file_kind) {
478            for path in self.paths.iter() {
479                if path.is_file() && file_kind == FileKind::BreakpadSym {
480                    if let Ok(sf) = SymbolFile::from_file(path) {
481                        if sf.module_id == lookup.debug_id {
482                            trace!("SimpleSymbolSupplier found file {}", path.display());
483                            return Ok(path.to_path_buf());
484                        }
485                    }
486                } else if path.is_dir() {
487                    let test_path = path.join(lookup.cache_rel.clone());
488                    if fs::metadata(&test_path).ok().is_some_and(|m| m.is_file()) {
489                        trace!("SimpleSymbolSupplier found file {}", test_path.display());
490                        return Ok(test_path);
491                    }
492                }
493            }
494        } else {
495            trace!("SimpleSymbolSupplier could not build symbol_path");
496        }
497        Err(FileError::NotFound)
498    }
499}
500
/// A SymbolSupplier that maps module names (code_files) to an in-memory string.
///
/// Intended for mocking symbol files in tests.
#[derive(Default, Debug, Clone)]
pub struct StringSymbolSupplier {
    /// Symbol file contents, keyed by the module's `code_file`.
    modules: HashMap<String, String>,
    /// Debug info to report for a module, keyed by the path that
    /// `code_info_breakpad_sym_lookup` produces for it. `new` leaves this
    /// empty.
    code_info_to_debug_info: HashMap<String, DebugInfoResult>,
}
509
510impl StringSymbolSupplier {
511    /// Make a new StringSymbolSupplier with no modules.
512    pub fn new(modules: HashMap<String, String>) -> Self {
513        Self {
514            modules,
515            code_info_to_debug_info: HashMap::new(),
516        }
517    }
518
519    /// Perform a code_file/code_identifier lookup for a specific symbol server.
520    async fn lookup_debug_info_by_code_info(
521        &self,
522        module: &(dyn Module + Sync),
523    ) -> Option<DebugInfoResult> {
524        let lookup_path = code_info_breakpad_sym_lookup(module)?;
525        self.code_info_to_debug_info.get(&lookup_path).cloned()
526    }
527}
528
529#[async_trait]
530impl SymbolSupplier for StringSymbolSupplier {
531    #[tracing::instrument(name = "symbols", level = "trace", skip_all, fields(file = crate::basename(&module.code_file())))]
532    async fn locate_symbols(
533        &self,
534        module: &(dyn Module + Sync),
535    ) -> Result<LocateSymbolsResult, SymbolError> {
536        trace!("StringSymbolSupplier search");
537        if let Some(symbols) = self.modules.get(&*module.code_file()) {
538            trace!("StringSymbolSupplier found file");
539            let file = SymbolFile::from_bytes(symbols.as_bytes())?;
540            trace!("StringSymbolSupplier parsed file!");
541            return Ok(LocateSymbolsResult {
542                symbols: file,
543                extra_debug_info: self.lookup_debug_info_by_code_info(module).await,
544            });
545        }
546        trace!("StringSymbolSupplier could not find file");
547        Err(SymbolError::NotFound)
548    }
549
550    async fn locate_file(
551        &self,
552        _module: &(dyn Module + Sync),
553        _file_kind: FileKind,
554    ) -> Result<PathBuf, FileError> {
555        // StringSymbolSupplier can never find files, is for testing
556        Err(FileError::NotFound)
557    }
558}
559
/// A trait for setting symbol information on something like a stack frame.
///
/// [`Symbolizer::fill_symbol`] takes a `&mut dyn FrameSymbolizer` and reports
/// what it finds through these methods.
pub trait FrameSymbolizer {
    /// Get the program counter value for this frame.
    fn get_instruction(&self) -> u64;
    /// Set the name, base address, and parameter size of the function in
    /// which this frame is executing.
    fn set_function(&mut self, name: &str, base: u64, parameter_size: u32);
    /// Set the source file and (1-based) line number this frame represents.
    ///
    /// `base` is the base address associated with the source line.
    fn set_source_file(&mut self, file: &str, line: u32, base: u64);
    /// Add an inline frame. This method can be called multiple times, in the
    /// order "outside to inside".
    ///
    /// The default implementation ignores the call.
    fn add_inline_frame(&mut self, _name: &str, _file: Option<&str>, _line: Option<u32>) {}
}
573
/// A trait for something whose stack frame can be unwound.
///
/// [`Symbolizer::walk_frame`] takes a `&mut dyn FrameWalker`: callee state is
/// read through the getters and the recovered caller state is written back
/// through the `set_*`/`clear_*` methods.
pub trait FrameWalker {
    /// Get the instruction address that we're trying to unwind from.
    fn get_instruction(&self) -> u64;
    /// Check whether the callee has a callee of its own.
    fn has_grand_callee(&self) -> bool;
    /// Get the number of bytes the callee's callee's parameters take up
    /// on the stack (or 0 if unknown/invalid). This is needed for
    /// STACK WIN unwinding.
    fn get_grand_callee_parameter_size(&self) -> u32;
    /// Get a register-sized value stored at this address.
    fn get_register_at_address(&self, address: u64) -> Option<u64>;
    /// Get the value of a register from the callee's frame.
    fn get_callee_register(&self, name: &str) -> Option<u64>;
    /// Set the value of a register for the caller's frame.
    fn set_caller_register(&mut self, name: &str, val: u64) -> Option<()>;
    /// Explicitly mark one of the caller's registers as invalid.
    fn clear_caller_register(&mut self, name: &str);
    /// Set whatever registers in the caller should be set based on the cfa (e.g. rsp).
    fn set_cfa(&mut self, val: u64) -> Option<()>;
    /// Set whatever registers in the caller should be set based on the return address (e.g. rip).
    fn set_ra(&mut self, val: u64) -> Option<()>;
}
596
/// A simple implementation of `FrameSymbolizer` that just holds data.
///
/// The optional fields start out as `None` and are filled in by the
/// `FrameSymbolizer` setters.
#[derive(Debug, Default)]
pub struct SimpleFrame {
    /// The program counter value for this frame.
    pub instruction: u64,
    /// The name of the function in which the current instruction is executing.
    pub function: Option<String>,
    /// The offset of the start of `function` from the module base.
    pub function_base: Option<u64>,
    /// The size, in bytes, that this function's parameters take up on the stack.
    pub parameter_size: Option<u32>,
    /// The name of the source file in which the current instruction is executing.
    pub source_file: Option<String>,
    /// The 1-based index of the line number in `source_file` in which the current instruction is
    /// executing.
    pub source_line: Option<u32>,
    /// The offset of the start of `source_line` from the function base.
    pub source_line_base: Option<u64>,
}
616
617impl SimpleFrame {
618    /// Instantiate a `SimpleFrame` with instruction pointer `instruction`.
619    pub fn with_instruction(instruction: u64) -> SimpleFrame {
620        SimpleFrame {
621            instruction,
622            ..SimpleFrame::default()
623        }
624    }
625}
626
627impl FrameSymbolizer for SimpleFrame {
628    fn get_instruction(&self) -> u64 {
629        self.instruction
630    }
631    fn set_function(&mut self, name: &str, base: u64, parameter_size: u32) {
632        self.function = Some(String::from(name));
633        self.function_base = Some(base);
634        self.parameter_size = Some(parameter_size);
635    }
636    fn set_source_file(&mut self, file: &str, line: u32, base: u64) {
637        self.source_file = Some(String::from(file));
638        self.source_line = Some(line);
639        self.source_line_base = Some(base);
640    }
641}
642
/// A type of file related to a module that you might want downloaded.
///
/// Passed to [`lookup`] and [`SymbolSupplier::locate_file`] to select which
/// of a module's files is wanted.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FileKind {
    /// A Breakpad symbol (.sym) file
    BreakpadSym,
    /// The native binary of a module ("code file") (.exe/.dll/.so/.dylib...)
    Binary,
    /// Extra debuginfo for a module ("debug file") (.pdb/...?)
    ExtraDebugInfo,
}
653
// Can't make Module derive Hash, since then it can't be used as a trait
// object (because the hash method is generic), so this is a hacky workaround.
/// A key that uniquely identifies a module.
///
/// The tuple holds, in this order and all in string form:
///
/// * code_file
/// * code_id
/// * debug_file
/// * debug_id
type ModuleKey = (String, Option<String>, Option<String>, Option<String>);
663
664/// Helper for deriving a hash key from a `Module` for `Symbolizer`.
665fn module_key(module: &(dyn Module + Sync)) -> ModuleKey {
666    (
667        module.code_file().to_string(),
668        module.code_identifier().map(|s| s.to_string()),
669        module.debug_file().map(|s| s.to_string()),
670        module.debug_identifier().map(|s| s.to_string()),
671    )
672}
673
/// A lazily computed `Result` that is cached behind an async mutex.
///
/// The slot is `None` until the first `get` call stores the outcome of its
/// closure; afterwards the stored `Arc` is cloned out to every caller.
struct CachedAsyncResult<T, E> {
    /// The cached outcome, or `None` if nothing has been computed yet.
    inner: FutMutex<Option<Arc<Result<T, E>>>>,
}
677
678impl<T, E> Default for CachedAsyncResult<T, E> {
679    fn default() -> Self {
680        CachedAsyncResult {
681            inner: FutMutex::new(None),
682        }
683    }
684}
685
686impl<T, E> CachedAsyncResult<T, E> {
687    pub async fn get<'a, F, Fut>(&self, f: F) -> Arc<Result<T, E>>
688    where
689        F: FnOnce() -> Fut + 'a,
690        Fut: std::future::Future<Output = Result<T, E>> + 'a,
691    {
692        let mut guard = self.inner.lock().await;
693        if guard.is_none() {
694            *guard = Some(Arc::new(f().await));
695        }
696        guard.as_ref().unwrap().clone()
697    }
698}
699
/// Symbolicate stack frames.
///
/// A `Symbolizer` manages loading symbols and looking up symbols in them
/// including caching so that symbols for a given module are only loaded once.
///
/// Call [`Symbolizer::new`][new] to instantiate a `Symbolizer`. A Symbolizer
/// requires a [`SymbolSupplier`][supplier] to locate symbols. If you have
/// symbols on disk in the [customary directory layout][breakpad_sym_lookup], a
/// [`SimpleSymbolSupplier`][simple] will work.
///
/// Use [`get_symbol_at_address`][get_symbol] or [`fill_symbol`][fill_symbol] to
/// do symbol lookup.
///
/// [new]: struct.Symbolizer.html#method.new
/// [supplier]: trait.SymbolSupplier.html
/// [simple]: struct.SimpleSymbolSupplier.html
/// [get_symbol]: struct.Symbolizer.html#method.get_symbol_at_address
/// [fill_symbol]: struct.Symbolizer.html#method.fill_symbol
pub struct Symbolizer {
    /// Symbol supplier for locating symbols.
    supplier: Box<dyn SymbolSupplier + Send + Sync + 'static>,
    /// Cache of symbol locating results.
    // TODO?: use lru-cache: https://crates.io/crates/lru-cache/
    // note that using an lru-cache would mess up the fact that we currently
    // use this for statistics collection. Splitting out statistics would be
    // way messier but not impossible.
    symbols: CacheMap<ModuleKey, CachedAsyncResult<SymbolFile, SymbolError>>,
    /// Live counters of requested/processed symbols, exposed by `pending_stats`.
    pending_stats: Mutex<PendingSymbolStats>,
    /// Per-module symbol statistics, exposed by `stats`.
    stats: Mutex<HashMap<String, SymbolStats>>,
}
730
731impl Symbolizer {
732    /// Create a `Symbolizer` that uses `supplier` to locate symbols.
733    pub fn new<T: SymbolSupplier + Send + Sync + 'static>(supplier: T) -> Symbolizer {
734        Symbolizer {
735            supplier: Box::new(supplier),
736            symbols: CacheMap::default(),
737            pending_stats: Mutex::default(),
738            stats: Mutex::default(),
739        }
740    }
741
742    /// Helper method for non-minidump-using callers.
743    ///
744    /// Pass `debug_file` and `debug_id` describing a specific module,
745    /// and `address`, a module-relative address, and get back
746    /// a symbol in that module that covers that address, or `None`.
747    ///
748    /// See [the module-level documentation][module] for an example.
749    ///
750    /// [module]: index.html
751    pub async fn get_symbol_at_address(
752        &self,
753        debug_file: &str,
754        debug_id: DebugId,
755        address: u64,
756    ) -> Option<String> {
757        let k = (debug_file, debug_id);
758        let mut frame = SimpleFrame::with_instruction(address);
759        self.fill_symbol(&k, &mut frame).await.ok()?;
760        frame.function
761    }
762
763    /// Fill symbol information in `frame` using the instruction address
764    /// from `frame`, and the module information from `module`. If you're not
765    /// using a minidump module, you can use [`SimpleModule`][simplemodule] and
766    /// [`SimpleFrame`][simpleframe].
767    ///
768    /// An Error indicates that no symbols could be found for the relevant
769    /// module.
770    ///
771    /// # Examples
772    ///
773    /// ```
774    /// // std::env::set_current_dir(env!("CARGO_MANIFEST_DIR"));
775    /// use std::str::FromStr;
776    /// use debugid::DebugId;
777    /// use breakpad_symbols::{SimpleSymbolSupplier,Symbolizer,SimpleFrame,SimpleModule};
778    ///
779    /// #[tokio::main]
780    /// async fn main() {
781    ///     use std::path::PathBuf;
782    ///     let paths = vec!(PathBuf::from("../testdata/symbols/"));
783    ///     let supplier = SimpleSymbolSupplier::new(paths);
784    ///     let symbolizer = Symbolizer::new(supplier);
785    ///     let debug_id = DebugId::from_str("5A9832E5287241C1838ED98914E9B7FF1").unwrap();
786    ///     let m = SimpleModule::new("test_app.pdb", debug_id);
787    ///     let mut f = SimpleFrame::with_instruction(0x1010);
788    ///     let _ = symbolizer.fill_symbol(&m, &mut f).await;
789    ///     assert_eq!(f.function.unwrap(), "vswprintf");
790    ///     assert_eq!(f.source_file.unwrap(),
791    ///         r"c:\program files\microsoft visual studio 8\vc\include\swprintf.inl");
792    ///     assert_eq!(f.source_line.unwrap(), 51);
793    /// }
794    /// ```
795    ///
796    /// [simplemodule]: struct.SimpleModule.html
797    /// [simpleframe]: struct.SimpleFrame.html
798    pub async fn fill_symbol(
799        &self,
800        module: &(dyn Module + Sync),
801        frame: &mut (dyn FrameSymbolizer + Send),
802    ) -> Result<(), FillSymbolError> {
803        let cached_sym = self.get_symbols(module).await;
804        let sym = cached_sym
805            .as_ref()
806            .as_ref()
807            .map_err(|_| FillSymbolError {})?;
808        sym.fill_symbol(module, frame);
809        Ok(())
810    }
811
812    /// Collect various statistics on the symbols.
813    ///
814    /// Keys are the file name of the module (code_file's file name).
815    pub fn stats(&self) -> HashMap<String, SymbolStats> {
816        self.stats.lock().unwrap().clone()
817    }
818
819    /// Get live symbol stats for interactive updates.
820    pub fn pending_stats(&self) -> PendingSymbolStats {
821        self.pending_stats.lock().unwrap().clone()
822    }
823
824    /// Tries to use CFI to walk the stack frame of the FrameWalker
825    /// using the symbols of the given Module. Output will be written
826    /// using the FrameWalker's `set_caller_*` APIs.
827    pub async fn walk_frame(
828        &self,
829        module: &(dyn Module + Sync),
830        walker: &mut (dyn FrameWalker + Send),
831    ) -> Option<()> {
832        let cached_sym = self.get_symbols(module).await;
833        let sym = cached_sym.as_ref();
834        if let Ok(sym) = sym {
835            trace!("found symbols for address, searching for cfi entries");
836            sym.walk_frame(module, walker)
837        } else {
838            trace!("couldn't find symbols for address, cannot use cfi");
839            None
840        }
841    }
842
843    /// Gets the fully parsed SymbolFile for a given module (or an Error).
844    ///
    /// This returns the `Arc`-shared result of the underlying cached operation, which is
    /// guaranteed to already be resolved (lifetime stuff).
846    async fn get_symbols(
847        &self,
848        module: &(dyn Module + Sync),
849    ) -> Arc<Result<SymbolFile, SymbolError>> {
850        self.symbols
851            .cache_default(module_key(module))
852            .get(|| async {
853                trace!("locating symbols for module {}", module.code_file());
854                self.pending_stats.lock().unwrap().symbols_requested += 1;
855                let result = self.supplier.locate_symbols(module).await;
856                self.pending_stats.lock().unwrap().symbols_processed += 1;
857
858                let mut stats = SymbolStats::default();
859                match &result {
860                    Ok(res) => {
861                        stats.symbol_url.clone_from(&res.symbols.url);
862                        stats.loaded_symbols = true;
863                        stats.corrupt_symbols = false;
864                        stats.extra_debug_info.clone_from(&res.extra_debug_info);
865                    }
866                    Err(SymbolError::NotFound) => {
867                        stats.loaded_symbols = false;
868                    }
869                    Err(SymbolError::MissingDebugFileOrId) => {
870                        stats.loaded_symbols = false;
871                    }
872                    Err(SymbolError::LoadError(_)) => {
873                        stats.loaded_symbols = false;
874                    }
875                    Err(SymbolError::ParseError(..)) => {
876                        stats.loaded_symbols = true;
877                        stats.corrupt_symbols = true;
878                    }
879                }
880                let key = leafname(module.code_file().as_ref()).to_string();
881                self.stats.lock().unwrap().insert(key, stats);
882
883                result.map(|r| r.symbols)
884            })
885            .await
886    }
887
888    /// Gets the path to a file for a given module (or an Error).
889    ///
    /// Unlike symbol lookups, this does not go through the symbol cache: the request is
    /// forwarded directly to the supplier's `locate_file`.
891    pub async fn get_file_path(
892        &self,
893        module: &(dyn Module + Sync),
894        file_kind: FileKind,
895    ) -> Result<PathBuf, FileError> {
896        self.supplier.locate_file(module, file_kind).await
897    }
898}
899
900#[test]
901fn test_leafname() {
902    assert_eq!(leafname("c:\\foo\\bar\\test.pdb"), "test.pdb");
903    assert_eq!(leafname("c:/foo/bar/test.pdb"), "test.pdb");
904    assert_eq!(leafname("test.pdb"), "test.pdb");
905    assert_eq!(leafname("test"), "test");
906    assert_eq!(leafname("/path/to/test"), "test");
907}
908
909#[test]
910fn test_replace_or_add_extension() {
911    assert_eq!(
912        replace_or_add_extension("test.pdb", "pdb", "sym"),
913        "test.sym"
914    );
915    assert_eq!(
916        replace_or_add_extension("TEST.PDB", "pdb", "sym"),
917        "TEST.sym"
918    );
919    assert_eq!(replace_or_add_extension("test", "pdb", "sym"), "test.sym");
920    assert_eq!(
921        replace_or_add_extension("test.x", "pdb", "sym"),
922        "test.x.sym"
923    );
924    assert_eq!(replace_or_add_extension("", "pdb", "sym"), ".sym");
925    assert_eq!(replace_or_add_extension("test.x", "x", "y"), "test.y");
926}
927
928#[cfg(test)]
929mod test {
930
931    use super::*;
932    use std::fs::File;
933    use std::io::Write;
934    use std::path::Path;
935    use std::str::FromStr;
936
937    #[tokio::test]
938    async fn test_relative_symbol_path() {
939        let debug_id = DebugId::from_str("abcd1234-abcd-1234-abcd-abcd12345678-a").unwrap();
940        let m = SimpleModule::new("foo.pdb", debug_id);
941        assert_eq!(
942            &breakpad_sym_lookup(&m).unwrap().cache_rel,
943            "foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym"
944        );
945
946        let m2 = SimpleModule::new("foo.pdb", debug_id);
947        assert_eq!(
948            &breakpad_sym_lookup(&m2).unwrap().cache_rel,
949            "foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym"
950        );
951
952        let m3 = SimpleModule::new("foo.xyz", debug_id);
953        assert_eq!(
954            &breakpad_sym_lookup(&m3).unwrap().cache_rel,
955            "foo.xyz/ABCD1234ABCD1234ABCDABCD12345678a/foo.xyz.sym"
956        );
957
958        let m4 = SimpleModule::new("foo.xyz", debug_id);
959        assert_eq!(
960            &breakpad_sym_lookup(&m4).unwrap().cache_rel,
961            "foo.xyz/ABCD1234ABCD1234ABCDABCD12345678a/foo.xyz.sym"
962        );
963
964        let bad = SimpleModule::default();
965        assert!(breakpad_sym_lookup(&bad).is_none());
966
967        let bad2 = SimpleModule {
968            debug_file: Some("foo".to_string()),
969            ..SimpleModule::default()
970        };
971        assert!(breakpad_sym_lookup(&bad2).is_none());
972
973        let bad3 = SimpleModule {
974            debug_id: Some(debug_id),
975            ..SimpleModule::default()
976        };
977        assert!(breakpad_sym_lookup(&bad3).is_none());
978    }
979
980    #[tokio::test]
981    async fn test_relative_symbol_path_abs_paths() {
982        let debug_id = DebugId::from_str("abcd1234-abcd-1234-abcd-abcd12345678-a").unwrap();
983        {
984            let m = SimpleModule::new("/path/to/foo.bin", debug_id);
985            assert_eq!(
986                &breakpad_sym_lookup(&m).unwrap().cache_rel,
987                "foo.bin/ABCD1234ABCD1234ABCDABCD12345678a/foo.bin.sym"
988            );
989        }
990
991        {
992            let m = SimpleModule::new("c:/path/to/foo.pdb", debug_id);
993            assert_eq!(
994                &breakpad_sym_lookup(&m).unwrap().cache_rel,
995                "foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym"
996            );
997        }
998
999        {
1000            let m = SimpleModule::new("c:\\path\\to\\foo.pdb", debug_id);
1001            assert_eq!(
1002                &breakpad_sym_lookup(&m).unwrap().cache_rel,
1003                "foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym"
1004            );
1005        }
1006    }
1007
1008    #[tokio::test]
1009    async fn test_code_info_breakpad_sym_lookup() {
1010        // Test normal data
1011        let m = SimpleModule {
1012            code_file: Some("foo.dll".to_string()),
1013            code_identifier: Some(CodeId::from_str("64E782C570C4000").unwrap()),
1014            ..SimpleModule::default()
1015        };
1016        assert_eq!(
1017            &code_info_breakpad_sym_lookup(&m).unwrap(),
1018            "foo.dll/64E782C570C4000/foo.sym"
1019        );
1020
1021        let bad = SimpleModule::default();
1022        assert!(code_info_breakpad_sym_lookup(&bad).is_none());
1023
1024        let bad2 = SimpleModule {
1025            code_file: Some("foo".to_string()),
1026            ..SimpleModule::default()
1027        };
1028        assert!(code_info_breakpad_sym_lookup(&bad2).is_none());
1029
1030        let bad3 = SimpleModule {
1031            code_identifier: Some(CodeId::from_str("64E782C570C4000").unwrap()),
1032            ..SimpleModule::default()
1033        };
1034        assert!(code_info_breakpad_sym_lookup(&bad3).is_none());
1035    }
1036
1037    fn mksubdirs(path: &Path, dirs: &[&str]) -> Vec<PathBuf> {
1038        dirs.iter()
1039            .map(|dir| {
1040                let new_path = path.join(dir);
1041                fs::create_dir(&new_path).unwrap();
1042                new_path
1043            })
1044            .collect()
1045    }
1046
1047    fn write_symbol_file(path: &Path, contents: &[u8]) {
1048        let dir = path.parent().unwrap();
1049        if !fs::metadata(dir).ok().is_some_and(|m| m.is_dir()) {
1050            fs::create_dir_all(dir).unwrap();
1051        }
1052        let mut f = File::create(path).unwrap();
1053        f.write_all(contents).unwrap();
1054    }
1055
1056    fn write_good_symbol_file(path: &Path) {
1057        write_symbol_file(path, b"MODULE Linux x86 abcd1234 foo\n");
1058    }
1059
1060    fn write_bad_symbol_file(path: &Path) {
1061        write_symbol_file(path, b"this is not a symbol file\n");
1062    }
1063
1064    #[tokio::test]
1065    async fn test_simple_symbol_supplier() {
1066        let t = tempfile::tempdir().unwrap();
1067        let paths = mksubdirs(t.path(), &["one", "two"]);
1068
1069        let supplier = SimpleSymbolSupplier::new(paths.clone());
1070        let bad = SimpleModule::default();
1071        assert_eq!(
1072            supplier.locate_symbols(&bad).await,
1073            Err(SymbolError::NotFound)
1074        );
1075
1076        // Try loading symbols for each of two modules in each of the two
1077        // search paths.
1078        for &(path, file, id, sym) in [
1079            (
1080                &paths[0],
1081                "foo.pdb",
1082                DebugId::from_str("abcd1234-0000-0000-0000-abcd12345678-a").unwrap(),
1083                "foo.pdb/ABCD1234000000000000ABCD12345678a/foo.sym",
1084            ),
1085            (
1086                &paths[1],
1087                "bar.xyz",
1088                DebugId::from_str("ff990000-0000-0000-0000-abcd12345678-a").unwrap(),
1089                "bar.xyz/FF990000000000000000ABCD12345678a/bar.xyz.sym",
1090            ),
1091        ]
1092        .iter()
1093        {
1094            let m = SimpleModule::new(file, id);
1095            // No symbols present yet.
1096            assert_eq!(
1097                supplier.locate_symbols(&m).await,
1098                Err(SymbolError::NotFound)
1099            );
1100            write_good_symbol_file(&path.join(sym));
1101            // Should load OK now that it exists.
1102            assert!(
1103                supplier.locate_symbols(&m).await.is_ok(),
1104                "{}",
1105                format!("Located symbols for {sym}")
1106            );
1107        }
1108
1109        // Write a malformed symbol file, verify that it's found but fails to load.
1110        let debug_id = DebugId::from_str("ffff0000-0000-0000-0000-abcd12345678-a").unwrap();
1111        let mal = SimpleModule::new("baz.pdb", debug_id);
1112        let sym = "baz.pdb/FFFF0000000000000000ABCD12345678a/baz.sym";
1113        assert_eq!(
1114            supplier.locate_symbols(&mal).await,
1115            Err(SymbolError::NotFound)
1116        );
1117        write_bad_symbol_file(&paths[0].join(sym));
1118        let res = supplier.locate_symbols(&mal).await;
1119        assert!(
1120            matches!(res, Err(SymbolError::ParseError(..))),
1121            "{}",
1122            format!("Correctly failed to parse {sym}, result: {res:?}")
1123        );
1124    }
1125
1126    #[tokio::test]
1127    async fn test_symbolizer() {
1128        let t = tempfile::tempdir().unwrap();
1129        let path = t.path();
1130
1131        // TODO: This could really use a MockSupplier
1132        let supplier = SimpleSymbolSupplier::new(vec![PathBuf::from(path)]);
1133        let symbolizer = Symbolizer::new(supplier);
1134        let debug_id = DebugId::from_str("abcd1234-abcd-1234-abcd-abcd12345678-a").unwrap();
1135        let m1 = SimpleModule::new("foo.pdb", debug_id);
1136        write_symbol_file(
1137            &path.join("foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym"),
1138            b"MODULE Linux x86 ABCD1234ABCD1234ABCDABCD12345678a foo
1139FILE 1 foo.c
1140FUNC 1000 30 10 some func
11411000 30 100 1
1142",
1143        );
1144        let mut f1 = SimpleFrame::with_instruction(0x1010);
1145        symbolizer.fill_symbol(&m1, &mut f1).await.unwrap();
1146        assert_eq!(f1.function.unwrap(), "some func");
1147        assert_eq!(f1.function_base.unwrap(), 0x1000);
1148        assert_eq!(f1.source_file.unwrap(), "foo.c");
1149        assert_eq!(f1.source_line.unwrap(), 100);
1150        assert_eq!(f1.source_line_base.unwrap(), 0x1000);
1151
1152        assert_eq!(
1153            symbolizer
1154                .get_symbol_at_address("foo.pdb", debug_id, 0x1010)
1155                .await
1156                .unwrap(),
1157            "some func"
1158        );
1159
1160        let debug_id = DebugId::from_str("ffff0000-0000-0000-0000-abcd12345678-a").unwrap();
1161        let m2 = SimpleModule::new("bar.pdb", debug_id);
1162        let mut f2 = SimpleFrame::with_instruction(0x1010);
1163        // No symbols present, should not find anything.
1164        assert!(symbolizer.fill_symbol(&m2, &mut f2).await.is_err());
1165        assert!(f2.function.is_none());
1166        assert!(f2.function_base.is_none());
1167        assert!(f2.source_file.is_none());
1168        assert!(f2.source_line.is_none());
1169        // Results should be cached.
1170        write_symbol_file(
1171            &path.join("bar.pdb/ffff0000000000000000ABCD12345678a/bar.sym"),
1172            b"MODULE Linux x86 ffff0000000000000000ABCD12345678a bar
1173FILE 53 bar.c
1174FUNC 1000 30 10 another func
11751000 30 7 53
1176",
1177        );
1178        assert!(symbolizer.fill_symbol(&m2, &mut f2).await.is_err());
1179        assert!(f2.function.is_none());
1180        assert!(f2.function_base.is_none());
1181        assert!(f2.source_file.is_none());
1182        assert!(f2.source_line.is_none());
1183        // This should also use cached results.
1184        assert!(symbolizer
1185            .get_symbol_at_address("bar.pdb", debug_id, 0x1010)
1186            .await
1187            .is_none());
1188    }
1189
1190    #[tokio::test]
1191    async fn test_extra_debug_info() {
1192        let debug_info = DebugInfoResult {
1193            debug_file: String::from_str("foo.pdb").unwrap(),
1194            debug_identifier: DebugId::from_str("abcd1234-abcd-1234-abcd-abcd12345678-a").unwrap(),
1195        };
1196
1197        let mut supplier = StringSymbolSupplier {
1198            modules: HashMap::new(),
1199            code_info_to_debug_info: HashMap::new(),
1200        };
1201        supplier.modules.insert(
1202            String::from_str("foo.pdb").unwrap(),
1203            String::from_str(
1204                "MODULE Linux x86 ABCD1234ABCD1234ABCDABCD12345678a foo
1205FILE 1 foo.c
1206FUNC 1000 30 10 some func
12071000 30 100 1
1208",
1209            )
1210            .unwrap(),
1211        );
1212        supplier.code_info_to_debug_info.insert(
1213            String::from_str("foo.pdb/64E782C570C4000/foo.pdb.sym").unwrap(),
1214            debug_info.clone(),
1215        );
1216
1217        let symbolizer = Symbolizer::new(supplier);
1218        let module = SimpleModule::from_basic_info(
1219            None,
1220            None,
1221            Some(String::from_str("foo.pdb").unwrap()),
1222            Some(CodeId::from_str("64E782C570C4000").unwrap()),
1223        );
1224
1225        let mut f1 = SimpleFrame::with_instruction(0x1010);
1226        symbolizer.fill_symbol(&module, &mut f1).await.unwrap();
1227        assert_eq!(f1.function.unwrap(), "some func");
1228        assert_eq!(f1.function_base.unwrap(), 0x1000);
1229        assert_eq!(f1.source_file.unwrap(), "foo.c");
1230        assert_eq!(f1.source_line.unwrap(), 100);
1231        assert_eq!(f1.source_line_base.unwrap(), 0x1000);
1232
1233        let sym_stats = symbolizer.stats();
1234        let stats = sym_stats.get("foo.pdb").unwrap();
1235        assert_eq!(stats.extra_debug_info, Some(debug_info));
1236    }
1237}