1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
//! This module defines the interface between minidump-processor and its [Symbolizer][].
//!
//! minidump-processor and the [Symbolizer][] communicate using a series of traits. The symbolizer
//! must provide implementations of these traits:
//!
//! * [SymbolProvider][] - provides symbolication, cfi evaluation, and debug statistics
//!     * Implemented by [Symbolizer][]
//!     * This actually doesn't need to be a trait in the current design, it exists to allow
//!       multiple symbolicators to be used together, via [MultiSymbolProvider][]. The other
//!       SymbolProviders have been removed, but I figured it would be a waste to throw out
//!       this minimally intrusive machinery.
//!
//! * [SymbolSupplier][] - maps a [Module][] to a [SymbolFile][]
//!     * minidump-processor does not directly use this, it's just there so the Symbolizer can
//!       generically handle different symbol fetching strategies (which minidump-processor
//!       selects and configures).
//!
//!
//!
//! While minidump-processor provides implementations of these traits:
//!
//! * [FrameSymbolizer][] - callbacks that symbolication uses to return its results.
//!     * Implemented by [StackFrame][crate::process_state::StackFrame]
//!     * Implemented by DummyFrame (private, for a stack scanning heuristic)
//! * [FrameWalker][] - callbacks that cfi eval uses to read callee state and write caller state.
//!     * Implemented by CfiStackWalker (private)
//!
//!
//!
//! The symbolizer is responsible for providing the following concrete functions, which
//! minidump-processor uses to select and configure the symbol fetching strategy:
//!
//! * [http_symbol_supplier][] - a [SymbolSupplier][] that can find symbols over HTTP (and cache).
//! * [simple_symbol_supplier][] - a [SymbolSupplier][] that can find symbols on disk.
//! * [string_symbol_supplier][] - a mock [SymbolSupplier][] for tests.
//!
//!
//!
//! And the following concrete types:
//!
//! * [Symbolizer][] - the main interface of the symbolizer, implementing [SymbolProvider][].
//!     * Wraps the [SymbolSupplier][] implementation that minidump-processor selects.
//!     * Queries the [SymbolSupplier] and manages the SymbolFiles however it pleases.
//! * [SymbolStats][] - debug statistic output.
//! * [SymbolFile][] - a payload that a [SymbolProvider][] returns to the Symbolizer.
//!     * Never handled by minidump-processor, public for the trait. (use this for whatever)
//! * [SymbolError][] - possible errors a [SymbolProvider][] can yield.
//!     * Never handled by minidump-processor, public for the trait. (use this for whatever)
//! * [FillSymbolError][] - possible errors for `fill_symbol`.
//!     * While this *is* handled by minidump-processor, it doesn't actually look at the value. It's
//!       just there to be An Error Type for the sake of API design.
//!
//!
//!
//! # Example
//!
//! ```rust
//! use minidump::Minidump;
//! use minidump_processor::{http_symbol_supplier, ProcessorOptions, Symbolizer};
//!
//! #[tokio::main]
//! async fn main() -> Result<(), ()> {
//!     // Read the minidump
//!     let dump = Minidump::read_path("../testdata/test.dmp").map_err(|_| ())?;
//!
//!     // Configure the symbolizer and processor
//!     let symbols_urls = vec![String::from("https://symbols.totallyrealwebsite.org")];
//!     let symbols_paths = vec![];
//!     let mut symbols_cache = std::env::temp_dir();
//!     symbols_cache.push("minidump-cache");
//!     let symbols_tmp = std::env::temp_dir();
//!     let timeout = std::time::Duration::from_secs(1000);
//!
//!     let options = ProcessorOptions::default();
//!     let provider = Symbolizer::new(http_symbol_supplier(
//!         symbols_paths,
//!         symbols_urls,
//!         symbols_cache,
//!         symbols_tmp,
//!         timeout,
//!     ));
//!
//!     let state = minidump_processor::process_minidump_with_options(&dump, &provider, options)
//!         .await
//!         .map_err(|_| ())?;
//!     state.print(&mut std::io::stdout()).map_err(|_| ())?;
//!     Ok(())
//! }
//! ```
//!

use std::collections::HashMap;
use std::path::PathBuf;

use async_trait::async_trait;
use minidump::Module;

pub use breakpad_symbols::{
    FileError, FileKind, FillSymbolError, FrameSymbolizer, FrameWalker, PendingSymbolStats,
    SymbolError, SymbolFile, SymbolStats, SymbolSupplier, Symbolizer,
};

/// The [`SymbolProvider`] is the main extension point for minidump processing.
///
/// It is primarily used by the `process_minidump` function to do stack
/// unwinding via CFI (call frame information) of a [`Module`] using the
/// `walk_frame` function.
///
/// The `fill_symbol` function is responsible for filling in the source location
/// (function, file, line triple) corresponding to an instruction address, as
/// well as a dual purpose of informing the stack scanning heuristic whether a
/// given instruction address might be valid inside of a [`Module`].
///
/// All the asynchronous trait methods can be called concurrently and need to
/// handle synchronization and request coalescing (based on the [`Module`]).
#[async_trait]
pub trait SymbolProvider {
    /// Fill symbol information in [`FrameSymbolizer`] using the instruction
    /// address from `frame`, and the module information from [`Module`].
    ///
    /// An Error indicates that no symbols could be found for the relevant
    /// module.
    ///
    /// This is used for filling in the resulting source location of the
    /// frame as a (function, file, line) triple, as well as providing the
    /// `parameter_size` which is used during CFI evaluation and stack walking.
    ///
    /// This function also serves a dual purpose in informing the stack scanning
    /// heuristic whether a potential instruction address points to a valid
    /// function or not.
    async fn fill_symbol(
        &self,
        module: &(dyn Module + Sync),
        frame: &mut (dyn FrameSymbolizer + Send),
    ) -> Result<(), FillSymbolError>;

    /// Tries to use CFI to walk the stack frame of the [`FrameWalker`]
    /// using the symbols of the given [`Module`].
    ///
    /// Output should be written using the [`FrameWalker`]'s `set_caller_*` APIs.
    async fn walk_frame(
        &self,
        module: &(dyn Module + Sync),
        walker: &mut (dyn FrameWalker + Send),
    ) -> Option<()>;

    /// Gets the path to the binary code file for a given module (or an Error).
    ///
    /// This might be used later on to inspect the assembly instructions of
    /// a module.
    async fn get_file_path(
        &self,
        module: &(dyn Module + Sync),
        file_kind: FileKind,
    ) -> Result<PathBuf, FileError>;

    /// Collect various statistics on the symbols.
    ///
    /// Keys are implementation dependent.
    /// For example the file name of the module (code_file's file name).
    ///
    /// This is only really intended to be queried after processing an
    /// entire minidump, and may have non-trivial overhead to compute.
    /// It's als possible we'd want it to also be able to contain stats
    /// that don't really make sense in intermediate states.
    ///
    /// In a world where you might want to have one SymbolSupplier shared
    /// by multiple instances of `process` running in parallel, it's unclear
    /// if this is the right abstraction. Perhaps we should have some kind
    /// of "session" abstraction so you can get stats about each individual
    /// processing task? Of course all pooling/caching between the tasks
    /// muddies things too.
    fn stats(&self) -> HashMap<String, SymbolStats> {
        HashMap::new()
    }

    /// Collect various pending statistics on the symbols.
    ///
    /// This is intended to be queried during processing to give some
    /// interactive feedback to the user, and so is fine to poll as
    /// much as you want, whenever you want.
    fn pending_stats(&self) -> PendingSymbolStats {
        PendingSymbolStats::default()
    }
}

#[derive(Default)]
pub struct MultiSymbolProvider {
    providers: Vec<Box<dyn SymbolProvider + Send + Sync>>,
}

impl MultiSymbolProvider {
    pub fn new() -> MultiSymbolProvider {
        Default::default()
    }

    pub fn add(&mut self, provider: Box<dyn SymbolProvider + Send + Sync>) {
        self.providers.push(provider);
    }
}

#[async_trait]
impl SymbolProvider for MultiSymbolProvider {
    async fn fill_symbol(
        &self,
        module: &(dyn Module + Sync),
        frame: &mut (dyn FrameSymbolizer + Send),
    ) -> Result<(), FillSymbolError> {
        // Return Ok if *any* symbol provider came back with Ok, so that the user can
        // distinguish between having no symbols at all and just not being able to
        // symbolize this particular frame.
        let mut best_result = Err(FillSymbolError {});
        for p in self.providers.iter() {
            let new_result = p.fill_symbol(module, frame).await;
            best_result = best_result.or(new_result);
        }
        best_result
    }

    async fn walk_frame(
        &self,
        module: &(dyn Module + Sync),
        walker: &mut (dyn FrameWalker + Send),
    ) -> Option<()> {
        for p in self.providers.iter() {
            let result = p.walk_frame(module, walker).await;
            if result.is_some() {
                return result;
            }
        }
        None
    }

    async fn get_file_path(
        &self,
        module: &(dyn Module + Sync),
        file_kind: FileKind,
    ) -> Result<PathBuf, FileError> {
        // Return Ok if *any* symbol provider came back with Ok
        let mut best_result = Err(FileError::NotFound);
        for p in self.providers.iter() {
            let new_result = p.get_file_path(module, file_kind).await;
            best_result = best_result.or(new_result);
        }
        best_result
    }

    fn stats(&self) -> HashMap<String, SymbolStats> {
        let mut result = HashMap::new();
        for p in self.providers.iter() {
            // FIXME: do more intelligent merging of the stats
            // (currently doesn't matter as only one provider reports non-empty stats).
            result.extend(p.stats());
        }
        result
    }

    fn pending_stats(&self) -> PendingSymbolStats {
        let mut result = PendingSymbolStats::default();
        for p in self.providers.iter() {
            // FIXME: do more intelligent merging of the stats
            // (currently doesn't matter as only one provider reports non-empty stats).
            result = p.pending_stats();
        }
        result
    }
}

#[async_trait]
impl SymbolProvider for Symbolizer {
    async fn fill_symbol(
        &self,
        module: &(dyn Module + Sync),
        frame: &mut (dyn FrameSymbolizer + Send),
    ) -> Result<(), FillSymbolError> {
        self.fill_symbol(module, frame).await
    }
    async fn walk_frame(
        &self,
        module: &(dyn Module + Sync),
        walker: &mut (dyn FrameWalker + Send),
    ) -> Option<()> {
        self.walk_frame(module, walker).await
    }
    async fn get_file_path(
        &self,
        module: &(dyn Module + Sync),
        file_kind: FileKind,
    ) -> Result<PathBuf, FileError> {
        self.get_file_path(module, file_kind).await
    }
    fn stats(&self) -> HashMap<String, SymbolStats> {
        self.stats()
    }
    fn pending_stats(&self) -> PendingSymbolStats {
        self.pending_stats()
    }
}

/// Gets a SymbolSupplier that looks up symbols by path or with urls.
///
/// * `symbols_paths` is a list of paths to check for symbol files. Paths
///   are searched in order until one returns a payload. If none do, then
///   urls are used.
///
/// * `symbols_urls` is a list of "base urls" that should all point to Tecken
///   servers. urls are queried in order until one returns a payload. If none
///   do, then it's an error.
///
/// * `symbols_cache` is a directory where an on-disk cache should be located.
///   This should be assumed to be a "temp" directory that another process
///   you don't control is garbage-collecting old files from (to provide an LRU cache).
///   The cache is queried before paths and urls (otherwise it wouldn't be much of a cache).
///
/// * `symbols_tmp` is a directory where symbol files should be downloaded to
///   before atomically swapping them into the cache. Has the same "temp"
///   assumptions as symbols_cache.
///
/// * `timeout` a maximum time limit for a symbol file download. This
///   is primarily defined to avoid getting stuck on buggy infinite downloads.
///   As of this writing, minidump-stackwalk defaults this to 1000 seconds. In
///   the event of a timeout, the supplier may still try to parse the truncated
///   download.
#[cfg(feature = "http")]
pub fn http_symbol_supplier(
    symbol_paths: Vec<PathBuf>,
    symbol_urls: Vec<String>,
    symbols_cache: PathBuf,
    symbols_tmp: PathBuf,
    timeout: std::time::Duration,
) -> impl SymbolSupplier {
    breakpad_symbols::HttpSymbolSupplier::new(
        symbol_urls,
        symbols_cache,
        symbols_tmp,
        symbol_paths,
        timeout,
    )
}

/// Gets a SymbolSupplier that looks up symbols by path.
///
/// Paths are queried in order until one returns a payload.
pub fn simple_symbol_supplier(symbol_paths: Vec<PathBuf>) -> impl SymbolSupplier {
    breakpad_symbols::SimpleSymbolSupplier::new(symbol_paths)
}

/// Gets a mock SymbolSupplier that just maps module names
/// to a string containing an entire breakpad .sym file, for tests.
pub fn string_symbol_supplier(modules: HashMap<String, String>) -> impl SymbolSupplier {
    breakpad_symbols::StringSymbolSupplier::new(modules)
}