breakpad_symbols/sym_file/
types.rs

1// Copyright 2015 Ted Mielczarek. See the COPYRIGHT
2// file at the top-level directory of this distribution.
3
4use range_map::{Range, RangeMap};
5use std::collections::HashMap;
6
/// A linker symbol that is publicly visible.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct PublicSymbol {
    /// Address of the symbol, relative to the load address of the module.
    ///
    /// Keep this as the first field: the derived `Ord` compares fields in
    /// declaration order, so symbols sort by address before anything else,
    /// and address lookup relies on that ordering.
    pub address: u64,
    /// Name of the symbol.
    pub name: String,
    /// Size of the parameters passed to the function.
    pub parameter_size: u32,
}
20
/// Maps a run of machine code bytes back to a source file and line.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct SourceLine {
    /// Start address of the run, relative to the load address of the module.
    pub address: u64,
    /// Length of this run of instructions, in bytes.
    pub size: u32,
    /// The source file this machine code was generated from.
    ///
    /// Stored as an index into `SymbolFile::files`.
    pub file: u32,
    /// Line number within `file` that this machine code was generated from.
    pub line: u32,
}
35
/// One address range that is covered by an inlined function call.
///
/// The field order is significant: the derived ordering compares `depth`
/// first and `address` second.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub struct Inlinee {
    /// Depth of the inline call.
    pub depth: u32,
    /// Start address of the range, relative to the load address of the module.
    pub address: u64,
    /// Length of this range of instructions, in bytes.
    pub size: u32,
    /// The source file that contains the function call.
    ///
    /// Stored as an index into `SymbolFile::files`.
    pub call_file: u32,
    /// Line number within `call_file` of the function call.
    pub call_line: u32,
    /// Name of the inlined function, stored as an index into
    /// `SymbolFile::inline_origins`.
    pub origin_id: u32,
}
54
55/// A source-language function.
56#[derive(Clone, Debug, PartialEq, Eq)]
57pub struct Function {
58    /// The function's start address relative to the module's load address.
59    pub address: u64,
60    /// The size of the function in bytes.
61    pub size: u32,
62    /// The size of parameters passed to the function.
63    pub parameter_size: u32,
64    /// The name of the function as declared in the source.
65    pub name: String,
66    /// Source line information for this function.
67    pub lines: RangeMap<u64, SourceLine>,
68    /// Inlinee information for this function, sorted by (depth, address).
69    ///
70    /// Essentially this can be considered as "one vec per depth", just with
71    /// all those vecs concatenated into one.
72    ///
73    /// Inlinees form a nested structure, you can think of them like a flame graph.
74    pub inlinees: Vec<Inlinee>,
75}
76
77impl Function {
78    pub fn memory_range(&self) -> Option<Range<u64>> {
79        if self.size == 0 {
80            return None;
81        }
82        Some(Range::new(
83            self.address,
84            self.address.checked_add(self.size as u64)? - 1,
85        ))
86    }
87
88    /// Returns `(file_id, line, address, inline_origin)` of the line or inline record that
89    /// covers the given address at the outermost level (i.e. not inside any
90    /// inlined calls).
91    pub fn get_outermost_sourceloc(&self, addr: u64) -> Option<(u32, u32, u64, Option<u32>)> {
92        if let Some((call_file, call_line, address, origin)) = self.get_inlinee_at_depth(0, addr) {
93            return Some((call_file, call_line, address, Some(origin)));
94        }
95        // Otherwise we return the line record covering this address.
96        let line = self.lines.get(addr)?;
97        Some((line.file, line.line, line.address, None))
98    }
99
100    /// Returns `(file_id, line, address)` of the line record that covers the
101    /// given address. Line records describe locations at the deepest level of
102    /// inlining at that address.
103    ///
104    /// For example, if we have an "inline call stack" A -> B -> C at this
105    /// address, i.e. both the call to B and the call to C have been inlined all
106    /// the way into A (A being the "outer function"), then this method reports
107    /// locations in C.
108    pub fn get_innermost_sourceloc(&self, addr: u64) -> Option<(u32, u32, u64)> {
109        let line = self.lines.get(addr)?;
110        Some((line.file, line.line, line.address))
111    }
112
113    /// Returns `(call_file_id, call_line, address, inline_origin)` of the
114    /// inlinee record that covers the given address at the given depth.
115    ///
116    /// We start at depth zero. For example, if we have an "inline call stack"
117    /// A -> B -> C at an address, i.e. both the call to B and the call to C have
118    /// been inlined all the way into A (A being the "outer function"), then the
119    /// call A -> B is at level zero, and the call B -> C is at level one.
120    pub fn get_inlinee_at_depth(&self, depth: u32, addr: u64) -> Option<(u32, u32, u64, u32)> {
121        let inlinee = match self
122            .inlinees
123            .binary_search_by_key(&(depth, addr), |inlinee| (inlinee.depth, inlinee.address))
124        {
125            // Exact match
126            Ok(index) => &self.inlinees[index],
127            // No match, insertion index is zero => before first element
128            Err(0) => return None,
129            // No exact match, insertion index points after inlinee whose (depth, addr) is < what were looking for
130            // => subtract 1 to get candidate
131            Err(index) => &self.inlinees[index - 1],
132        };
133        if inlinee.depth != depth {
134            return None;
135        }
136        let end_address = inlinee.address.checked_add(inlinee.size as u64)?;
137        if addr < end_address {
138            Some((
139                inlinee.call_file,
140                inlinee.call_line,
141                inlinee.address,
142                inlinee.origin_id,
143            ))
144        } else {
145            None
146        }
147    }
148}
149
/// Additional metadata. It is safe to ignore, but may carry useful facts.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum Info {
    /// URL the file was downloaded from. HttpSymbolSupplier adds this to a
    /// symbol file when storing it in its cache, so the information can be
    /// populated even on a cache hit.
    Url(String),
    /// An info line that is either unrecognized or of no interest.
    Unknown,
}
160
/// The DWARF CFI rules used to recover registers at one specific address.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct CfiRules {
    /// The address these rules apply to.
    pub address: u64,
    /// Postfix expressions which, when evaluated, recover register values.
    pub rules: String,
}
169
170/// Information used for unwinding stack frames using DWARF CFI.
171#[derive(Clone, Debug, PartialEq, Eq)]
172pub struct StackInfoCfi {
173    /// The initial rules for this address range.
174    pub init: CfiRules,
175    /// The size of this entire address range.
176    pub size: u32,
177    /// Additional rules to use at specified addresses.
178    pub add_rules: Vec<CfiRules>,
179}
180
181impl StackInfoCfi {
182    pub fn memory_range(&self) -> Option<Range<u64>> {
183        if self.size == 0 {
184            return None;
185        }
186        Some(Range::new(
187            self.init.address,
188            self.init.address.checked_add(self.size as u64)? - 1,
189        ))
190    }
191}
192
193/// Specific details about whether the frame uses a base pointer or has a program string to
194/// evaluate.
195#[derive(Clone, Debug, PartialEq, Eq)]
196pub enum WinFrameType {
197    /// This frame uses FPO-style data.
198    Fpo(StackInfoWin),
199    /// This frame uses new-style frame data, has a program string.
200    FrameData(StackInfoWin),
201    /// Some other type of frame.
202    Unhandled,
203}
204
/// Either a program string or a boolean regarding a base pointer, as stored in
/// `StackInfoWin::program_string_or_base_pointer`.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum WinStackThing {
    /// A program string.
    // NOTE(review): presumably evaluated by the unwinder to recover
    // registers — confirm against the code that consumes it.
    ProgramString(String),
    /// A boolean regarding a base pointer.
    // NOTE(review): presumably `true` means the frame allocates a base
    // pointer — confirm against the parser.
    AllocatesBasePointer(bool),
}
210
211/// Information used for unwinding stack frames using Windows frame info.
212#[derive(Clone, Debug, PartialEq, Eq)]
213pub struct StackInfoWin {
214    /// The address in question.
215    pub address: u64,
216    /// The size of the address range covered.
217    pub size: u32,
218    /// The size of the function's prologue.
219    pub prologue_size: u32,
220    /// The size of the function's epilogue.
221    pub epilogue_size: u32,
222    /// The size of arguments passed to this function.
223    pub parameter_size: u32,
224    /// The number of bytes in the stack frame for callee-saves registers.
225    pub saved_register_size: u32,
226    /// The number of bytes in the stack frame for local variables.
227    pub local_size: u32,
228    /// The maximum number of bytes pushed onto the stack by this frame.
229    pub max_stack_size: u32,
230    /// A program string or boolean regarding a base pointer.
231    pub program_string_or_base_pointer: WinStackThing,
232}
233
234impl StackInfoWin {
235    pub fn memory_range(&self) -> Option<Range<u64>> {
236        if self.size == 0 {
237            return None;
238        }
239        Some(Range::new(
240            self.address,
241            self.address.checked_add(self.size as u64)? - 1,
242        ))
243    }
244}
245
246/// A parsed .sym file containing debug symbols.
247#[derive(Debug, PartialEq, Eq)]
248pub struct SymbolFile {
249    pub module_id: String,
250    pub debug_file: String,
251    /// The set of source files involved in compilation.
252    pub files: HashMap<u32, String>,
253    /// Publicly visible symbols.
254    pub publics: Vec<PublicSymbol>,
255    /// Functions.
256    pub functions: RangeMap<u64, Function>,
257    /// Function names for inlined functions.
258    pub inline_origins: HashMap<u32, String>,
259    /// DWARF CFI unwind information.
260    pub cfi_stack_info: RangeMap<u64, StackInfoCfi>,
261    /// Windows unwind information (frame data).
262    pub win_stack_framedata_info: RangeMap<u64, StackInfoWin>,
263    /// Windows unwind information (FPO data).
264    pub win_stack_fpo_info: RangeMap<u64, StackInfoWin>,
265
266    // Statistics which are strictly best-effort. Generally this
267    // means we might undercount in situations where we forgot to
268    // log an event.
269    /// If the symbol file was loaded from a URL, this is the url
270    pub url: Option<String>,
271    /// The number of times the parser found that the symbol file was
272    /// strictly ambiguous but simple heuristics repaired it. (e.g.
273    /// two STACK WIN entries overlapped, but the second was a suffix of
274    /// the first, so we just truncated the first.)
275    ///
276    /// Ideally dump_syms would never output this kind of thing, but it's
277    /// tolerable.
278    pub ambiguities_repaired: u64,
279    /// The number of times the parser found that the symbol file was
280    /// ambiguous and just randomly picked one of the options to make
281    /// progress.
282    ///
283    /// e.g. two STACK WIN entries with identical ranges but
284    /// different values, so one was discarded arbitrarily.
285    pub ambiguities_discarded: u64,
286    /// The number of times the parser found that a section of the file
287    /// (generally a line) was corrupt and discarded it.
288    ///
289    /// e.g. a STACK WIN entry where the `type` and `has_program` fields
290    /// have inconsistent values.
291    pub corruptions_discarded: u64,
292    /// The number of times the cfi evaluator failed out in a way that
293    /// implies the cfi entry is fundamentally corrupt.
294    ///
295    /// This isn't detectected during parsing for two reasons:
296    ///
297    /// * We don't parse cfi program strings until we are requested to
298    ///   execute them (there's ~millions of program strings which will
299    ///   never need to be parsed, so eagerly parsing them would be
300    ///   horribly expensive and pointless for anything but debug stats.)
301    ///
302    /// * A program string may technically parse but still be impossible
303    ///   to fully evaluate. For instance, it might try to pop values from
304    ///   its internal stack when there are none left.
305    ///
306    /// This number may be inflated if a corrupt cfi entry occurs in multiple
307    /// frames, as each attempted eval will be counted.
308    ///
309    /// This number does not include cfi evaluations that failed in ways that
310    /// may be a result of incorrect input memory/registers (e.g. failing
311    /// to evaluate a "dereference pointer" instruction because the pointer
312    /// was not mapped memory). In these situations the cfi entry *may*
313    /// still be correct.
314    pub cfi_eval_corruptions: u64,
315}