// breakpad_symbols/sym_file/types.rs
// Copyright 2015 Ted Mielczarek. See the COPYRIGHT
// file at the top-level directory of this distribution.
3
use range_map::{Range, RangeMap};
use std::collections::HashMap;
6
/// A publicly visible linker symbol.
///
/// The derived `Ord`/`PartialOrd` implementations compare fields in
/// declaration order: `address`, then `name`, then `parameter_size`.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct PublicSymbol {
    /// The symbol's address relative to the module's load address.
    ///
    /// This field is declared first so that the derived Ord implementation sorts
    /// by address first. We take advantage of the sort order during address lookup.
    pub address: u64,
    /// The name of the symbol.
    pub name: String,
    /// The size of parameters passed to the function.
    pub parameter_size: u32,
}
20
/// A mapping from machine code bytes to source line and file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SourceLine {
    /// The start address relative to the module's load address.
    pub address: u64,
    /// The size of this range of instructions in bytes.
    pub size: u32,
    /// The source file name that generated this machine code.
    ///
    /// This is an index into `SymbolFile::files`.
    pub file: u32,
    /// The line number in `file` that generated this machine code.
    pub line: u32,
}
35
/// A single range which is covered by an inlined function call.
///
/// The derived `Ord` compares `depth` first and `address` second, which
/// matches the `(depth, address)` sort order of `Function::inlinees`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Inlinee {
    /// The depth of the inline call.
    pub depth: u32,
    /// The start address relative to the module's load address.
    pub address: u64,
    /// The size of this range of instructions in bytes.
    pub size: u32,
    /// The source file which contains the function call.
    ///
    /// This is an index into `SymbolFile::files`.
    pub call_file: u32,
    /// The line number in `call_file` for the function call.
    pub call_line: u32,
    /// The function name, as an index into `SymbolFile::inline_origins`.
    pub origin_id: u32,
}
54
55/// A source-language function.
56#[derive(Clone, Debug, PartialEq, Eq)]
57pub struct Function {
58 /// The function's start address relative to the module's load address.
59 pub address: u64,
60 /// The size of the function in bytes.
61 pub size: u32,
62 /// The size of parameters passed to the function.
63 pub parameter_size: u32,
64 /// The name of the function as declared in the source.
65 pub name: String,
66 /// Source line information for this function.
67 pub lines: RangeMap<u64, SourceLine>,
68 /// Inlinee information for this function, sorted by (depth, address).
69 ///
70 /// Essentially this can be considered as "one vec per depth", just with
71 /// all those vecs concatenated into one.
72 ///
73 /// Inlinees form a nested structure, you can think of them like a flame graph.
74 pub inlinees: Vec<Inlinee>,
75}
76
77impl Function {
78 pub fn memory_range(&self) -> Option<Range<u64>> {
79 if self.size == 0 {
80 return None;
81 }
82 Some(Range::new(
83 self.address,
84 self.address.checked_add(self.size as u64)? - 1,
85 ))
86 }
87
88 /// Returns `(file_id, line, address, inline_origin)` of the line or inline record that
89 /// covers the given address at the outermost level (i.e. not inside any
90 /// inlined calls).
91 pub fn get_outermost_sourceloc(&self, addr: u64) -> Option<(u32, u32, u64, Option<u32>)> {
92 if let Some((call_file, call_line, address, origin)) = self.get_inlinee_at_depth(0, addr) {
93 return Some((call_file, call_line, address, Some(origin)));
94 }
95 // Otherwise we return the line record covering this address.
96 let line = self.lines.get(addr)?;
97 Some((line.file, line.line, line.address, None))
98 }
99
100 /// Returns `(file_id, line, address)` of the line record that covers the
101 /// given address. Line records describe locations at the deepest level of
102 /// inlining at that address.
103 ///
104 /// For example, if we have an "inline call stack" A -> B -> C at this
105 /// address, i.e. both the call to B and the call to C have been inlined all
106 /// the way into A (A being the "outer function"), then this method reports
107 /// locations in C.
108 pub fn get_innermost_sourceloc(&self, addr: u64) -> Option<(u32, u32, u64)> {
109 let line = self.lines.get(addr)?;
110 Some((line.file, line.line, line.address))
111 }
112
113 /// Returns `(call_file_id, call_line, address, inline_origin)` of the
114 /// inlinee record that covers the given address at the given depth.
115 ///
116 /// We start at depth zero. For example, if we have an "inline call stack"
117 /// A -> B -> C at an address, i.e. both the call to B and the call to C have
118 /// been inlined all the way into A (A being the "outer function"), then the
119 /// call A -> B is at level zero, and the call B -> C is at level one.
120 pub fn get_inlinee_at_depth(&self, depth: u32, addr: u64) -> Option<(u32, u32, u64, u32)> {
121 let inlinee = match self
122 .inlinees
123 .binary_search_by_key(&(depth, addr), |inlinee| (inlinee.depth, inlinee.address))
124 {
125 // Exact match
126 Ok(index) => &self.inlinees[index],
127 // No match, insertion index is zero => before first element
128 Err(0) => return None,
129 // No exact match, insertion index points after inlinee whose (depth, addr) is < what were looking for
130 // => subtract 1 to get candidate
131 Err(index) => &self.inlinees[index - 1],
132 };
133 if inlinee.depth != depth {
134 return None;
135 }
136 let end_address = inlinee.address.checked_add(inlinee.size as u64)?;
137 if addr < end_address {
138 Some((
139 inlinee.call_file,
140 inlinee.call_line,
141 inlinee.address,
142 inlinee.origin_id,
143 ))
144 } else {
145 None
146 }
147 }
148}
149
/// Extra metadata that can be safely ignored, but may contain useful facts.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub enum Info {
    /// The URL this file was downloaded from. This is added to symbol files
    /// by HttpSymbolSupplier when it stores them in its cache, so that we
    /// can populate that info even on a cache hit.
    Url(String),
    /// An info line we either don't know about or don't care about.
    Unknown,
}
160
/// DWARF CFI rules for recovering registers at a specific address.
///
/// The derived ordering compares `address` first, then `rules`.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct CfiRules {
    /// The address in question.
    pub address: u64,
    /// Postfix expressions to evaluate to recover register values.
    pub rules: String,
}
169
170/// Information used for unwinding stack frames using DWARF CFI.
171#[derive(Clone, Debug, PartialEq, Eq)]
172pub struct StackInfoCfi {
173 /// The initial rules for this address range.
174 pub init: CfiRules,
175 /// The size of this entire address range.
176 pub size: u32,
177 /// Additional rules to use at specified addresses.
178 pub add_rules: Vec<CfiRules>,
179}
180
181impl StackInfoCfi {
182 pub fn memory_range(&self) -> Option<Range<u64>> {
183 if self.size == 0 {
184 return None;
185 }
186 Some(Range::new(
187 self.init.address,
188 self.init.address.checked_add(self.size as u64)? - 1,
189 ))
190 }
191}
192
193/// Specific details about whether the frame uses a base pointer or has a program string to
194/// evaluate.
195#[derive(Clone, Debug, PartialEq, Eq)]
196pub enum WinFrameType {
197 /// This frame uses FPO-style data.
198 Fpo(StackInfoWin),
199 /// This frame uses new-style frame data, has a program string.
200 FrameData(StackInfoWin),
201 /// Some other type of frame.
202 Unhandled,
203}
204
/// The payload of a Windows stack info record: either a program string or a
/// boolean regarding a base pointer.
///
/// Stored in `StackInfoWin::program_string_or_base_pointer`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum WinStackThing {
    /// A program string to evaluate.
    ProgramString(String),
    /// A flag regarding the base pointer (presumably whether the frame
    /// allocates one — confirm against the parser).
    AllocatesBasePointer(bool),
}
210
211/// Information used for unwinding stack frames using Windows frame info.
212#[derive(Clone, Debug, PartialEq, Eq)]
213pub struct StackInfoWin {
214 /// The address in question.
215 pub address: u64,
216 /// The size of the address range covered.
217 pub size: u32,
218 /// The size of the function's prologue.
219 pub prologue_size: u32,
220 /// The size of the function's epilogue.
221 pub epilogue_size: u32,
222 /// The size of arguments passed to this function.
223 pub parameter_size: u32,
224 /// The number of bytes in the stack frame for callee-saves registers.
225 pub saved_register_size: u32,
226 /// The number of bytes in the stack frame for local variables.
227 pub local_size: u32,
228 /// The maximum number of bytes pushed onto the stack by this frame.
229 pub max_stack_size: u32,
230 /// A program string or boolean regarding a base pointer.
231 pub program_string_or_base_pointer: WinStackThing,
232}
233
234impl StackInfoWin {
235 pub fn memory_range(&self) -> Option<Range<u64>> {
236 if self.size == 0 {
237 return None;
238 }
239 Some(Range::new(
240 self.address,
241 self.address.checked_add(self.size as u64)? - 1,
242 ))
243 }
244}
245
246/// A parsed .sym file containing debug symbols.
247#[derive(Debug, PartialEq, Eq)]
248pub struct SymbolFile {
249 pub module_id: String,
250 pub debug_file: String,
251 /// The set of source files involved in compilation.
252 pub files: HashMap<u32, String>,
253 /// Publicly visible symbols.
254 pub publics: Vec<PublicSymbol>,
255 /// Functions.
256 pub functions: RangeMap<u64, Function>,
257 /// Function names for inlined functions.
258 pub inline_origins: HashMap<u32, String>,
259 /// DWARF CFI unwind information.
260 pub cfi_stack_info: RangeMap<u64, StackInfoCfi>,
261 /// Windows unwind information (frame data).
262 pub win_stack_framedata_info: RangeMap<u64, StackInfoWin>,
263 /// Windows unwind information (FPO data).
264 pub win_stack_fpo_info: RangeMap<u64, StackInfoWin>,
265
266 // Statistics which are strictly best-effort. Generally this
267 // means we might undercount in situations where we forgot to
268 // log an event.
269 /// If the symbol file was loaded from a URL, this is the url
270 pub url: Option<String>,
271 /// The number of times the parser found that the symbol file was
272 /// strictly ambiguous but simple heuristics repaired it. (e.g.
273 /// two STACK WIN entries overlapped, but the second was a suffix of
274 /// the first, so we just truncated the first.)
275 ///
276 /// Ideally dump_syms would never output this kind of thing, but it's
277 /// tolerable.
278 pub ambiguities_repaired: u64,
279 /// The number of times the parser found that the symbol file was
280 /// ambiguous and just randomly picked one of the options to make
281 /// progress.
282 ///
283 /// e.g. two STACK WIN entries with identical ranges but
284 /// different values, so one was discarded arbitrarily.
285 pub ambiguities_discarded: u64,
286 /// The number of times the parser found that a section of the file
287 /// (generally a line) was corrupt and discarded it.
288 ///
289 /// e.g. a STACK WIN entry where the `type` and `has_program` fields
290 /// have inconsistent values.
291 pub corruptions_discarded: u64,
292 /// The number of times the cfi evaluator failed out in a way that
293 /// implies the cfi entry is fundamentally corrupt.
294 ///
295 /// This isn't detectected during parsing for two reasons:
296 ///
297 /// * We don't parse cfi program strings until we are requested to
298 /// execute them (there's ~millions of program strings which will
299 /// never need to be parsed, so eagerly parsing them would be
300 /// horribly expensive and pointless for anything but debug stats.)
301 ///
302 /// * A program string may technically parse but still be impossible
303 /// to fully evaluate. For instance, it might try to pop values from
304 /// its internal stack when there are none left.
305 ///
306 /// This number may be inflated if a corrupt cfi entry occurs in multiple
307 /// frames, as each attempted eval will be counted.
308 ///
309 /// This number does not include cfi evaluations that failed in ways that
310 /// may be a result of incorrect input memory/registers (e.g. failing
311 /// to evaluate a "dereference pointer" instruction because the pointer
312 /// was not mapped memory). In these situations the cfi entry *may*
313 /// still be correct.
314 pub cfi_eval_corruptions: u64,
315}