symbolic_debuginfo/function_builder.rs
1//! Contains [`FunctionBuilder`], which can be used to create a [`Function`]
2//! with inlinees and line records in the right structure.
3
4use std::{cmp::Reverse, collections::BinaryHeap};
5
6use crate::base::{FileInfo, Function, LineInfo};
7use symbolic_common::Name;
8
9/// Allows creating a [`Function`] from unordered line and inlinee records.
10///
11/// The created function will have the correct tree structure, all the line records will be on the
12/// correct function node within the tree, and all lines and inlinees will be sorted by address.
13pub struct FunctionBuilder<'s> {
14 /// The name of the outer function.
15 name: Name<'s>,
16 /// The compilation dir of the function.
17 compilation_dir: &'s [u8],
18 /// The address of the outer function.
19 address: u64,
20 /// The size of the outer function.
21 size: u64,
22 /// All inlinees at any depth inside this function.
23 ///
24 /// These are stored in a `BinaryHeap<Reverse<_>>` so that `.pop()` returns them ordered by
25 /// address, from low to high. (A [`BinaryHeap`] by itself is a max-heap, so we use [`Reverse`]
26 /// to make this a min-heap). We use a heap instead of a sorted `Vec` because we may need to
27 /// insert new elements during iteration, for inlinee splitting in `ensure_proper_nesting`.
28 inlinees: BinaryHeap<Reverse<FunctionBuilderInlinee<'s>>>,
29 /// The lines, in any order. They will be sorted in `finish()`. These record specify locations
30 /// at the innermost level of the inline stack at the line record's address.
31 lines: Vec<LineInfo<'s>>,
32}
33
34impl<'s> FunctionBuilder<'s> {
35 /// Create a new builder for a given outer function.
36 pub fn new(name: Name<'s>, compilation_dir: &'s [u8], address: u64, size: u64) -> Self {
37 Self {
38 name,
39 compilation_dir,
40 address,
41 size,
42 inlinees: BinaryHeap::new(),
43 lines: Vec::new(),
44 }
45 }
46
47 /// Add an inlinee record. This method can be called in any order.
48 ///
49 /// Inlinees which are called directly from the outer function have depth 0.
50 pub fn add_inlinee(
51 &mut self,
52 depth: u32,
53 name: Name<'s>,
54 address: u64,
55 size: u64,
56 call_file: FileInfo<'s>,
57 call_line: u64,
58 ) {
59 // An inlinee that starts before the function is obviously bogus.
60 if address < self.address {
61 return;
62 }
63
64 self.inlinees.push(Reverse(FunctionBuilderInlinee {
65 depth,
66 address,
67 size,
68 name,
69 call_file,
70 call_line,
71 }));
72 }
73
74 /// Add a line record, specifying the line at this address inside the innermost inlinee that
75 /// covers that address. This method can be called in any order.
76 pub fn add_leaf_line(
77 &mut self,
78 address: u64,
79 size: Option<u64>,
80 file: FileInfo<'s>,
81 line: u64,
82 ) {
83 // A line record that starts before the function is obviously bogus.
84 if address < self.address {
85 return;
86 }
87
88 self.lines.push(LineInfo {
89 address,
90 size,
91 file,
92 line,
93 });
94 }
95
96 /// Create the `Function`, consuming the builder.
97 pub fn finish(self) -> Function<'s> {
98 // Convert our data into the right shape.
99 // There are two big differences between what we have and what we want:
100 // - We have all inlinees in a flat list, but we want to create nested functions for them,
101 // forming a tree structure.
102 // - Our line records are in a flat list but they describe lines at different levels of
103 // inlining. We need to assign the line records to the correct function, at the correct
104 // level.
105 let FunctionBuilder {
106 name,
107 compilation_dir,
108 address,
109 size,
110 inlinees,
111 mut lines,
112 } = self;
113
114 let inlinees = ensure_proper_nesting(inlinees);
115
116 // Sort the lines by address.
117 lines.sort_by_key(|line| line.address);
118
119 let outer_function = Function {
120 address,
121 size,
122 name,
123 compilation_dir,
124 lines: Vec::new(),
125 inlinees: Vec::new(),
126 inline: false,
127 };
128 let outer_function_end = address + size;
129 let mut stack = FunctionBuilderStack::new(outer_function);
130
131 let mut inlinee_iter = inlinees.into_iter();
132 let mut line_iter = lines.into_iter();
133
134 let mut next_inlinee = inlinee_iter.next();
135 let mut next_line = line_iter.next();
136
137 // Iterate over lines and inlinees.
138 loop {
139 // If we have both a line and an inlinee at the same address, process the inlinee first.
140 // The line belongs "inside" that inlinee.
141 if next_inlinee.is_some()
142 && (next_line.is_none()
143 || next_inlinee.as_ref().unwrap().address
144 <= next_line.as_ref().unwrap().address)
145 {
146 let inlinee = next_inlinee.take().unwrap();
147 stack.flush_address(inlinee.address);
148 stack.flush_depth(inlinee.depth);
149
150 if inlinee.address >= outer_function_end {
151 break;
152 }
153
154 stack.last_mut().lines.push(LineInfo {
155 address: inlinee.address,
156 size: Some(inlinee.size),
157 file: inlinee.call_file,
158 line: inlinee.call_line,
159 });
160 stack.push(Function {
161 address: inlinee.address,
162 size: inlinee.size,
163 name: inlinee.name,
164 compilation_dir,
165 lines: Vec::new(),
166 inlinees: Vec::new(),
167 inline: true,
168 });
169 next_inlinee = inlinee_iter.next();
170 continue;
171 }
172
173 // Process the line.
174 if let Some(mut line) = next_line.take() {
175 stack.flush_address(line.address);
176
177 if line.address >= outer_function_end {
178 break;
179 }
180
181 // Ensure that Function lines are non-overlapping, by splitting lines so that they
182 // don't cross inlinee boundaries. If lines were overlapping across inlinee
183 // boundaries, then they would overlap with the lines that we create for the calls
184 // to those inlinees.
185 // We have to split up a line in two cases: If it overlaps with the end of the
186 // current inlinee, or if it overlaps with the start of the next inlinee.
187 if let Some(size) = line.size {
188 let line_end = line.address.saturating_add(size);
189 let current_innermost_fun_end = stack.last_mut().end_address();
190 let split_address = if let Some(next_inlinee) = next_inlinee.as_ref() {
191 next_inlinee.address.min(current_innermost_fun_end)
192 } else {
193 current_innermost_fun_end
194 };
195 if split_address < line_end {
196 // We have overlap! Split the line up.
197 let mut split_line = line.clone();
198 split_line.address = split_address;
199 split_line.size = Some(line_end - split_address);
200 line.size = Some(split_address - line.address);
201 stack.last_mut().lines.push(line);
202
203 next_line = Some(split_line);
204 continue;
205 }
206 }
207
208 // Handle the case where no overlap was detected.
209 stack.last_mut().lines.push(line);
210 next_line = line_iter.next();
211 continue;
212 }
213
214 // If we get here, we have run out of both lines and inlinees, and we're done.
215 break;
216 }
217
218 stack.finish()
219 }
220}
221
222/// Represents a contiguous address range which is covered by an inlined function call.
223#[derive(PartialEq, Eq, Clone, Debug)]
224struct FunctionBuilderInlinee<'s> {
225 /// The inline nesting level of this inline call. Calls from the outer function have depth 0.
226 pub depth: u32,
227 /// The start address.
228 pub address: u64,
229 /// The size in bytes.
230 pub size: u64,
231 /// The name of the function which is called.
232 pub name: Name<'s>,
233 /// The file name of the location of the call.
234 pub call_file: FileInfo<'s>,
235 /// The line number of the location of the call.
236 pub call_line: u64,
237}
238
239/// Implement ordering in DFS order, i.e. first by address and then by depth.
240impl PartialOrd for FunctionBuilderInlinee<'_> {
241 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
242 Some(self.cmp(other))
243 }
244}
245
246/// Implement ordering in DFS order, i.e. first by address and then by depth.
247impl Ord for FunctionBuilderInlinee<'_> {
248 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
249 (self.address, self.depth).cmp(&(other.address, other.depth))
250 }
251}
252
253/// Keeps track of the current inline stack, when iterating inlinees in DFS order.
254struct FunctionBuilderStack<'s> {
255 /// The current inline stack, elements are (end_address, function).
256 ///
257 /// Always contains at least one element: `stack[0].1` is the outer function.
258 stack: Vec<(u64, Function<'s>)>,
259}
260
261impl<'s> FunctionBuilderStack<'s> {
262 /// Creates a new stack, initialized with the outer function.
263 pub fn new(outer_function: Function<'s>) -> Self {
264 let end_address = outer_function.address.saturating_add(outer_function.size);
265 let stack = vec![(end_address, outer_function)];
266 Self { stack }
267 }
268
269 /// Returns an exclusive reference to the function at the top of the stack, i.e. the "deepest"
270 /// function.
271 pub fn last_mut(&mut self) -> &mut Function<'s> {
272 &mut self.stack.last_mut().unwrap().1
273 }
274
275 /// Pops the deepest function from the stack and adds it to the inlinees of its caller.
276 fn pop(&mut self) {
277 assert!(self.stack.len() > 1);
278
279 // Pop the function and add it to its parent function's list of inlinees.
280 let fun = self.stack.pop().unwrap().1;
281 self.stack.last_mut().unwrap().1.inlinees.push(fun);
282 }
283
284 /// Finish and pop all functions that end at or before this address.
285 pub fn flush_address(&mut self, address: u64) {
286 while self.stack.len() > 1 && self.stack.last().unwrap().0 <= address {
287 self.pop();
288 }
289 }
290
291 /// Finish and pop all functions that are at the given depth / "nesting level" or deeper.
292 pub fn flush_depth(&mut self, depth: u32) {
293 while self.stack.len() > depth as usize + 1 {
294 self.pop();
295 }
296 }
297
298 /// Push an inlinee to the stack.
299 pub fn push(&mut self, inlinee: Function<'s>) {
300 let end_address = inlinee.address.saturating_add(inlinee.size);
301 self.stack.push((end_address, inlinee));
302 }
303
304 /// Finish the entire stack and return the outer function.
305 pub fn finish(mut self) -> Function<'s> {
306 while self.stack.len() > 1 {
307 self.pop();
308 }
309 self.stack.pop().unwrap().1
310 }
311}
312
313/// Converts the `BinaryHeap` of inlinees into a sorted `Vec` of inlinees, while ensuring proper
314/// inlinee nesting.
315///
316/// # Background
317///
318/// Inlinees are organized in a tree, where each node has a start address and an end address.
319/// For this tree to be well-formed, sibling nodes need to have non-overlapping address ranges, and
320/// child nodes must not extend beyond their parent nodes.
321///
322/// Overlapping siblings are always considered malformed data; if the input data has overlapping
323/// siblings there is a bug in the system which produced the input data.
324///
325/// However, child nodes may legitimately extend beyond their parents in the input data.
326/// This happens when `FunctionBuilder` is used for Breakpad .sym files.
327///
328/// This function splits and redistributes such nodes over other parents. In the output data from
329/// this function, child nodes will not extend beyond their parents.
330///
331/// Example:
332///
333/// ```plain
334/// 0x0..0x8: a() @ a.cpp:10 -> b() @ b.cpp:20 -> c() @ c.cpp:30
335/// 0x8..0xd: a() @ a.cpp:15 -> b() @ b.cpp:20 -> c() @ c.cpp:30
336///
337/// may be equivalently expressed as:
338///
339/// Representation A: Properly nested
340/// - b() called from a.cpp:10 at 0x0..0x8
341/// - c() called from b.cpp:20 at 0x0..0x8
342/// - b() called from a.cpp:15 at 0x8..0xd
343/// - c() called from b.cpp:20 at 0x8..0xd
344///
345/// or as:
346///
347/// Representation B: Improperly nested, but unambiguous and compact
348/// - b() called from a.cpp:10 at 0x0..0x8
349/// - c() called from b.cpp:20 at 0x0..0xd <-- extends beyond parent
350/// - b() called from a.cpp:10 at 0x0..0x8
351///
352/// In Representation B, the two c() inlinees were merged into one, even though they have different
353/// parents. This is valid input.
354///
355/// This function will convert Representation B into Representation A.
356/// ```
357///
358/// To create a well-formed tree, this function does the following:
359///
360/// - If a node overlaps with its previous sibling, the node's start address is adjusted to avoid
361/// overlap.
362/// - If a node extends beyond its parent, it is split into two nodes.
363/// - Free-floating nodes are removed, i.e. a node at depth N+1 at an address at which there is no
364/// node at depth N.
365fn ensure_proper_nesting(
366 mut inlinees: BinaryHeap<Reverse<FunctionBuilderInlinee>>,
367) -> Vec<FunctionBuilderInlinee> {
368 let mut result = Vec::with_capacity(inlinees.len());
369
370 // This stack contains, at index i, the end address of the most recent inlinee at depth i.
371 // The length of this stack is the current depth. During the iteration we traverse the inlinee
372 // tree in DFS order, so the "current depth" changes as we enter and exit inlinees.
373 let mut end_address_stack = Vec::new();
374
375 // Iterate the inlinees, ordered by (address, depth), in order of increasing address.
376 // We take each inlinee out of `inlinees`, check it, and then append it to `result`.
377 while let Some(Reverse(mut inlinee)) = inlinees.pop() {
378 let depth = inlinee.depth as usize;
379 let start_address = inlinee.address;
380 let mut end_address = match start_address.checked_add(inlinee.size) {
381 Some(end_address) => end_address,
382 None => continue,
383 };
384
385 if end_address_stack.len() < depth {
386 // This node has no parent. Skip it.
387 continue;
388 }
389
390 if let Some(&previous_sibling_end_address) = end_address_stack.get(depth) {
391 if end_address <= previous_sibling_end_address {
392 // This node is completely engulfed by its previous sibling.
393 // Skip this node.
394 continue;
395 }
396 if start_address < previous_sibling_end_address {
397 let new_start_address = previous_sibling_end_address;
398 // Resolve overlap by adjusting this node's start address and size, keeping its
399 // end address unchanged.
400 inlinee.address = new_start_address;
401 inlinee.size = end_address - new_start_address;
402 // Put it back into the heap so that it is processed in the correct order.
403 inlinees.push(Reverse(inlinee));
404 continue;
405 }
406 }
407
408 end_address_stack.truncate(depth);
409 debug_assert_eq!(end_address_stack.len(), depth, "due to len() check + trunc");
410
411 if let Some(&caller_end_address) = end_address_stack.last() {
412 // We know: start_address >= caller_start_address, ensured by the sort order.
413 // But is start_address < caller_end_address?
414 if start_address >= caller_end_address {
415 // This node is completely outside its parent. This must mean that it does not have
416 // a parent, otherwise we would have encountered its parent.
417 // Skip this node.
418 continue;
419 }
420 if end_address > caller_end_address {
421 // This node extends beyond its caller. Split it into two.
422 let split_address = caller_end_address;
423 let mut split_inlinee = inlinee.clone();
424 split_inlinee.address = split_address;
425 split_inlinee.size = end_address - split_address;
426 inlinees.push(Reverse(split_inlinee));
427
428 // Shorten the current inlinee.
429 inlinee.size = split_address - start_address;
430 end_address = split_address;
431 }
432 } else {
433 // This is an inlinee at depth 0, i.e. this inlinee is directly called by the outer
434 // function. Those inlinees can be as long as they want; we don't try to
435 // force them to stay within the outer function's address range here.
436 }
437 result.push(inlinee);
438 end_address_stack.push(end_address); // this goes into end_address_stack[depth]
439 }
440 result
441}
442
443#[cfg(test)]
444mod tests {
445 use super::*;
446
447 #[test]
448 fn test_simple() {
449 // 0x10 - 0x40: foo in foo.c on line 1
450 let mut builder = FunctionBuilder::new(Name::from("foo"), &[], 0x10, 0x30);
451 builder.add_leaf_line(0x10, Some(0x30), FileInfo::from_filename(b"foo.c"), 1);
452 let func = builder.finish();
453
454 assert_eq!(func.name.as_str(), "foo");
455 assert_eq!(&func.lines, &[LineInfo::new(0x10, 0x30, b"foo.c", 1)]);
456 }
457
458 #[test]
459 fn test_inlinee() {
460 // 0x10 - 0x20: foo in foo.c on line 1
461 // 0x20 - 0x40: bar in bar.c on line 1
462 // - inlined into: foo in foo.c on line 2
463 let mut builder = FunctionBuilder::new(Name::from("foo"), &[], 0x10, 0x30);
464 builder.add_inlinee(
465 0,
466 Name::from("bar"),
467 0x20,
468 0x20,
469 FileInfo::from_filename(b"foo.c"),
470 2,
471 );
472 builder.add_leaf_line(0x10, Some(0x10), FileInfo::from_filename(b"foo.c"), 1);
473 builder.add_leaf_line(0x20, Some(0x20), FileInfo::from_filename(b"bar.c"), 1);
474 let func = builder.finish();
475
476 // the outer function has two line records, one for itself, the other for the inlined call
477 assert_eq!(func.name.as_str(), "foo");
478 assert_eq!(
479 &func.lines,
480 &[
481 LineInfo::new(0x10, 0x10, b"foo.c", 1),
482 LineInfo::new(0x20, 0x20, b"foo.c", 2)
483 ]
484 );
485
486 assert_eq!(func.inlinees.len(), 1);
487 assert_eq!(func.inlinees[0].name.as_str(), "bar");
488 assert_eq!(
489 &func.inlinees[0].lines,
490 &[LineInfo::new(0x20, 0x20, b"bar.c", 1)]
491 );
492 }
493
494 #[test]
495 fn test_longer_line_record() {
496 // Consider the following code:
497 //
498 // ```
499 // | fn parent() {
500 // 1 | child1();
501 // 2 | child2();
502 // | }
503 // 1 | fn child1() { child2() }
504 // 1 | fn child2() {}
505 // ```
506 //
507 // we assume here that we transitively inline `child2` all the way into `parent`.
508 // but we only have a single line record for that whole chunk of code,
509 // even though the inlining hierarchy specifies two different call sites
510 //
511 // addr: 0x10 0x20 0x30 0x40 0x50
512 // v v v v v
513 // # DWARF hierarchy
514 // parent: |-------------------|
515 // child1: |----| (called from parent.c line 1)
516 // child2: |----| (called from child1.c line 1)
517 // |----| (called from parent.c line 2)
518 // # line records
519 // |----| |----| (parent.c line 1)
520 // |---------| (child2.c line 1)
521
522 let mut builder = FunctionBuilder::new(Name::from("parent"), &[], 0x10, 0x40);
523 builder.add_inlinee(
524 0,
525 Name::from("child1"),
526 0x20,
527 0x10,
528 FileInfo::from_filename(b"parent.c"),
529 1,
530 );
531 builder.add_inlinee(
532 1,
533 Name::from("child2"),
534 0x20,
535 0x10,
536 FileInfo::from_filename(b"child1.c"),
537 1,
538 );
539 builder.add_inlinee(
540 0,
541 Name::from("child2"),
542 0x30,
543 0x10,
544 FileInfo::from_filename(b"parent.c"),
545 2,
546 );
547 builder.add_leaf_line(0x10, Some(0x10), FileInfo::from_filename(b"parent.c"), 1);
548 builder.add_leaf_line(0x20, Some(0x20), FileInfo::from_filename(b"child2.c"), 1);
549 builder.add_leaf_line(0x40, Some(0x10), FileInfo::from_filename(b"parent.c"), 1);
550 let func = builder.finish();
551
552 assert_eq!(func.name.as_str(), "parent");
553 assert_eq!(
554 &func.lines,
555 &[
556 LineInfo::new(0x10, 0x10, b"parent.c", 1),
557 LineInfo::new(0x20, 0x10, b"parent.c", 1),
558 LineInfo::new(0x30, 0x10, b"parent.c", 2),
559 LineInfo::new(0x40, 0x10, b"parent.c", 1),
560 ]
561 );
562
563 assert_eq!(func.inlinees.len(), 2);
564 assert_eq!(func.inlinees[0].name.as_str(), "child1");
565 assert_eq!(
566 &func.inlinees[0].lines,
567 &[LineInfo::new(0x20, 0x10, b"child1.c", 1),]
568 );
569 assert_eq!(func.inlinees[0].inlinees.len(), 1);
570 assert_eq!(func.inlinees[0].inlinees[0].name.as_str(), "child2");
571 assert_eq!(
572 &func.inlinees[0].inlinees[0].lines,
573 &[LineInfo::new(0x20, 0x10, b"child2.c", 1),]
574 );
575
576 assert_eq!(func.inlinees[1].name.as_str(), "child2");
577 assert_eq!(
578 &func.inlinees[1].lines,
579 &[LineInfo::new(0x30, 0x10, b"child2.c", 1),]
580 );
581 }
582}