symbolic_debuginfo/
function_builder.rs

1//! Contains [`FunctionBuilder`], which can be used to create a [`Function`]
2//! with inlinees and line records in the right structure.
3
4use std::{cmp::Reverse, collections::BinaryHeap};
5
6use crate::base::{FileInfo, Function, LineInfo};
7use symbolic_common::Name;
8
9/// Allows creating a [`Function`] from unordered line and inlinee records.
10///
11/// The created function will have the correct tree structure, all the line records will be on the
12/// correct function node within the tree, and all lines and inlinees will be sorted by address.
13pub struct FunctionBuilder<'s> {
14    /// The name of the outer function.
15    name: Name<'s>,
16    /// The compilation dir of the function.
17    compilation_dir: &'s [u8],
18    /// The address of the outer function.
19    address: u64,
20    /// The size of the outer function.
21    size: u64,
22    /// All inlinees at any depth inside this function.
23    ///
24    /// These are stored in a `BinaryHeap<Reverse<_>>` so that `.pop()` returns them ordered by
25    /// address, from low to high. (A [`BinaryHeap`] by itself is a max-heap, so we use [`Reverse`]
26    /// to make this a min-heap). We use a heap instead of a sorted `Vec` because we may need to
27    /// insert new elements during iteration, for inlinee splitting in `ensure_proper_nesting`.
28    inlinees: BinaryHeap<Reverse<FunctionBuilderInlinee<'s>>>,
29    /// The lines, in any order. They will be sorted in `finish()`. These record specify locations
30    /// at the innermost level of the inline stack at the line record's address.
31    lines: Vec<LineInfo<'s>>,
32}
33
34impl<'s> FunctionBuilder<'s> {
35    /// Create a new builder for a given outer function.
36    pub fn new(name: Name<'s>, compilation_dir: &'s [u8], address: u64, size: u64) -> Self {
37        Self {
38            name,
39            compilation_dir,
40            address,
41            size,
42            inlinees: BinaryHeap::new(),
43            lines: Vec::new(),
44        }
45    }
46
47    /// Add an inlinee record. This method can be called in any order.
48    ///
49    /// Inlinees which are called directly from the outer function have depth 0.
50    pub fn add_inlinee(
51        &mut self,
52        depth: u32,
53        name: Name<'s>,
54        address: u64,
55        size: u64,
56        call_file: FileInfo<'s>,
57        call_line: u64,
58    ) {
59        // An inlinee that starts before the function is obviously bogus.
60        if address < self.address {
61            return;
62        }
63
64        self.inlinees.push(Reverse(FunctionBuilderInlinee {
65            depth,
66            address,
67            size,
68            name,
69            call_file,
70            call_line,
71        }));
72    }
73
74    /// Add a line record, specifying the line at this address inside the innermost inlinee that
75    /// covers that address. This method can be called in any order.
76    pub fn add_leaf_line(
77        &mut self,
78        address: u64,
79        size: Option<u64>,
80        file: FileInfo<'s>,
81        line: u64,
82    ) {
83        // A line record that starts before the function is obviously bogus.
84        if address < self.address {
85            return;
86        }
87
88        self.lines.push(LineInfo {
89            address,
90            size,
91            file,
92            line,
93        });
94    }
95
96    /// Create the `Function`, consuming the builder.
97    pub fn finish(self) -> Function<'s> {
98        // Convert our data into the right shape.
99        // There are two big differences between what we have and what we want:
100        //  - We have all inlinees in a flat list, but we want to create nested functions for them,
101        //    forming a tree structure.
102        //  - Our line records are in a flat list but they describe lines at different levels of
103        //    inlining. We need to assign the line records to the correct function, at the correct
104        //    level.
105        let FunctionBuilder {
106            name,
107            compilation_dir,
108            address,
109            size,
110            inlinees,
111            mut lines,
112        } = self;
113
114        let inlinees = ensure_proper_nesting(inlinees);
115
116        // Sort the lines by address.
117        lines.sort_by_key(|line| line.address);
118
119        let outer_function = Function {
120            address,
121            size,
122            name,
123            compilation_dir,
124            lines: Vec::new(),
125            inlinees: Vec::new(),
126            inline: false,
127        };
128        let outer_function_end = address + size;
129        let mut stack = FunctionBuilderStack::new(outer_function);
130
131        let mut inlinee_iter = inlinees.into_iter();
132        let mut line_iter = lines.into_iter();
133
134        let mut next_inlinee = inlinee_iter.next();
135        let mut next_line = line_iter.next();
136
137        // Iterate over lines and inlinees.
138        loop {
139            // If we have both a line and an inlinee at the same address, process the inlinee first.
140            // The line belongs "inside" that inlinee.
141            if next_inlinee.is_some()
142                && (next_line.is_none()
143                    || next_inlinee.as_ref().unwrap().address
144                        <= next_line.as_ref().unwrap().address)
145            {
146                let inlinee = next_inlinee.take().unwrap();
147                stack.flush_address(inlinee.address);
148                stack.flush_depth(inlinee.depth);
149
150                if inlinee.address >= outer_function_end {
151                    break;
152                }
153
154                stack.last_mut().lines.push(LineInfo {
155                    address: inlinee.address,
156                    size: Some(inlinee.size),
157                    file: inlinee.call_file,
158                    line: inlinee.call_line,
159                });
160                stack.push(Function {
161                    address: inlinee.address,
162                    size: inlinee.size,
163                    name: inlinee.name,
164                    compilation_dir,
165                    lines: Vec::new(),
166                    inlinees: Vec::new(),
167                    inline: true,
168                });
169                next_inlinee = inlinee_iter.next();
170                continue;
171            }
172
173            // Process the line.
174            if let Some(mut line) = next_line.take() {
175                stack.flush_address(line.address);
176
177                if line.address >= outer_function_end {
178                    break;
179                }
180
181                // Ensure that Function lines are non-overlapping, by splitting lines so that they
182                // don't cross inlinee boundaries. If lines were overlapping across inlinee
183                // boundaries, then they would overlap with the lines that we create for the calls
184                // to those inlinees.
185                // We have to split up a line in two cases: If it overlaps with the end of the
186                // current inlinee, or if it overlaps with the start of the next inlinee.
187                if let Some(size) = line.size {
188                    let line_end = line.address.saturating_add(size);
189                    let current_innermost_fun_end = stack.last_mut().end_address();
190                    let split_address = if let Some(next_inlinee) = next_inlinee.as_ref() {
191                        next_inlinee.address.min(current_innermost_fun_end)
192                    } else {
193                        current_innermost_fun_end
194                    };
195                    if split_address < line_end {
196                        // We have overlap! Split the line up.
197                        let mut split_line = line.clone();
198                        split_line.address = split_address;
199                        split_line.size = Some(line_end - split_address);
200                        line.size = Some(split_address - line.address);
201                        stack.last_mut().lines.push(line);
202
203                        next_line = Some(split_line);
204                        continue;
205                    }
206                }
207
208                // Handle the case where no overlap was detected.
209                stack.last_mut().lines.push(line);
210                next_line = line_iter.next();
211                continue;
212            }
213
214            // If we get here, we have run out of both lines and inlinees, and we're done.
215            break;
216        }
217
218        stack.finish()
219    }
220}
221
222/// Represents a contiguous address range which is covered by an inlined function call.
223#[derive(PartialEq, Eq, Clone, Debug)]
224struct FunctionBuilderInlinee<'s> {
225    /// The inline nesting level of this inline call. Calls from the outer function have depth 0.
226    pub depth: u32,
227    /// The start address.
228    pub address: u64,
229    /// The size in bytes.
230    pub size: u64,
231    /// The name of the function which is called.
232    pub name: Name<'s>,
233    /// The file name of the location of the call.
234    pub call_file: FileInfo<'s>,
235    /// The line number of the location of the call.
236    pub call_line: u64,
237}
238
239/// Implement ordering in DFS order, i.e. first by address and then by depth.
240impl PartialOrd for FunctionBuilderInlinee<'_> {
241    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
242        Some(self.cmp(other))
243    }
244}
245
246/// Implement ordering in DFS order, i.e. first by address and then by depth.
247impl Ord for FunctionBuilderInlinee<'_> {
248    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
249        (self.address, self.depth).cmp(&(other.address, other.depth))
250    }
251}
252
253/// Keeps track of the current inline stack, when iterating inlinees in DFS order.
254struct FunctionBuilderStack<'s> {
255    /// The current inline stack, elements are (end_address, function).
256    ///
257    /// Always contains at least one element: `stack[0].1` is the outer function.
258    stack: Vec<(u64, Function<'s>)>,
259}
260
261impl<'s> FunctionBuilderStack<'s> {
262    /// Creates a new stack, initialized with the outer function.
263    pub fn new(outer_function: Function<'s>) -> Self {
264        let end_address = outer_function.address.saturating_add(outer_function.size);
265        let stack = vec![(end_address, outer_function)];
266        Self { stack }
267    }
268
269    /// Returns an exclusive reference to the function at the top of the stack, i.e. the "deepest"
270    /// function.
271    pub fn last_mut(&mut self) -> &mut Function<'s> {
272        &mut self.stack.last_mut().unwrap().1
273    }
274
275    /// Pops the deepest function from the stack and adds it to the inlinees of its caller.
276    fn pop(&mut self) {
277        assert!(self.stack.len() > 1);
278
279        // Pop the function and add it to its parent function's list of inlinees.
280        let fun = self.stack.pop().unwrap().1;
281        self.stack.last_mut().unwrap().1.inlinees.push(fun);
282    }
283
284    /// Finish and pop all functions that end at or before this address.
285    pub fn flush_address(&mut self, address: u64) {
286        while self.stack.len() > 1 && self.stack.last().unwrap().0 <= address {
287            self.pop();
288        }
289    }
290
291    /// Finish and pop all functions that are at the given depth / "nesting level" or deeper.
292    pub fn flush_depth(&mut self, depth: u32) {
293        while self.stack.len() > depth as usize + 1 {
294            self.pop();
295        }
296    }
297
298    /// Push an inlinee to the stack.
299    pub fn push(&mut self, inlinee: Function<'s>) {
300        let end_address = inlinee.address.saturating_add(inlinee.size);
301        self.stack.push((end_address, inlinee));
302    }
303
304    /// Finish the entire stack and return the outer function.
305    pub fn finish(mut self) -> Function<'s> {
306        while self.stack.len() > 1 {
307            self.pop();
308        }
309        self.stack.pop().unwrap().1
310    }
311}
312
313/// Converts the `BinaryHeap` of inlinees into a sorted `Vec` of inlinees, while ensuring proper
314/// inlinee nesting.
315///
316/// # Background
317///
318/// Inlinees are organized in a tree, where each node has a start address and an end address.
319/// For this tree to be well-formed, sibling nodes need to have non-overlapping address ranges, and
320/// child nodes must not extend beyond their parent nodes.
321///
322/// Overlapping siblings are always considered malformed data; if the input data has overlapping
323/// siblings there is a bug in the system which produced the input data.
324///
325/// However, child nodes may legitimately extend beyond their parents in the input data.
326/// This happens when `FunctionBuilder` is used for Breakpad .sym files.
327///
328/// This function splits and redistributes such nodes over other parents. In the output data from
329/// this function, child nodes will not extend beyond their parents.
330///
331/// Example:
332///
333/// ```plain
334/// 0x0..0x8: a() @ a.cpp:10 -> b() @ b.cpp:20 -> c() @ c.cpp:30
335/// 0x8..0xd: a() @ a.cpp:15 -> b() @ b.cpp:20 -> c() @ c.cpp:30
336///
337/// may be equivalently expressed as:
338///
339/// Representation A: Properly nested
340///  - b() called from a.cpp:10 at 0x0..0x8
341///    - c() called from b.cpp:20 at 0x0..0x8
342///  - b() called from a.cpp:15 at 0x8..0xd
343///    - c() called from b.cpp:20 at 0x8..0xd
344///
345/// or as:
346///
347/// Representation B: Improperly nested, but unambiguous and compact
348///  - b() called from a.cpp:10 at 0x0..0x8
349///    - c() called from b.cpp:20 at 0x0..0xd   <-- extends beyond parent
350///  - b() called from a.cpp:10 at 0x0..0x8
351///
352/// In Representation B, the two c() inlinees were merged into one, even though they have different
353/// parents. This is valid input.
354///
355/// This function will convert Representation B into Representation A.
356/// ```
357///
358/// To create a well-formed tree, this function does the following:
359///
360///  - If a node overlaps with its previous sibling, the node's start address is adjusted to avoid
361///    overlap.
362///  - If a node extends beyond its parent, it is split into two nodes.
363///  - Free-floating nodes are removed, i.e. a node at depth N+1 at an address at which there is no
364///    node at depth N.
365fn ensure_proper_nesting(
366    mut inlinees: BinaryHeap<Reverse<FunctionBuilderInlinee>>,
367) -> Vec<FunctionBuilderInlinee> {
368    let mut result = Vec::with_capacity(inlinees.len());
369
370    // This stack contains, at index i, the end address of the most recent inlinee at depth i.
371    // The length of this stack is the current depth. During the iteration we traverse the inlinee
372    // tree in DFS order, so the "current depth" changes as we enter and exit inlinees.
373    let mut end_address_stack = Vec::new();
374
375    // Iterate the inlinees, ordered by (address, depth), in order of increasing address.
376    // We take each inlinee out of `inlinees`, check it, and then append it to `result`.
377    while let Some(Reverse(mut inlinee)) = inlinees.pop() {
378        let depth = inlinee.depth as usize;
379        let start_address = inlinee.address;
380        let mut end_address = match start_address.checked_add(inlinee.size) {
381            Some(end_address) => end_address,
382            None => continue,
383        };
384
385        if end_address_stack.len() < depth {
386            // This node has no parent. Skip it.
387            continue;
388        }
389
390        if let Some(&previous_sibling_end_address) = end_address_stack.get(depth) {
391            if end_address <= previous_sibling_end_address {
392                // This node is completely engulfed by its previous sibling.
393                // Skip this node.
394                continue;
395            }
396            if start_address < previous_sibling_end_address {
397                let new_start_address = previous_sibling_end_address;
398                // Resolve overlap by adjusting this node's start address and size, keeping its
399                // end address unchanged.
400                inlinee.address = new_start_address;
401                inlinee.size = end_address - new_start_address;
402                // Put it back into the heap so that it is processed in the correct order.
403                inlinees.push(Reverse(inlinee));
404                continue;
405            }
406        }
407
408        end_address_stack.truncate(depth);
409        debug_assert_eq!(end_address_stack.len(), depth, "due to len() check + trunc");
410
411        if let Some(&caller_end_address) = end_address_stack.last() {
412            // We know: start_address >= caller_start_address, ensured by the sort order.
413            // But is start_address < caller_end_address?
414            if start_address >= caller_end_address {
415                // This node is completely outside its parent. This must mean that it does not have
416                // a parent, otherwise we would have encountered its parent.
417                // Skip this node.
418                continue;
419            }
420            if end_address > caller_end_address {
421                // This node extends beyond its caller. Split it into two.
422                let split_address = caller_end_address;
423                let mut split_inlinee = inlinee.clone();
424                split_inlinee.address = split_address;
425                split_inlinee.size = end_address - split_address;
426                inlinees.push(Reverse(split_inlinee));
427
428                // Shorten the current inlinee.
429                inlinee.size = split_address - start_address;
430                end_address = split_address;
431            }
432        } else {
433            // This is an inlinee at depth 0, i.e. this inlinee is directly called by the outer
434            // function. Those inlinees can be as long as they want; we don't try to
435            // force them to stay within the outer function's address range here.
436        }
437        result.push(inlinee);
438        end_address_stack.push(end_address); // this goes into end_address_stack[depth]
439    }
440    result
441}
442
443#[cfg(test)]
444mod tests {
445    use super::*;
446
447    #[test]
448    fn test_simple() {
449        // 0x10 - 0x40: foo in foo.c on line 1
450        let mut builder = FunctionBuilder::new(Name::from("foo"), &[], 0x10, 0x30);
451        builder.add_leaf_line(0x10, Some(0x30), FileInfo::from_filename(b"foo.c"), 1);
452        let func = builder.finish();
453
454        assert_eq!(func.name.as_str(), "foo");
455        assert_eq!(&func.lines, &[LineInfo::new(0x10, 0x30, b"foo.c", 1)]);
456    }
457
458    #[test]
459    fn test_inlinee() {
460        // 0x10 - 0x20: foo in foo.c on line 1
461        // 0x20 - 0x40: bar in bar.c on line 1
462        // - inlined into: foo in foo.c on line 2
463        let mut builder = FunctionBuilder::new(Name::from("foo"), &[], 0x10, 0x30);
464        builder.add_inlinee(
465            0,
466            Name::from("bar"),
467            0x20,
468            0x20,
469            FileInfo::from_filename(b"foo.c"),
470            2,
471        );
472        builder.add_leaf_line(0x10, Some(0x10), FileInfo::from_filename(b"foo.c"), 1);
473        builder.add_leaf_line(0x20, Some(0x20), FileInfo::from_filename(b"bar.c"), 1);
474        let func = builder.finish();
475
476        // the outer function has two line records, one for itself, the other for the inlined call
477        assert_eq!(func.name.as_str(), "foo");
478        assert_eq!(
479            &func.lines,
480            &[
481                LineInfo::new(0x10, 0x10, b"foo.c", 1),
482                LineInfo::new(0x20, 0x20, b"foo.c", 2)
483            ]
484        );
485
486        assert_eq!(func.inlinees.len(), 1);
487        assert_eq!(func.inlinees[0].name.as_str(), "bar");
488        assert_eq!(
489            &func.inlinees[0].lines,
490            &[LineInfo::new(0x20, 0x20, b"bar.c", 1)]
491        );
492    }
493
494    #[test]
495    fn test_longer_line_record() {
496        // Consider the following code:
497        //
498        // ```
499        //   | fn parent() {
500        // 1 |   child1();
501        // 2 |   child2();
502        //   | }
503        // 1 | fn child1() { child2() }
504        // 1 | fn child2() {}
505        // ```
506        //
507        // we assume here that we transitively inline `child2` all the way into `parent`.
508        // but we only have a single line record for that whole chunk of code,
509        // even though the inlining hierarchy specifies two different call sites
510        //
511        // addr:    0x10 0x20 0x30 0x40 0x50
512        //          v    v    v    v    v
513        // # DWARF hierarchy
514        // parent:  |-------------------|
515        // child1:       |----|           (called from parent.c line 1)
516        // child2:       |----|           (called from child1.c line 1)
517        //                    |----|      (called from parent.c line 2)
518        // # line records
519        //          |----|         |----| (parent.c line 1)
520        //               |---------|      (child2.c line 1)
521
522        let mut builder = FunctionBuilder::new(Name::from("parent"), &[], 0x10, 0x40);
523        builder.add_inlinee(
524            0,
525            Name::from("child1"),
526            0x20,
527            0x10,
528            FileInfo::from_filename(b"parent.c"),
529            1,
530        );
531        builder.add_inlinee(
532            1,
533            Name::from("child2"),
534            0x20,
535            0x10,
536            FileInfo::from_filename(b"child1.c"),
537            1,
538        );
539        builder.add_inlinee(
540            0,
541            Name::from("child2"),
542            0x30,
543            0x10,
544            FileInfo::from_filename(b"parent.c"),
545            2,
546        );
547        builder.add_leaf_line(0x10, Some(0x10), FileInfo::from_filename(b"parent.c"), 1);
548        builder.add_leaf_line(0x20, Some(0x20), FileInfo::from_filename(b"child2.c"), 1);
549        builder.add_leaf_line(0x40, Some(0x10), FileInfo::from_filename(b"parent.c"), 1);
550        let func = builder.finish();
551
552        assert_eq!(func.name.as_str(), "parent");
553        assert_eq!(
554            &func.lines,
555            &[
556                LineInfo::new(0x10, 0x10, b"parent.c", 1),
557                LineInfo::new(0x20, 0x10, b"parent.c", 1),
558                LineInfo::new(0x30, 0x10, b"parent.c", 2),
559                LineInfo::new(0x40, 0x10, b"parent.c", 1),
560            ]
561        );
562
563        assert_eq!(func.inlinees.len(), 2);
564        assert_eq!(func.inlinees[0].name.as_str(), "child1");
565        assert_eq!(
566            &func.inlinees[0].lines,
567            &[LineInfo::new(0x20, 0x10, b"child1.c", 1),]
568        );
569        assert_eq!(func.inlinees[0].inlinees.len(), 1);
570        assert_eq!(func.inlinees[0].inlinees[0].name.as_str(), "child2");
571        assert_eq!(
572            &func.inlinees[0].inlinees[0].lines,
573            &[LineInfo::new(0x20, 0x10, b"child2.c", 1),]
574        );
575
576        assert_eq!(func.inlinees[1].name.as_str(), "child2");
577        assert_eq!(
578            &func.inlinees[1].lines,
579            &[LineInfo::new(0x30, 0x10, b"child2.c", 1),]
580        );
581    }
582}