pyroscope_rbspy_oncpu/ui/
callgrind.rs

1use anyhow::Result;
2use std::cmp::min;
3use std::collections::{BTreeMap, HashMap};
4use std::io;
5
6use crate::core::types::StackFrame;
7
8/*
9 * **Notes about the overall design**
10 *
11 * The Callgrind format encodes performance data basically as a graph, where the nodes are
12 * functions (like `a`) and the edges are statistics about calls between functions. The `Locations`
13 * struct is where that graph is stored, and to print out the callgrind file at the end we iterate
14 * over that struct in a pretty straightforward way.
15 *
16 * Unlike the flamegraph format (which doesn't care about the order of the stack traces you've
17 * collected at all), the callgrind format **does** care about the order. The callgrind format
18 * implicitly assumes that we have a tracing profile of our program and that you can get exact
19 * counts of the number of calls between every 2 functions. Since rbspy is a sampling profiler,
20 * this means we have to make some assumptions to make this format work!
21 *
22 * **counting function calls**
23 *
24 * The 'count' field in the `Call` struct attempts to count function calls from a -> b
25 * The main assumption we make to do this is that if we have a stack (a,b,c) followed by another
26 * one with a common prefix (a,b,f,g), then that represents the same function call `a -> b`. This
27 * isn't necessarily true but if we're sampling at a high enough rate it's a reasonable assumption.
28 *
29 * Here's an example: let's assume we have these 4 stack traces:
30 *
31 *
32 * ```
33 * a b c d d g x
34 * a b c d d // count calls d -> g and g -> x
35 * a b c d   // count calls d -> d
36 * a b c d d //
37 * a b e f g // count calls b -> c, c -> d, d -> d
38 * // end: count calls a -> b, b -> e, e -> f, f -> g
39 * ```
40 *
41 * For the above example, here's the data this callgrind code would store for a, b, c, d in the
42 * Locations struct.
43 *
44 * You can see that there are 3 numbers we track:
45 *  * `exclusive`: number of times the function was at the top of a stack trace
46 *  * `inclusive`: number of stack traces including a call x -> y
47 *  * `count`: number of estimated calls from x -> y during execution (using assumption above)
48 *
49 * a: {exclusive: 0, calls: {b -> {inclusive: 4, count: 1}}}
50 * b: {exclusive: 0, calls: {c -> {inclusive: 3, count: 1}, e -> {inclusive: 1, count: 1}}}
51 * c: {exclusive: 0, calls: {d -> {inclusive: 3, count: 1}}}
52 * d: {exclusive: 3, calls: {d -> {inclusive: 4, count: 2}, g -> {inclusive: 1, count: 1}}}
53 *
54 */
55
56// Stats about the relationship between two functions, one of which
57// calls the other.
58#[derive(Debug)]
59struct Call {
60    // Estimate of number of times this call was made (see above comment)
61    count: usize,
62
63    // Number of stack traces including this call.
64    // 'a b c d' includes the call b -> c, 'a b e c d' does not
65    inclusive: usize,
66}
67
68// Stats about a single function.
69#[derive(Debug, Default)]
70struct Location {
71    // How many times does this function appear at the top of a stack trace
72    // where it's the most recent function called?
73    exclusive: usize,
74
75    // Data about the calls from this function to other functions.
76    calls: HashMap<StackFrame, Call>,
77}
78
79// Stats about all functions found in our samples.
80#[derive(Default, Debug)]
81struct Locations(HashMap<StackFrame, Location>);
82
83// Information about a function currently on the stack.
84#[derive(Debug)]
85struct StackEntry {
86    frame: StackFrame,
87
88    // How many samples were found inside this call only?
89    exclusive: usize,
90
91    // How many samples were found in this call, and sub-calls?
92    inclusive: usize,
93}
94
95// Tracks statistics about a program being sampled.
96#[derive(Default, Debug)]
97pub struct Stats {
98    // The current stack, along with tracking information.
99    // The root function is at element zero.
100    // Not used in final reporting, only for tracking an ongoing profile.
101    stack: Vec<StackEntry>,
102
103    // Overall stats about this program.
104    locations: Locations,
105}
106
107impl Locations {
108    // Get the current stats for a StackFrame. If it's never been seen before,
109    // automatically create an empty record and return that.
110    fn location(&mut self, frame: &StackFrame) -> &mut Location {
111        if !self.0.contains_key(frame) {
112            // Never seen this frame before, insert an empty record.
113            let loc = Location {
114                ..Default::default()
115            };
116            self.0.insert(frame.clone(), loc);
117        }
118        self.0.get_mut(frame).unwrap()
119    }
120
121    // Add to our stats the exclusive time for a given function.
122    fn add_exclusive(&mut self, entry: &StackEntry) {
123        self.location(&entry.frame).exclusive += entry.exclusive;
124    }
125
126    // Add to our stats info about a single call from a parent to a child
127    // function.
128    fn add_inclusive(&mut self, parent: &StackFrame, child: &StackEntry) {
129        let ploc = self.location(parent);
130        // If we've never seen this parent-child relationship, insert an empty
131        // record.
132        let val = ploc.calls.entry(child.frame.clone()).or_insert(Call {
133            count: 0,
134            inclusive: 0,
135        });
136
137        // Add both the count and the inclusive samples count.
138        val.count += 1;
139        val.inclusive += child.inclusive;
140    }
141}
142
143impl Stats {
144    // Create an empty stats tracker.
145    pub fn new() -> Stats {
146        Stats {
147            ..Default::default()
148        }
149    }
150
151    // Add a single stack sample to this Stats.
152    pub fn add(&mut self, stack: &[StackFrame]) {
153        // The input sample has the root function at the end. Reverse that!
154        let rev: Vec<_> = stack.iter().rev().collect();
155
156        // At this point, the previous stack (self.stack) and the new stack
157        // (rev) may have some parts that agree and others that differ:
158        //
159        // Old stack                      New stack
160        // +-------+          ^           +------+
161        // | root  |          |           | root |
162        // +-------+          |           +------+
163        // |   A   |        Common        |   A  |
164        // +-------+          |           +------+
165        // |   B   |          |           |   B  |
166        // +-------+      ^   v    ^      +------+
167        // |   C   |      |        |      |   X  |
168        // +-------+      |     Only new  +------+
169        // |   D   |   Only old    |      |   Y  |
170        // +-------+      |        v      +------+
171        // |   E   |      |
172        // +-------+      v
173        //
174        // Three sections are important:
175        //
176        // 1. The common base (root, A,  B)
177        // 2. The calls only on the previous stack (C, D, E)
178        // 3. The calls only on the new stack (X, Y)
179
180        // 1. Common items we can ignore. Find out how many there are, so we
181        // can skip them.
182        let mut common = 0;
183        let max_common = min(self.stack.len(), rev.len());
184        while common < max_common && &self.stack[common].frame == rev[common] {
185            common += 1;
186        }
187
188        // 2. Items only on the previous stack won't be kept. These already have exclusive +
189        //    inclusive counts on them, so we just need to add those exclusive + inclusive counts
190        //    into our data. For example if we have:
191        //          c -> {exclusive: 2, inclusive: 5}
192        //          d -> {exclusive: 0, inclusive: 2}
193        //          e -> {exclusive: 5, inclusive: 10}
194        //    then we'll:
195        //    - add {2, 0, 5} to {c, d, e}'s exclusive counts respectively
196        //    - add these counts to calls:
197        //         b -> c : {count: 1, inclusive: 17}
198        //         c -> d : {count: 1, inclusive: 12}
199        //         d -> e : {count: 1, inclusive: 10}
200        //    - add 17 to the 'inclusive' count on `b`
201        //    We add up the inclusive counts because we only increment the 'inclusive' number on
202        //    the top element of the stack, so the 'inclusive' number of every stack element is the
203        //    sum of its inclusive property and that of all its children.
204        while self.stack.len() > common {
205            // For each entry, pop it from our stored stack, and track its
206            // exclusive sample count.
207            let entry = self.stack.pop().unwrap();
208            self.locations.add_exclusive(&entry);
209
210            if let Some(parent) = self.stack.last_mut() {
211                // If a parent is present, also track the inclusive sample count.
212                self.locations.add_inclusive(&parent.frame, &entry);
213
214                // Inclusive time of the child is also inclusive time of the parent,
215                // so attribute it to the parent. If multiple previous items exist,
216                // this will in turn be attributed to the grand-parent, etc.
217                parent.inclusive += entry.inclusive;
218            }
219        }
220        // Now our stored stack (self.stack) only includes common items, since we
221        // popped all the old ones.
222
223        // 3. Add new stack frames to our stored stack.
224        for item in rev.iter().skip(common) {
225            self.stack.push(StackEntry {
226                frame: (*item).clone(),
227                exclusive: 0,
228                inclusive: 0,
229            })
230        }
231        // Now our stored stack has the same structure as the stack sample (rev).
232
233        // Finally, we have to actually count samples somewhere! Add them to the
234        // last entry.
235        //
236        // We don't increment the inclusive time of everything on the stack here,
237        // it's easier to do the addition in step 2 above.
238        if let Some(entry) = self.stack.last_mut() {
239            entry.exclusive += 1;
240            entry.inclusive += 1;
241        }
242    }
243
244    // Finish adding samples to this Stats.
245    pub fn finish(&mut self) {
246        // To handle whatever remains on the stored stack, we can just add
247        // an empty stack. This causes us to integrate info for each of those
248        // frames--see step 2 in add().
249        self.add(&[]);
250    }
251
252    // Write a callgrind file based on the stats collected.
253    // SEe the format docs here: http://kcachegrind.sourceforge.net/html/CallgrindFormat.html
254    pub fn write(&self, w: &mut dyn io::Write) -> Result<()> {
255        // Write a header.
256        writeln!(w, "# callgrind format")?;
257        writeln!(w, "version: 1")?;
258        writeln!(w, "creator: rbspy")?;
259        writeln!(w, "events: Samples")?;
260
261        // Write the info for each function.
262        // Sort first, for consistent ordering.
263        let sorted: BTreeMap<_, _> = self.locations.0.iter().collect();
264        for (frame, loc) in sorted.iter() {
265            writeln!(w)?;
266            // Exclusive info, along with filename and function name.
267            writeln!(w, "fl={}", frame.path())?;
268            writeln!(w, "fn={}", &frame.name)?;
269            writeln!(w, "{} {}", frame.lineno.unwrap_or(0), loc.exclusive)?;
270
271            // Inclusive info for each function called by this one.
272            let csorted: BTreeMap<_, _> = loc.calls.iter().collect();
273            for (cframe, call) in csorted.iter() {
274                writeln!(w, "cfl={}", cframe.path())?;
275                writeln!(w, "cfn={}", &cframe.name)?;
276                writeln!(w, "calls={} {}", call.count, cframe.lineno.unwrap_or(0))?;
277                writeln!(w, "{} {}", frame.lineno.unwrap_or(0), call.inclusive)?;
278            }
279        }
280
281        Ok(())
282    }
283}
284
285#[cfg(test)]
286mod tests {
287    use crate::ui::callgrind::*;
288
289    // Build a test stackframe
290    fn f(i: usize) -> StackFrame {
291        StackFrame {
292            name: format!("func{}", i),
293            relative_path: format!("file{}.rb", i),
294            absolute_path: None,
295            lineno: Some(i),
296        }
297    }
298
299    // A stack frame from the same file as another one
300    fn fdup() -> StackFrame {
301        StackFrame {
302            name: "funcX".to_owned(),
303            relative_path: "file1.rb".to_owned(),
304            absolute_path: None,
305            lineno: Some(42),
306        }
307    }
308
309    // Assert that basic stats for a stack frame is as expected.
310    fn assert_location(stats: &Stats, f: StackFrame, exclusive: usize, children: usize) {
311        let loc = stats
312            .locations
313            .0
314            .get(&f)
315            .expect(format!("No location for {}", f).as_ref());
316        assert_eq!(loc.exclusive, exclusive, "Bad exclusive time for {}", f,);
317        assert_eq!(loc.calls.len(), children, "Bad children count for {}", f,);
318    }
319
320    // Assert that the inclusive stats for a parent/child pair is as expected.
321    fn assert_inclusive(
322        stats: &Stats,
323        parent: StackFrame,
324        child: StackFrame,
325        count: usize,
326        inclusive: usize,
327    ) {
328        let ploc = stats
329            .locations
330            .0
331            .get(&parent)
332            .expect(format!("No location for {}", parent).as_ref());
333        let call = ploc
334            .calls
335            .get(&child)
336            .expect(format!("No call of {} in {}", child, parent).as_ref());
337        assert_eq!(
338            call.count, count,
339            "Bad inclusive count for {} in {}",
340            child, parent,
341        );
342        assert_eq!(
343            call.inclusive, inclusive,
344            "Bad inclusive time for {} in {}",
345            child, parent,
346        )
347    }
348
349    // Track some fake stats for testing, into a Stats object.
350    fn build_test_stats() -> Stats {
351        let mut stats = Stats::new();
352
353        stats.add(&vec![f(1)]);
354        stats.add(&vec![f(3), f(2), f(1)]);
355        stats.add(&vec![f(2), f(1)]);
356        stats.add(&vec![f(3), f(1)]);
357        stats.add(&vec![f(2), f(1)]);
358        stats.add(&vec![f(3), fdup(), f(1)]);
359        stats.finish();
360
361        stats
362    }
363
364    // Test that we aggregate stats correctly.
365    #[test]
366    fn stats_aggregate() {
367        let stats = &build_test_stats();
368        assert!(
369            stats.stack.is_empty(),
370            "Stack not empty: {:#?}",
371            stats.stack
372        );
373        let len = stats.locations.0.len();
374        assert_eq!(len, 4, "Bad location count");
375        assert_location(stats, f(1), 1, 3);
376        assert_location(stats, f(2), 2, 1);
377        assert_location(stats, f(3), 3, 0);
378        assert_location(stats, fdup(), 0, 1);
379        assert_inclusive(stats, f(1), f(2), 2, 3);
380        assert_inclusive(stats, f(1), f(3), 1, 1);
381        assert_inclusive(stats, f(1), fdup(), 1, 1);
382        assert_inclusive(stats, f(2), f(3), 1, 1);
383        assert_inclusive(stats, fdup(), f(3), 1, 1);
384    }
385
386    // Test that we can write stats correctly.
387    #[test]
388    fn stats_write() {
389        let expected = "# callgrind format
390version: 1
391creator: rbspy
392events: Samples
393
394fl=file1.rb
395fn=func1
3961 1
397cfl=file1.rb
398cfn=funcX
399calls=1 42
4001 1
401cfl=file2.rb
402cfn=func2
403calls=2 2
4041 3
405cfl=file3.rb
406cfn=func3
407calls=1 3
4081 1
409
410fl=file1.rb
411fn=funcX
41242 0
413cfl=file3.rb
414cfn=func3
415calls=1 3
41642 1
417
418fl=file2.rb
419fn=func2
4202 2
421cfl=file3.rb
422cfn=func3
423calls=1 3
4242 1
425
426fl=file3.rb
427fn=func3
4283 3
429";
430
431        let mut buf: Vec<u8> = Vec::new();
432        build_test_stats()
433            .write(&mut buf)
434            .expect("Callgrind write failed");
435        let actual = String::from_utf8(buf).expect("Callgrind output not utf8");
436        assert_eq!(actual, expected, "Unexpected callgrind output");
437    }
438}