pyroscope_rbspy_oncpu/ui/callgrind.rs
1use anyhow::Result;
2use std::cmp::min;
3use std::collections::{BTreeMap, HashMap};
4use std::io;
5
6use crate::core::types::StackFrame;
7
8/*
9 * **Notes about the overall design**
10 *
11 * The Callgrind format encodes performance data basically as a graph, where the nodes are
12 * functions (like `a`) and the edges are statistics about calls between functions. The `Locations`
13 * struct is where that graph is stored, and to print out the callgrind file at the end we iterate
14 * over that struct in a pretty straightforward way.
15 *
16 * Unlike the flamegraph format (which doesn't care about the order of the stack traces you've
17 * collected at all), the callgrind format **does** care about the order. The callgrind format
18 * implicitly assumes that we have a tracing profile of our program and that you can get exact
19 * counts of the number of calls between every 2 functions. Since rbspy is a sampling profiler,
20 * this means we have to make some assumptions to make this format work!
21 *
22 * **counting function calls**
23 *
24 * The 'count' field in the `Call` struct attempts to count function calls from a -> b
25 * The main assumption we make to do this is that if we have a stack (a,b,c) followed by another
26 * one with a common prefix (a,b,f,g), then that represents the same function call `a -> b`. This
27 * isn't necessarily true but if we're sampling at a high enough rate it's a reasonable assumption.
28 *
29 * Here's an example: let's assume we have these 4 stack traces:
30 *
31 *
32 * ```
33 * a b c d d g x
34 * a b c d d // count calls d -> g and g -> x
35 * a b c d // count calls d -> d
36 * a b c d d //
37 * a b e f g // count calls b -> c, c -> d, d -> d
38 * // end: count calls a -> b, b -> e, e -> f, f -> g
39 * ```
40 *
41 * For the above example, here's the data this callgrind code would store for a, b, c, d in the
42 * Locations struct.
43 *
44 * You can see that there are 3 numbers we track:
45 * * `exclusive`: number of times the function was at the top of a stack trace
46 * * `inclusive`: number of stack traces including a call x -> y
47 * * `count`: number of estimated calls from x -> y during execution (using assumption above)
48 *
49 * a: {exclusive: 0, calls: {b -> {inclusive: 4, count: 1}}}
50 * b: {exclusive: 0, calls: {c -> {inclusive: 3, count: 1}, e -> {inclusive: 1, count: 1}}}
51 * c: {exclusive: 0, calls: {d -> {inclusive: 3, count: 1}}}
52 * d: {exclusive: 3, calls: {d -> {inclusive: 4, count: 2}, g -> {inclusive: 1, count: 1}}}
53 *
54 */
55
56// Stats about the relationship between two functions, one of which
57// calls the other.
58#[derive(Debug)]
59struct Call {
60 // Estimate of number of times this call was made (see above comment)
61 count: usize,
62
63 // Number of stack traces including this call.
64 // 'a b c d' includes the call b -> c, 'a b e c d' does not
65 inclusive: usize,
66}
67
68// Stats about a single function.
69#[derive(Debug, Default)]
70struct Location {
71 // How many times does this function appear at the top of a stack trace
72 // where it's the most recent function called?
73 exclusive: usize,
74
75 // Data about the calls from this function to other functions.
76 calls: HashMap<StackFrame, Call>,
77}
78
79// Stats about all functions found in our samples.
80#[derive(Default, Debug)]
81struct Locations(HashMap<StackFrame, Location>);
82
83// Information about a function currently on the stack.
84#[derive(Debug)]
85struct StackEntry {
86 frame: StackFrame,
87
88 // How many samples were found inside this call only?
89 exclusive: usize,
90
91 // How many samples were found in this call, and sub-calls?
92 inclusive: usize,
93}
94
95// Tracks statistics about a program being sampled.
96#[derive(Default, Debug)]
97pub struct Stats {
98 // The current stack, along with tracking information.
99 // The root function is at element zero.
100 // Not used in final reporting, only for tracking an ongoing profile.
101 stack: Vec<StackEntry>,
102
103 // Overall stats about this program.
104 locations: Locations,
105}
106
107impl Locations {
108 // Get the current stats for a StackFrame. If it's never been seen before,
109 // automatically create an empty record and return that.
110 fn location(&mut self, frame: &StackFrame) -> &mut Location {
111 if !self.0.contains_key(frame) {
112 // Never seen this frame before, insert an empty record.
113 let loc = Location {
114 ..Default::default()
115 };
116 self.0.insert(frame.clone(), loc);
117 }
118 self.0.get_mut(frame).unwrap()
119 }
120
121 // Add to our stats the exclusive time for a given function.
122 fn add_exclusive(&mut self, entry: &StackEntry) {
123 self.location(&entry.frame).exclusive += entry.exclusive;
124 }
125
126 // Add to our stats info about a single call from a parent to a child
127 // function.
128 fn add_inclusive(&mut self, parent: &StackFrame, child: &StackEntry) {
129 let ploc = self.location(parent);
130 // If we've never seen this parent-child relationship, insert an empty
131 // record.
132 let val = ploc.calls.entry(child.frame.clone()).or_insert(Call {
133 count: 0,
134 inclusive: 0,
135 });
136
137 // Add both the count and the inclusive samples count.
138 val.count += 1;
139 val.inclusive += child.inclusive;
140 }
141}
142
143impl Stats {
144 // Create an empty stats tracker.
145 pub fn new() -> Stats {
146 Stats {
147 ..Default::default()
148 }
149 }
150
151 // Add a single stack sample to this Stats.
152 pub fn add(&mut self, stack: &[StackFrame]) {
153 // The input sample has the root function at the end. Reverse that!
154 let rev: Vec<_> = stack.iter().rev().collect();
155
156 // At this point, the previous stack (self.stack) and the new stack
157 // (rev) may have some parts that agree and others that differ:
158 //
159 // Old stack New stack
160 // +-------+ ^ +------+
161 // | root | | | root |
162 // +-------+ | +------+
163 // | A | Common | A |
164 // +-------+ | +------+
165 // | B | | | B |
166 // +-------+ ^ v ^ +------+
167 // | C | | | | X |
168 // +-------+ | Only new +------+
169 // | D | Only old | | Y |
170 // +-------+ | v +------+
171 // | E | |
172 // +-------+ v
173 //
174 // Three sections are important:
175 //
176 // 1. The common base (root, A, B)
177 // 2. The calls only on the previous stack (C, D, E)
178 // 3. The calls only on the new stack (X, Y)
179
180 // 1. Common items we can ignore. Find out how many there are, so we
181 // can skip them.
182 let mut common = 0;
183 let max_common = min(self.stack.len(), rev.len());
184 while common < max_common && &self.stack[common].frame == rev[common] {
185 common += 1;
186 }
187
188 // 2. Items only on the previous stack won't be kept. These already have exclusive +
189 // inclusive counts on them, so we just need to add those exclusive + inclusive counts
190 // into our data. For example if we have:
191 // c -> {exclusive: 2, inclusive: 5}
192 // d -> {exclusive: 0, inclusive: 2}
193 // e -> {exclusive: 5, inclusive: 10}
194 // then we'll:
195 // - add {2, 0, 5} to {c, d, e}'s exclusive counts respectively
196 // - add these counts to calls:
197 // b -> c : {count: 1, inclusive: 17}
198 // c -> d : {count: 1, inclusive: 12}
199 // d -> e : {count: 1, inclusive: 10}
200 // - add 17 to the 'inclusive' count on `b`
201 // We add up the inclusive counts because we only increment the 'inclusive' number on
202 // the top element of the stack, so the 'inclusive' number of every stack element is the
203 // sum of its inclusive property and that of all its children.
204 while self.stack.len() > common {
205 // For each entry, pop it from our stored stack, and track its
206 // exclusive sample count.
207 let entry = self.stack.pop().unwrap();
208 self.locations.add_exclusive(&entry);
209
210 if let Some(parent) = self.stack.last_mut() {
211 // If a parent is present, also track the inclusive sample count.
212 self.locations.add_inclusive(&parent.frame, &entry);
213
214 // Inclusive time of the child is also inclusive time of the parent,
215 // so attribute it to the parent. If multiple previous items exist,
216 // this will in turn be attributed to the grand-parent, etc.
217 parent.inclusive += entry.inclusive;
218 }
219 }
220 // Now our stored stack (self.stack) only includes common items, since we
221 // popped all the old ones.
222
223 // 3. Add new stack frames to our stored stack.
224 for item in rev.iter().skip(common) {
225 self.stack.push(StackEntry {
226 frame: (*item).clone(),
227 exclusive: 0,
228 inclusive: 0,
229 })
230 }
231 // Now our stored stack has the same structure as the stack sample (rev).
232
233 // Finally, we have to actually count samples somewhere! Add them to the
234 // last entry.
235 //
236 // We don't increment the inclusive time of everything on the stack here,
237 // it's easier to do the addition in step 2 above.
238 if let Some(entry) = self.stack.last_mut() {
239 entry.exclusive += 1;
240 entry.inclusive += 1;
241 }
242 }
243
244 // Finish adding samples to this Stats.
245 pub fn finish(&mut self) {
246 // To handle whatever remains on the stored stack, we can just add
247 // an empty stack. This causes us to integrate info for each of those
248 // frames--see step 2 in add().
249 self.add(&[]);
250 }
251
252 // Write a callgrind file based on the stats collected.
253 // SEe the format docs here: http://kcachegrind.sourceforge.net/html/CallgrindFormat.html
254 pub fn write(&self, w: &mut dyn io::Write) -> Result<()> {
255 // Write a header.
256 writeln!(w, "# callgrind format")?;
257 writeln!(w, "version: 1")?;
258 writeln!(w, "creator: rbspy")?;
259 writeln!(w, "events: Samples")?;
260
261 // Write the info for each function.
262 // Sort first, for consistent ordering.
263 let sorted: BTreeMap<_, _> = self.locations.0.iter().collect();
264 for (frame, loc) in sorted.iter() {
265 writeln!(w)?;
266 // Exclusive info, along with filename and function name.
267 writeln!(w, "fl={}", frame.path())?;
268 writeln!(w, "fn={}", &frame.name)?;
269 writeln!(w, "{} {}", frame.lineno.unwrap_or(0), loc.exclusive)?;
270
271 // Inclusive info for each function called by this one.
272 let csorted: BTreeMap<_, _> = loc.calls.iter().collect();
273 for (cframe, call) in csorted.iter() {
274 writeln!(w, "cfl={}", cframe.path())?;
275 writeln!(w, "cfn={}", &cframe.name)?;
276 writeln!(w, "calls={} {}", call.count, cframe.lineno.unwrap_or(0))?;
277 writeln!(w, "{} {}", frame.lineno.unwrap_or(0), call.inclusive)?;
278 }
279 }
280
281 Ok(())
282 }
283}
284
285#[cfg(test)]
286mod tests {
287 use crate::ui::callgrind::*;
288
289 // Build a test stackframe
290 fn f(i: usize) -> StackFrame {
291 StackFrame {
292 name: format!("func{}", i),
293 relative_path: format!("file{}.rb", i),
294 absolute_path: None,
295 lineno: Some(i),
296 }
297 }
298
299 // A stack frame from the same file as another one
300 fn fdup() -> StackFrame {
301 StackFrame {
302 name: "funcX".to_owned(),
303 relative_path: "file1.rb".to_owned(),
304 absolute_path: None,
305 lineno: Some(42),
306 }
307 }
308
309 // Assert that basic stats for a stack frame is as expected.
310 fn assert_location(stats: &Stats, f: StackFrame, exclusive: usize, children: usize) {
311 let loc = stats
312 .locations
313 .0
314 .get(&f)
315 .expect(format!("No location for {}", f).as_ref());
316 assert_eq!(loc.exclusive, exclusive, "Bad exclusive time for {}", f,);
317 assert_eq!(loc.calls.len(), children, "Bad children count for {}", f,);
318 }
319
320 // Assert that the inclusive stats for a parent/child pair is as expected.
321 fn assert_inclusive(
322 stats: &Stats,
323 parent: StackFrame,
324 child: StackFrame,
325 count: usize,
326 inclusive: usize,
327 ) {
328 let ploc = stats
329 .locations
330 .0
331 .get(&parent)
332 .expect(format!("No location for {}", parent).as_ref());
333 let call = ploc
334 .calls
335 .get(&child)
336 .expect(format!("No call of {} in {}", child, parent).as_ref());
337 assert_eq!(
338 call.count, count,
339 "Bad inclusive count for {} in {}",
340 child, parent,
341 );
342 assert_eq!(
343 call.inclusive, inclusive,
344 "Bad inclusive time for {} in {}",
345 child, parent,
346 )
347 }
348
349 // Track some fake stats for testing, into a Stats object.
350 fn build_test_stats() -> Stats {
351 let mut stats = Stats::new();
352
353 stats.add(&vec![f(1)]);
354 stats.add(&vec![f(3), f(2), f(1)]);
355 stats.add(&vec![f(2), f(1)]);
356 stats.add(&vec![f(3), f(1)]);
357 stats.add(&vec![f(2), f(1)]);
358 stats.add(&vec![f(3), fdup(), f(1)]);
359 stats.finish();
360
361 stats
362 }
363
364 // Test that we aggregate stats correctly.
365 #[test]
366 fn stats_aggregate() {
367 let stats = &build_test_stats();
368 assert!(
369 stats.stack.is_empty(),
370 "Stack not empty: {:#?}",
371 stats.stack
372 );
373 let len = stats.locations.0.len();
374 assert_eq!(len, 4, "Bad location count");
375 assert_location(stats, f(1), 1, 3);
376 assert_location(stats, f(2), 2, 1);
377 assert_location(stats, f(3), 3, 0);
378 assert_location(stats, fdup(), 0, 1);
379 assert_inclusive(stats, f(1), f(2), 2, 3);
380 assert_inclusive(stats, f(1), f(3), 1, 1);
381 assert_inclusive(stats, f(1), fdup(), 1, 1);
382 assert_inclusive(stats, f(2), f(3), 1, 1);
383 assert_inclusive(stats, fdup(), f(3), 1, 1);
384 }
385
386 // Test that we can write stats correctly.
387 #[test]
388 fn stats_write() {
389 let expected = "# callgrind format
390version: 1
391creator: rbspy
392events: Samples
393
394fl=file1.rb
395fn=func1
3961 1
397cfl=file1.rb
398cfn=funcX
399calls=1 42
4001 1
401cfl=file2.rb
402cfn=func2
403calls=2 2
4041 3
405cfl=file3.rb
406cfn=func3
407calls=1 3
4081 1
409
410fl=file1.rb
411fn=funcX
41242 0
413cfl=file3.rb
414cfn=func3
415calls=1 3
41642 1
417
418fl=file2.rb
419fn=func2
4202 2
421cfl=file3.rb
422cfn=func3
423calls=1 3
4242 1
425
426fl=file3.rb
427fn=func3
4283 3
429";
430
431 let mut buf: Vec<u8> = Vec::new();
432 build_test_stats()
433 .write(&mut buf)
434 .expect("Callgrind write failed");
435 let actual = String::from_utf8(buf).expect("Callgrind output not utf8");
436 assert_eq!(actual, expected, "Unexpected callgrind output");
437 }
438}