pyroscope-rbspy-oncpu 0.19.1

Sampling CPU profiler for Ruby
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
use anyhow::Result;
use std::cmp::min;
use std::collections::{BTreeMap, HashMap};
use std::io;

use crate::core::types::StackFrame;

/*
 * **Notes about the overall design**
 *
 * The Callgrind format encodes performance data basically as a graph, where the nodes are
 * functions (like `a`) and the edges are statistics about calls between functions. The `Locations`
 * struct is where that graph is stored, and to print out the callgrind file at the end we iterate
 * over that struct in a pretty straightforward way.
 *
 * Unlike the flamegraph format (which doesn't care about the order of the stack traces you've
 * collected at all), the callgrind format **does** care about the order. The callgrind format
 * implicitly assumes that we have a tracing profile of our program and that you can get exact
 * counts of the number of calls between every 2 functions. Since rbspy is a sampling profiler,
 * this means we have to make some assumptions to make this format work!
 *
 * **counting function calls**
 *
 * The 'count' field in the `Call` struct attempts to count function calls from a -> b
 * The main assumption we make to do this is that if we have a stack (a,b,c) followed by another
 * one with a common prefix (a,b,f,g), then that represents the same function call `a -> b`. This
 * isn't necessarily true but if we're sampling at a high enough rate it's a reasonable assumption.
 *
 * Here's an example: let's assume we have these 4 stack traces:
 *
 *
 * ```
 * a b c d d g x
 * a b c d d // count calls d -> g and g -> x
 * a b c d   // count calls d -> d
 * a b c d d //
 * a b e f g // count calls b -> c, c -> d, d -> d
 * // end: count calls a -> b, b -> e, e -> f, f -> g
 * ```
 *
 * For the above example, here's the data this callgrind code would store for a, b, c, d in the
 * Locations struct.
 *
 * You can see that there are 3 numbers we track:
 *  * `exclusive`: number of times the function was at the top of a stack trace
 *  * `inclusive`: number of stack traces including a call x -> y
 *  * `count`: number of estimated calls from x -> y during execution (using assumption above)
 *
 * a: {exclusive: 0, calls: {b -> {inclusive: 4, count: 1}}}
 * b: {exclusive: 0, calls: {c -> {inclusive: 3, count: 1}, e -> {inclusive: 1, count: 1}}}
 * c: {exclusive: 0, calls: {d -> {inclusive: 3, count: 1}}}
 * d: {exclusive: 3, calls: {d -> {inclusive: 4, count: 2}, g -> {inclusive: 1, count: 1}}}
 *
 */

// Stats about the relationship between two functions, one of which
// calls the other.
#[derive(Debug)]
struct Call {
    // Estimate of number of times this call was made (see above comment)
    count: usize,

    // Number of stack traces including this call.
    // 'a b c d' includes the call b -> c, 'a b e c d' does not
    inclusive: usize,
}

// Stats about a single function.
#[derive(Debug, Default)]
struct Location {
    // How many times does this function appear at the top of a stack trace
    // where it's the most recent function called?
    exclusive: usize,

    // Data about the calls from this function to other functions.
    calls: HashMap<StackFrame, Call>,
}

// Stats about all functions found in our samples.
#[derive(Default, Debug)]
struct Locations(HashMap<StackFrame, Location>);

// Information about a function currently on the stack.
#[derive(Debug)]
struct StackEntry {
    frame: StackFrame,

    // How many samples were found inside this call only?
    exclusive: usize,

    // How many samples were found in this call, and sub-calls?
    inclusive: usize,
}

// Tracks statistics about a program being sampled.
#[derive(Default, Debug)]
pub struct Stats {
    // The current stack, along with tracking information.
    // The root function is at element zero.
    // Not used in final reporting, only for tracking an ongoing profile.
    stack: Vec<StackEntry>,

    // Overall stats about this program.
    locations: Locations,
}

impl Locations {
    // Get the current stats for a StackFrame. If it's never been seen before,
    // automatically create an empty record and return that.
    fn location(&mut self, frame: &StackFrame) -> &mut Location {
        if !self.0.contains_key(frame) {
            // Never seen this frame before, insert an empty record.
            let loc = Location {
                ..Default::default()
            };
            self.0.insert(frame.clone(), loc);
        }
        self.0.get_mut(frame).unwrap()
    }

    // Add to our stats the exclusive time for a given function.
    fn add_exclusive(&mut self, entry: &StackEntry) {
        self.location(&entry.frame).exclusive += entry.exclusive;
    }

    // Add to our stats info about a single call from a parent to a child
    // function.
    fn add_inclusive(&mut self, parent: &StackFrame, child: &StackEntry) {
        let ploc = self.location(parent);
        // If we've never seen this parent-child relationship, insert an empty
        // record.
        let val = ploc.calls.entry(child.frame.clone()).or_insert(Call {
            count: 0,
            inclusive: 0,
        });

        // Add both the count and the inclusive samples count.
        val.count += 1;
        val.inclusive += child.inclusive;
    }
}

impl Stats {
    // Create an empty stats tracker.
    pub fn new() -> Stats {
        Stats {
            ..Default::default()
        }
    }

    // Add a single stack sample to this Stats.
    pub fn add(&mut self, stack: &[StackFrame]) {
        // The input sample has the root function at the end. Reverse that!
        let rev: Vec<_> = stack.iter().rev().collect();

        // At this point, the previous stack (self.stack) and the new stack
        // (rev) may have some parts that agree and others that differ:
        //
        // Old stack                      New stack
        // +-------+          ^           +------+
        // | root  |          |           | root |
        // +-------+          |           +------+
        // |   A   |        Common        |   A  |
        // +-------+          |           +------+
        // |   B   |          |           |   B  |
        // +-------+      ^   v    ^      +------+
        // |   C   |      |        |      |   X  |
        // +-------+      |     Only new  +------+
        // |   D   |   Only old    |      |   Y  |
        // +-------+      |        v      +------+
        // |   E   |      |
        // +-------+      v
        //
        // Three sections are important:
        //
        // 1. The common base (root, A,  B)
        // 2. The calls only on the previous stack (C, D, E)
        // 3. The calls only on the new stack (X, Y)

        // 1. Common items we can ignore. Find out how many there are, so we
        // can skip them.
        let mut common = 0;
        let max_common = min(self.stack.len(), rev.len());
        while common < max_common && &self.stack[common].frame == rev[common] {
            common += 1;
        }

        // 2. Items only on the previous stack won't be kept. These already have exclusive +
        //    inclusive counts on them, so we just need to add those exclusive + inclusive counts
        //    into our data. For example if we have:
        //          c -> {exclusive: 2, inclusive: 5}
        //          d -> {exclusive: 0, inclusive: 2}
        //          e -> {exclusive: 5, inclusive: 10}
        //    then we'll:
        //    - add {2, 0, 5} to {c, d, e}'s exclusive counts respectively
        //    - add these counts to calls:
        //         b -> c : {count: 1, inclusive: 17}
        //         c -> d : {count: 1, inclusive: 12}
        //         d -> e : {count: 1, inclusive: 10}
        //    - add 17 to the 'inclusive' count on `b`
        //    We add up the inclusive counts because we only increment the 'inclusive' number on
        //    the top element of the stack, so the 'inclusive' number of every stack element is the
        //    sum of its inclusive property and that of all its children.
        while self.stack.len() > common {
            // For each entry, pop it from our stored stack, and track its
            // exclusive sample count.
            let entry = self.stack.pop().unwrap();
            self.locations.add_exclusive(&entry);

            if let Some(parent) = self.stack.last_mut() {
                // If a parent is present, also track the inclusive sample count.
                self.locations.add_inclusive(&parent.frame, &entry);

                // Inclusive time of the child is also inclusive time of the parent,
                // so attribute it to the parent. If multiple previous items exist,
                // this will in turn be attributed to the grand-parent, etc.
                parent.inclusive += entry.inclusive;
            }
        }
        // Now our stored stack (self.stack) only includes common items, since we
        // popped all the old ones.

        // 3. Add new stack frames to our stored stack.
        for item in rev.iter().skip(common) {
            self.stack.push(StackEntry {
                frame: (*item).clone(),
                exclusive: 0,
                inclusive: 0,
            })
        }
        // Now our stored stack has the same structure as the stack sample (rev).

        // Finally, we have to actually count samples somewhere! Add them to the
        // last entry.
        //
        // We don't increment the inclusive time of everything on the stack here,
        // it's easier to do the addition in step 2 above.
        if let Some(entry) = self.stack.last_mut() {
            entry.exclusive += 1;
            entry.inclusive += 1;
        }
    }

    // Finish adding samples to this Stats.
    pub fn finish(&mut self) {
        // To handle whatever remains on the stored stack, we can just add
        // an empty stack. This causes us to integrate info for each of those
        // frames--see step 2 in add().
        self.add(&[]);
    }

    // Write a callgrind file based on the stats collected.
    // SEe the format docs here: http://kcachegrind.sourceforge.net/html/CallgrindFormat.html
    pub fn write(&self, w: &mut dyn io::Write) -> Result<()> {
        // Write a header.
        writeln!(w, "# callgrind format")?;
        writeln!(w, "version: 1")?;
        writeln!(w, "creator: rbspy")?;
        writeln!(w, "events: Samples")?;

        // Write the info for each function.
        // Sort first, for consistent ordering.
        let sorted: BTreeMap<_, _> = self.locations.0.iter().collect();
        for (frame, loc) in sorted.iter() {
            writeln!(w)?;
            // Exclusive info, along with filename and function name.
            writeln!(w, "fl={}", frame.path())?;
            writeln!(w, "fn={}", &frame.name)?;
            writeln!(w, "{} {}", frame.lineno.unwrap_or(0), loc.exclusive)?;

            // Inclusive info for each function called by this one.
            let csorted: BTreeMap<_, _> = loc.calls.iter().collect();
            for (cframe, call) in csorted.iter() {
                writeln!(w, "cfl={}", cframe.path())?;
                writeln!(w, "cfn={}", &cframe.name)?;
                writeln!(w, "calls={} {}", call.count, cframe.lineno.unwrap_or(0))?;
                writeln!(w, "{} {}", frame.lineno.unwrap_or(0), call.inclusive)?;
            }
        }

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use crate::ui::callgrind::*;

    // Build a test stackframe
    fn f(i: usize) -> StackFrame {
        StackFrame {
            name: format!("func{}", i),
            relative_path: format!("file{}.rb", i),
            absolute_path: None,
            lineno: Some(i),
        }
    }

    // A stack frame from the same file as another one
    fn fdup() -> StackFrame {
        StackFrame {
            name: "funcX".to_owned(),
            relative_path: "file1.rb".to_owned(),
            absolute_path: None,
            lineno: Some(42),
        }
    }

    // Assert that basic stats for a stack frame is as expected.
    fn assert_location(stats: &Stats, f: StackFrame, exclusive: usize, children: usize) {
        let loc = stats
            .locations
            .0
            .get(&f)
            .expect(format!("No location for {}", f).as_ref());
        assert_eq!(loc.exclusive, exclusive, "Bad exclusive time for {}", f,);
        assert_eq!(loc.calls.len(), children, "Bad children count for {}", f,);
    }

    // Assert that the inclusive stats for a parent/child pair is as expected.
    fn assert_inclusive(
        stats: &Stats,
        parent: StackFrame,
        child: StackFrame,
        count: usize,
        inclusive: usize,
    ) {
        let ploc = stats
            .locations
            .0
            .get(&parent)
            .expect(format!("No location for {}", parent).as_ref());
        let call = ploc
            .calls
            .get(&child)
            .expect(format!("No call of {} in {}", child, parent).as_ref());
        assert_eq!(
            call.count, count,
            "Bad inclusive count for {} in {}",
            child, parent,
        );
        assert_eq!(
            call.inclusive, inclusive,
            "Bad inclusive time for {} in {}",
            child, parent,
        )
    }

    // Track some fake stats for testing, into a Stats object.
    fn build_test_stats() -> Stats {
        let mut stats = Stats::new();

        stats.add(&vec![f(1)]);
        stats.add(&vec![f(3), f(2), f(1)]);
        stats.add(&vec![f(2), f(1)]);
        stats.add(&vec![f(3), f(1)]);
        stats.add(&vec![f(2), f(1)]);
        stats.add(&vec![f(3), fdup(), f(1)]);
        stats.finish();

        stats
    }

    // Test that we aggregate stats correctly.
    #[test]
    fn stats_aggregate() {
        let stats = &build_test_stats();
        assert!(
            stats.stack.is_empty(),
            "Stack not empty: {:#?}",
            stats.stack
        );
        let len = stats.locations.0.len();
        assert_eq!(len, 4, "Bad location count");
        assert_location(stats, f(1), 1, 3);
        assert_location(stats, f(2), 2, 1);
        assert_location(stats, f(3), 3, 0);
        assert_location(stats, fdup(), 0, 1);
        assert_inclusive(stats, f(1), f(2), 2, 3);
        assert_inclusive(stats, f(1), f(3), 1, 1);
        assert_inclusive(stats, f(1), fdup(), 1, 1);
        assert_inclusive(stats, f(2), f(3), 1, 1);
        assert_inclusive(stats, fdup(), f(3), 1, 1);
    }

    // Test that we can write stats correctly.
    #[test]
    fn stats_write() {
        let expected = "# callgrind format
version: 1
creator: rbspy
events: Samples

fl=file1.rb
fn=func1
1 1
cfl=file1.rb
cfn=funcX
calls=1 42
1 1
cfl=file2.rb
cfn=func2
calls=2 2
1 3
cfl=file3.rb
cfn=func3
calls=1 3
1 1

fl=file1.rb
fn=funcX
42 0
cfl=file3.rb
cfn=func3
calls=1 3
42 1

fl=file2.rb
fn=func2
2 2
cfl=file3.rb
cfn=func3
calls=1 3
2 1

fl=file3.rb
fn=func3
3 3
";

        let mut buf: Vec<u8> = Vec::new();
        build_test_stats()
            .write(&mut buf)
            .expect("Callgrind write failed");
        let actual = String::from_utf8(buf).expect("Callgrind output not utf8");
        assert_eq!(actual, expected, "Unexpected callgrind output");
    }
}