Skip to main content

cpuprofile_to_pprof/
lib.rs

1//! Convert Node V8 `.cpuprofile` (Inspector `Profiler.Profile`) JSON into
2//! gzip'd pprof protobuf.
3//!
4//! Port of the TypeScript implementation that lived in
5//! `@mizchi/pprof-tools/cpuprofile-to-pprof`. The cpuprofile schema is a
6//! tree of nodes (`nodes[i].children` lists child ids) plus a `samples`
7//! array of leaf node ids and a `timeDeltas` array (μs) holding the
8//! elapsed time between consecutive samples. We invert children into a
9//! parent map, aggregate samples by leaf node, and emit one pprof Sample
10//! per unique leaf with both `samples/count` and `cpu/nanoseconds`
11//! values.
12//!
13//! Symbol names go through a caller-supplied demangler. Defaults to
14//! [`moonbit_demangle::demangle`]; pass identity (`|s| s.into()`) when
15//! profiling non-MoonBit code.
16
17#![forbid(unsafe_code)]
18#![warn(missing_docs)]
19
20use std::collections::HashMap;
21use std::io::Write;
22
23use anyhow::Result;
24use flate2::Compression;
25use flate2::write::GzEncoder;
26use prost::Message;
27use serde::Deserialize;
28
29use firefox_to_pprof::proto;
30
31/// Parsed shape of `Profiler.Profile` (`.cpuprofile` JSON).
32#[derive(Debug, Deserialize)]
33#[serde(rename_all = "camelCase")]
34pub struct CpuProfile {
35    /// All profile nodes. Each node carries its `id`, the `callFrame`,
36    /// and the ids of its children.
37    pub nodes: Vec<CpuNode>,
38    /// One node id per sample (the leaf node hit at sampling time).
39    pub samples: Vec<i64>,
40    /// Microseconds between consecutive samples (parallel to `samples`).
41    #[serde(default)]
42    pub time_deltas: Vec<i64>,
43    /// Profile start time in microseconds since epoch (V8 monotonic).
44    #[serde(default)]
45    pub start_time: i64,
46    /// Profile end time in microseconds since epoch.
47    #[serde(default)]
48    pub end_time: i64,
49}
50
51/// One node in the cpuprofile call tree.
52#[derive(Debug, Deserialize)]
53#[serde(rename_all = "camelCase")]
54pub struct CpuNode {
55    /// V8 node id (referenced from `CpuProfile::samples`).
56    pub id: i64,
57    /// Function + source location for this node.
58    pub call_frame: CallFrame,
59    /// Ids of child nodes (one level deeper in the call tree).
60    #[serde(default)]
61    pub children: Vec<i64>,
62}
63
64/// V8 call frame (`CallFrame` in the Inspector domain).
65#[derive(Debug, Deserialize)]
66#[serde(rename_all = "camelCase")]
67pub struct CallFrame {
68    /// Function name as V8 reports it. May be empty for anonymous fns.
69    #[serde(default)]
70    pub function_name: String,
71    /// Script URL (file:// or http:// or empty for native frames).
72    #[serde(default)]
73    pub url: String,
74    /// V8 script id (string in newer versions, number in older). We
75    /// only use it as a dedup key so storing the raw JSON form is
76    /// sufficient.
77    #[serde(default)]
78    pub script_id: serde_json::Value,
79    /// Zero-based line number, or -1 if unknown.
80    #[serde(default = "neg_one")]
81    pub line_number: i32,
82    /// Zero-based column number, or -1 if unknown.
83    #[serde(default = "neg_one")]
84    pub column_number: i32,
85}
86
/// serde default for the line/column fields of [`CallFrame`]: the
/// "unknown position" marker.
fn neg_one() -> i32 {
    const UNKNOWN_POSITION: i32 = -1;
    UNKNOWN_POSITION
}
90
/// Boxed demangler hook: takes the raw symbol name and returns the name
/// to display. [`Builder::new`] installs [`moonbit_demangle::demangle`].
///
/// The lifetime is spelled out to make explicit that the hook must
/// accept a `&str` of any lifetime and cannot retain it.
pub type DemangleFn = Box<dyn for<'a> Fn(&'a str) -> String>;
94
/// Counters describing one conversion; returned together with the
/// encoded bytes.
#[derive(Clone, Copy, Debug)]
pub struct Stats {
    /// How many pprof Sample entries were emitted (one per distinct
    /// sampled node).
    pub samples: usize,
    /// How many distinct pprof Function entries were created.
    pub functions: usize,
    /// How many distinct pprof Location entries were created.
    pub locations: usize,
}
105
106/// Output of [`Builder::encode`] — gzip'd bytes plus counts.
107pub struct EncodedProfile {
108    /// gzip-compressed pprof protobuf, ready to be written to disk.
109    pub encoded: Vec<u8>,
110    /// Conversion stats (sample/function/location counts).
111    pub stats: Stats,
112}
113
114/// Builds a pprof Profile from a parsed [`CpuProfile`].
115///
116/// ```no_run
117/// use cpuprofile_to_pprof::{CpuProfile, Builder};
118/// # fn doctest(json: &str) -> anyhow::Result<()> {
119/// let profile: CpuProfile = serde_json::from_str(json)?;
120/// let out = Builder::new(profile).encode()?;
121/// std::fs::write("out.pb.gz", out.encoded)?;
122/// # Ok(()) }
123/// ```
124pub struct Builder {
125    profile: CpuProfile,
126    demangle: DemangleFn,
127    mapping_filename: Option<String>,
128}
129
130impl Builder {
131    /// Construct a builder from a parsed cpuprofile. Defaults the
132    /// demangler to [`moonbit_demangle::demangle`].
133    pub fn new(profile: CpuProfile) -> Self {
134        Self {
135            profile,
136            demangle: Box::new(|s| moonbit_demangle::demangle(s)),
137            mapping_filename: None,
138        }
139    }
140
141    /// Override the symbol demangler. Pass `|s| s.into()` to disable.
142    pub fn demangle_with(mut self, f: impl Fn(&str) -> String + 'static) -> Self {
143        self.demangle = Box::new(f);
144        self
145    }
146
147    /// Override the pprof Mapping's filename. Defaults to empty.
148    pub fn mapping_filename(mut self, s: impl Into<String>) -> Self {
149        self.mapping_filename = Some(s.into());
150        self
151    }
152
153    /// Encode the profile to gzip'd protobuf bytes.
154    pub fn encode(self) -> Result<EncodedProfile> {
155        let (profile, stats) = self.build();
156        let mut buf = Vec::new();
157        profile.encode(&mut buf)?;
158        let mut gz = GzEncoder::new(Vec::new(), Compression::default());
159        gz.write_all(&buf)?;
160        Ok(EncodedProfile {
161            encoded: gz.finish()?,
162            stats,
163        })
164    }
165
166    fn build(self) -> (proto::Profile, Stats) {
167        let Self {
168            profile,
169            demangle,
170            mapping_filename,
171        } = self;
172        let mut state = State::new(demangle);
173        let mapping_filename_id =
174            state.intern(mapping_filename.as_deref().unwrap_or(""));
175
176        // Index nodes by id and invert children -> parent.
177        let mut node_by_id: HashMap<i64, &CpuNode> = HashMap::with_capacity(profile.nodes.len());
178        for n in &profile.nodes {
179            node_by_id.insert(n.id, n);
180        }
181        let mut parent: HashMap<i64, i64> = HashMap::new();
182        for n in &profile.nodes {
183            for &c in &n.children {
184                parent.insert(c, n.id);
185            }
186        }
187
188        // Aggregate count + microseconds per leaf node id.
189        let mut count_by_node: HashMap<i64, i64> = HashMap::new();
190        let mut us_by_node: HashMap<i64, i64> = HashMap::new();
191        let mut total_us: i64 = 0;
192        for (i, &nid) in profile.samples.iter().enumerate() {
193            let dt = profile.time_deltas.get(i).copied().unwrap_or(0);
194            *count_by_node.entry(nid).or_default() += 1;
195            *us_by_node.entry(nid).or_default() += dt;
196            total_us += dt;
197        }
198
199        // Emit one Sample per unique leaf node.
200        let mut samples_emitted = 0usize;
201        let mut stack_cache: HashMap<i64, Vec<u64>> = HashMap::new();
202        for (&nid, &count) in &count_by_node {
203            let us = us_by_node.get(&nid).copied().unwrap_or(0);
204            let location_id = stack_for_node(
205                nid,
206                &node_by_id,
207                &parent,
208                &mut state,
209                &mut stack_cache,
210            );
211            state.samples.push(proto::Sample {
212                location_id,
213                value: vec![count, us * 1000],
214                label: vec![],
215            });
216            samples_emitted += 1;
217        }
218
219        let stats = Stats {
220            samples: samples_emitted,
221            functions: state.functions.len(),
222            locations: state.locations.len(),
223        };
224
225        let period_ns = if profile.samples.is_empty() {
226            1
227        } else {
228            let avg_us = (total_us as f64 / profile.samples.len() as f64).round() as i64;
229            (avg_us * 1000).max(1)
230        };
231
232        let time_nanos = profile.start_time.saturating_mul(1000);
233        let duration_nanos = profile
234            .end_time
235            .saturating_sub(profile.start_time)
236            .saturating_mul(1000);
237
238        (
239            state.finish(mapping_filename_id, period_ns, time_nanos, duration_nanos),
240            stats,
241        )
242    }
243}
244
245fn stack_for_node(
246    leaf: i64,
247    by_id: &HashMap<i64, &CpuNode>,
248    parent: &HashMap<i64, i64>,
249    state: &mut State,
250    cache: &mut HashMap<i64, Vec<u64>>,
251) -> Vec<u64> {
252    if let Some(cached) = cache.get(&leaf) {
253        return cached.clone();
254    }
255    let mut stack: Vec<u64> = Vec::new();
256    let mut cur = Some(leaf);
257    while let Some(nid) = cur {
258        let Some(node) = by_id.get(&nid) else { break };
259        stack.push(state.intern_location(node));
260        cur = parent.get(&nid).copied();
261    }
262    cache.insert(leaf, stack.clone());
263    stack
264}
265
266struct State {
267    strings: Vec<String>,
268    string_index: HashMap<String, i64>,
269    functions: Vec<proto::Function>,
270    func_index: HashMap<String, u64>,
271    locations: Vec<proto::Location>,
272    loc_by_node: HashMap<i64, u64>,
273    samples: Vec<proto::Sample>,
274    demangle: DemangleFn,
275}
276
277impl State {
278    fn new(demangle: DemangleFn) -> Self {
279        let mut me = Self {
280            strings: vec![String::new()],
281            string_index: HashMap::from([(String::new(), 0)]),
282            functions: Vec::new(),
283            func_index: HashMap::new(),
284            locations: Vec::new(),
285            loc_by_node: HashMap::new(),
286            samples: Vec::new(),
287            demangle,
288        };
289        // Pre-intern fixed strings the final ValueType slots need.
290        me.intern("samples");
291        me.intern("count");
292        me.intern("cpu");
293        me.intern("nanoseconds");
294        me
295    }
296
297    fn intern(&mut self, s: &str) -> i64 {
298        if let Some(&id) = self.string_index.get(s) {
299            return id;
300        }
301        let id = self.strings.len() as i64;
302        self.strings.push(s.to_string());
303        self.string_index.insert(s.to_string(), id);
304        id
305    }
306
307    fn intern_function(&mut self, call: &CallFrame) -> u64 {
308        let raw = if call.function_name.is_empty() {
309            "(anonymous)"
310        } else {
311            call.function_name.as_str()
312        };
313        let key = format!(
314            "{raw}\x1f{url}\x1f{sid}",
315            url = call.url,
316            sid = call.script_id,
317        );
318        if let Some(&id) = self.func_index.get(&key) {
319            return id;
320        }
321        let pretty = (self.demangle)(raw);
322        let id = (self.functions.len() + 1) as u64;
323        let name = self.intern(&pretty);
324        let system_name = self.intern(raw);
325        let filename = self.intern(&call.url);
326        let start_line = if call.line_number >= 0 {
327            call.line_number as i64 + 1
328        } else {
329            0
330        };
331        self.functions.push(proto::Function {
332            id,
333            name,
334            system_name,
335            filename,
336            start_line,
337        });
338        self.func_index.insert(key, id);
339        id
340    }
341
342    fn intern_location(&mut self, node: &CpuNode) -> u64 {
343        if let Some(&id) = self.loc_by_node.get(&node.id) {
344            return id;
345        }
346        let func_id = self.intern_function(&node.call_frame);
347        let line = if node.call_frame.line_number >= 0 {
348            node.call_frame.line_number as i64 + 1
349        } else {
350            0
351        };
352        let id = (self.locations.len() + 1) as u64;
353        self.locations.push(proto::Location {
354            id,
355            mapping_id: 1,
356            address: 0,
357            line: vec![proto::Line {
358                function_id: func_id,
359                line,
360                column: 0,
361            }],
362            is_folded: false,
363        });
364        self.loc_by_node.insert(node.id, id);
365        id
366    }
367
368    fn finish(
369        self,
370        mapping_filename: i64,
371        period_ns: i64,
372        time_nanos: i64,
373        duration_nanos: i64,
374    ) -> proto::Profile {
375        let samples_str = self.string_index["samples"];
376        let count_str = self.string_index["count"];
377        let cpu_str = self.string_index["cpu"];
378        let ns_str = self.string_index["nanoseconds"];
379        proto::Profile {
380            sample_type: vec![
381                proto::ValueType {
382                    r#type: samples_str,
383                    unit: count_str,
384                },
385                proto::ValueType {
386                    r#type: cpu_str,
387                    unit: ns_str,
388                },
389            ],
390            sample: self.samples,
391            mapping: vec![proto::Mapping {
392                id: 1,
393                memory_start: 0,
394                memory_limit: 0,
395                file_offset: 0,
396                filename: mapping_filename,
397                build_id: 0,
398                has_functions: true,
399                has_filenames: false,
400                has_line_numbers: false,
401                has_inline_frames: false,
402            }],
403            location: self.locations,
404            function: self.functions,
405            string_table: self.strings,
406            drop_frames: 0,
407            keep_frames: 0,
408            time_nanos,
409            duration_nanos,
410            period_type: Some(proto::ValueType {
411                r#type: cpu_str,
412                unit: ns_str,
413            }),
414            period: period_ns,
415            comment: vec![],
416            default_sample_type: 0,
417            doc_url: 0,
418        }
419    }
420}
421
#[cfg(test)]
mod tests {
    use super::*;

    /// Raw (mangled) function names used by the synthetic profile.
    const RAW_ACKERMANN: &str = "_M0FP26mizchi5bench9ackermann";
    const RAW_FIB: &str = "_M0FP26mizchi5bench3fib";

    /// Shorthand for building a call frame with a string script id.
    fn frame(name: &str, url: &str, script_id: &str, line: i32, column: i32) -> CallFrame {
        CallFrame {
            function_name: name.into(),
            url: url.into(),
            script_id: serde_json::Value::String(script_id.into()),
            line_number: line,
            column_number: column,
        }
    }

    /// Tiny three-node tree: root -> ackermann -> fib, with two samples
    /// on fib (1000 us each) and one on ackermann (500 us).
    fn synth_profile() -> CpuProfile {
        CpuProfile {
            nodes: vec![
                CpuNode {
                    id: 1,
                    call_frame: frame("(root)", "", "0", -1, -1),
                    children: vec![2],
                },
                CpuNode {
                    id: 2,
                    call_frame: frame(RAW_ACKERMANN, "wasm", "42", 0, 0),
                    children: vec![3],
                },
                CpuNode {
                    id: 3,
                    call_frame: frame(RAW_FIB, "wasm", "42", 1, 0),
                    children: vec![],
                },
            ],
            samples: vec![3, 3, 2],
            time_deltas: vec![1000, 1000, 500],
            start_time: 0,
            end_time: 2500,
        }
    }

    #[test]
    fn builds_and_demangles() {
        let out = Builder::new(synth_profile()).encode().unwrap();
        assert_eq!(out.stats.samples, 2); // 2 unique leaves
        assert_eq!(out.stats.locations, 3); // root + ackermann + fib
        assert_eq!(out.stats.functions, 3);
        assert!(!out.encoded.is_empty());
    }

    #[test]
    fn aggregates_count_and_time_per_leaf() {
        let (profile, stats) = Builder::new(synth_profile())
            .demangle_with(|s| s.to_string())
            .build();
        assert_eq!(stats.samples, 2);
        assert_eq!(profile.sample.len(), 2);

        // Identify samples by stack depth so the test does not depend
        // on the order in which they were emitted.
        let values_at_depth = |depth: usize| {
            profile
                .sample
                .iter()
                .find(|s| s.location_id.len() == depth)
                .map(|s| s.value.clone())
        };
        // fib: 2 hits, 2000 us -> 2_000_000 ns; stack fib/ackermann/root.
        assert_eq!(values_at_depth(3), Some(vec![2, 2_000_000]));
        // ackermann: 1 hit, 500 us -> 500_000 ns; stack ackermann/root.
        assert_eq!(values_at_depth(2), Some(vec![1, 500_000]));

        // 2500 us over 3 samples rounds to 833 us per sample.
        assert_eq!(profile.period, 833_000);
        assert_eq!(profile.time_nanos, 0);
        assert_eq!(profile.duration_nanos, 2_500_000);
    }

    #[test]
    fn identity_demangler_passes_raw_names() {
        let (profile, _) = Builder::new(synth_profile())
            .demangle_with(|s| s.to_string())
            .build();
        for raw in ["(root)", RAW_ACKERMANN, RAW_FIB] {
            assert!(
                profile.string_table.iter().any(|s| s == raw),
                "string table is missing raw name {raw:?}"
            );
        }

        // The full encode path must also succeed with the override.
        let out = Builder::new(synth_profile())
            .demangle_with(|s| s.to_string())
            .encode()
            .unwrap();
        assert!(out.encoded.len() > 50);
    }
}