1#![forbid(unsafe_code)]
18#![warn(missing_docs)]
19
20use std::collections::HashMap;
21use std::io::Write;
22
23use anyhow::Result;
24use flate2::Compression;
25use flate2::write::GzEncoder;
26use prost::Message;
27use serde::Deserialize;
28
29use firefox_to_pprof::proto;
30
31#[derive(Debug, Deserialize)]
33#[serde(rename_all = "camelCase")]
34pub struct CpuProfile {
35 pub nodes: Vec<CpuNode>,
38 pub samples: Vec<i64>,
40 #[serde(default)]
42 pub time_deltas: Vec<i64>,
43 #[serde(default)]
45 pub start_time: i64,
46 #[serde(default)]
48 pub end_time: i64,
49}
50
51#[derive(Debug, Deserialize)]
53#[serde(rename_all = "camelCase")]
54pub struct CpuNode {
55 pub id: i64,
57 pub call_frame: CallFrame,
59 #[serde(default)]
61 pub children: Vec<i64>,
62}
63
64#[derive(Debug, Deserialize)]
66#[serde(rename_all = "camelCase")]
67pub struct CallFrame {
68 #[serde(default)]
70 pub function_name: String,
71 #[serde(default)]
73 pub url: String,
74 #[serde(default)]
78 pub script_id: serde_json::Value,
79 #[serde(default = "neg_one")]
81 pub line_number: i32,
82 #[serde(default = "neg_one")]
84 pub column_number: i32,
85}
86
/// Serde default for the line/column fields: the `-1` "not recorded" marker.
fn neg_one() -> i32 {
    const ABSENT: i32 = -1;
    ABSENT
}
90
/// Boxed callback that turns a raw function name into the display name
/// stored in the output profile.
///
/// The `'static` bound is what `Box<dyn …>` implies by default; it is only
/// spelled out here.
pub type DemangleFn = Box<dyn Fn(&str) -> String + 'static>;
94
/// Sizes of the tables in a converted profile.
///
/// Plain data: comparable and defaultable so callers and tests can assert on
/// whole values instead of field by field.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Stats {
    /// Number of pprof samples emitted (one per distinct sampled node id).
    pub samples: usize,
    /// Number of distinct pprof functions emitted.
    pub functions: usize,
    /// Number of pprof locations emitted.
    pub locations: usize,
}
105
106pub struct EncodedProfile {
108 pub encoded: Vec<u8>,
110 pub stats: Stats,
112}
113
114pub struct Builder {
125 profile: CpuProfile,
126 demangle: DemangleFn,
127 mapping_filename: Option<String>,
128}
129
130impl Builder {
131 pub fn new(profile: CpuProfile) -> Self {
134 Self {
135 profile,
136 demangle: Box::new(|s| moonbit_demangle::demangle(s)),
137 mapping_filename: None,
138 }
139 }
140
141 pub fn demangle_with(mut self, f: impl Fn(&str) -> String + 'static) -> Self {
143 self.demangle = Box::new(f);
144 self
145 }
146
147 pub fn mapping_filename(mut self, s: impl Into<String>) -> Self {
149 self.mapping_filename = Some(s.into());
150 self
151 }
152
153 pub fn encode(self) -> Result<EncodedProfile> {
155 let (profile, stats) = self.build();
156 let mut buf = Vec::new();
157 profile.encode(&mut buf)?;
158 let mut gz = GzEncoder::new(Vec::new(), Compression::default());
159 gz.write_all(&buf)?;
160 Ok(EncodedProfile {
161 encoded: gz.finish()?,
162 stats,
163 })
164 }
165
166 fn build(self) -> (proto::Profile, Stats) {
167 let Self {
168 profile,
169 demangle,
170 mapping_filename,
171 } = self;
172 let mut state = State::new(demangle);
173 let mapping_filename_id =
174 state.intern(mapping_filename.as_deref().unwrap_or(""));
175
176 let mut node_by_id: HashMap<i64, &CpuNode> = HashMap::with_capacity(profile.nodes.len());
178 for n in &profile.nodes {
179 node_by_id.insert(n.id, n);
180 }
181 let mut parent: HashMap<i64, i64> = HashMap::new();
182 for n in &profile.nodes {
183 for &c in &n.children {
184 parent.insert(c, n.id);
185 }
186 }
187
188 let mut count_by_node: HashMap<i64, i64> = HashMap::new();
190 let mut us_by_node: HashMap<i64, i64> = HashMap::new();
191 let mut total_us: i64 = 0;
192 for (i, &nid) in profile.samples.iter().enumerate() {
193 let dt = profile.time_deltas.get(i).copied().unwrap_or(0);
194 *count_by_node.entry(nid).or_default() += 1;
195 *us_by_node.entry(nid).or_default() += dt;
196 total_us += dt;
197 }
198
199 let mut samples_emitted = 0usize;
201 let mut stack_cache: HashMap<i64, Vec<u64>> = HashMap::new();
202 for (&nid, &count) in &count_by_node {
203 let us = us_by_node.get(&nid).copied().unwrap_or(0);
204 let location_id = stack_for_node(
205 nid,
206 &node_by_id,
207 &parent,
208 &mut state,
209 &mut stack_cache,
210 );
211 state.samples.push(proto::Sample {
212 location_id,
213 value: vec![count, us * 1000],
214 label: vec![],
215 });
216 samples_emitted += 1;
217 }
218
219 let stats = Stats {
220 samples: samples_emitted,
221 functions: state.functions.len(),
222 locations: state.locations.len(),
223 };
224
225 let period_ns = if profile.samples.is_empty() {
226 1
227 } else {
228 let avg_us = (total_us as f64 / profile.samples.len() as f64).round() as i64;
229 (avg_us * 1000).max(1)
230 };
231
232 let time_nanos = profile.start_time.saturating_mul(1000);
233 let duration_nanos = profile
234 .end_time
235 .saturating_sub(profile.start_time)
236 .saturating_mul(1000);
237
238 (
239 state.finish(mapping_filename_id, period_ns, time_nanos, duration_nanos),
240 stats,
241 )
242 }
243}
244
245fn stack_for_node(
246 leaf: i64,
247 by_id: &HashMap<i64, &CpuNode>,
248 parent: &HashMap<i64, i64>,
249 state: &mut State,
250 cache: &mut HashMap<i64, Vec<u64>>,
251) -> Vec<u64> {
252 if let Some(cached) = cache.get(&leaf) {
253 return cached.clone();
254 }
255 let mut stack: Vec<u64> = Vec::new();
256 let mut cur = Some(leaf);
257 while let Some(nid) = cur {
258 let Some(node) = by_id.get(&nid) else { break };
259 stack.push(state.intern_location(node));
260 cur = parent.get(&nid).copied();
261 }
262 cache.insert(leaf, stack.clone());
263 stack
264}
265
266struct State {
267 strings: Vec<String>,
268 string_index: HashMap<String, i64>,
269 functions: Vec<proto::Function>,
270 func_index: HashMap<String, u64>,
271 locations: Vec<proto::Location>,
272 loc_by_node: HashMap<i64, u64>,
273 samples: Vec<proto::Sample>,
274 demangle: DemangleFn,
275}
276
277impl State {
278 fn new(demangle: DemangleFn) -> Self {
279 let mut me = Self {
280 strings: vec![String::new()],
281 string_index: HashMap::from([(String::new(), 0)]),
282 functions: Vec::new(),
283 func_index: HashMap::new(),
284 locations: Vec::new(),
285 loc_by_node: HashMap::new(),
286 samples: Vec::new(),
287 demangle,
288 };
289 me.intern("samples");
291 me.intern("count");
292 me.intern("cpu");
293 me.intern("nanoseconds");
294 me
295 }
296
297 fn intern(&mut self, s: &str) -> i64 {
298 if let Some(&id) = self.string_index.get(s) {
299 return id;
300 }
301 let id = self.strings.len() as i64;
302 self.strings.push(s.to_string());
303 self.string_index.insert(s.to_string(), id);
304 id
305 }
306
307 fn intern_function(&mut self, call: &CallFrame) -> u64 {
308 let raw = if call.function_name.is_empty() {
309 "(anonymous)"
310 } else {
311 call.function_name.as_str()
312 };
313 let key = format!(
314 "{raw}\x1f{url}\x1f{sid}",
315 url = call.url,
316 sid = call.script_id,
317 );
318 if let Some(&id) = self.func_index.get(&key) {
319 return id;
320 }
321 let pretty = (self.demangle)(raw);
322 let id = (self.functions.len() + 1) as u64;
323 let name = self.intern(&pretty);
324 let system_name = self.intern(raw);
325 let filename = self.intern(&call.url);
326 let start_line = if call.line_number >= 0 {
327 call.line_number as i64 + 1
328 } else {
329 0
330 };
331 self.functions.push(proto::Function {
332 id,
333 name,
334 system_name,
335 filename,
336 start_line,
337 });
338 self.func_index.insert(key, id);
339 id
340 }
341
342 fn intern_location(&mut self, node: &CpuNode) -> u64 {
343 if let Some(&id) = self.loc_by_node.get(&node.id) {
344 return id;
345 }
346 let func_id = self.intern_function(&node.call_frame);
347 let line = if node.call_frame.line_number >= 0 {
348 node.call_frame.line_number as i64 + 1
349 } else {
350 0
351 };
352 let id = (self.locations.len() + 1) as u64;
353 self.locations.push(proto::Location {
354 id,
355 mapping_id: 1,
356 address: 0,
357 line: vec![proto::Line {
358 function_id: func_id,
359 line,
360 column: 0,
361 }],
362 is_folded: false,
363 });
364 self.loc_by_node.insert(node.id, id);
365 id
366 }
367
368 fn finish(
369 self,
370 mapping_filename: i64,
371 period_ns: i64,
372 time_nanos: i64,
373 duration_nanos: i64,
374 ) -> proto::Profile {
375 let samples_str = self.string_index["samples"];
376 let count_str = self.string_index["count"];
377 let cpu_str = self.string_index["cpu"];
378 let ns_str = self.string_index["nanoseconds"];
379 proto::Profile {
380 sample_type: vec![
381 proto::ValueType {
382 r#type: samples_str,
383 unit: count_str,
384 },
385 proto::ValueType {
386 r#type: cpu_str,
387 unit: ns_str,
388 },
389 ],
390 sample: self.samples,
391 mapping: vec![proto::Mapping {
392 id: 1,
393 memory_start: 0,
394 memory_limit: 0,
395 file_offset: 0,
396 filename: mapping_filename,
397 build_id: 0,
398 has_functions: true,
399 has_filenames: false,
400 has_line_numbers: false,
401 has_inline_frames: false,
402 }],
403 location: self.locations,
404 function: self.functions,
405 string_table: self.strings,
406 drop_frames: 0,
407 keep_frames: 0,
408 time_nanos,
409 duration_nanos,
410 period_type: Some(proto::ValueType {
411 r#type: cpu_str,
412 unit: ns_str,
413 }),
414 period: period_ns,
415 comment: vec![],
416 default_sample_type: 0,
417 doc_url: 0,
418 }
419 }
420}
421
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a call frame with a string script id.
    fn frame(name: &str, url: &str, script_id: &str, line: i32, column: i32) -> CallFrame {
        CallFrame {
            function_name: name.into(),
            url: url.into(),
            script_id: serde_json::Value::String(script_id.into()),
            line_number: line,
            column_number: column,
        }
    }

    /// Three-node chain `(root) -> ackermann -> fib`, sampled twice at `fib`
    /// and once at `ackermann`.
    fn synth_profile() -> CpuProfile {
        CpuProfile {
            nodes: vec![
                CpuNode {
                    id: 1,
                    call_frame: frame("(root)", "", "0", -1, -1),
                    children: vec![2],
                },
                CpuNode {
                    id: 2,
                    call_frame: frame("_M0FP26mizchi5bench9ackermann", "wasm", "42", 0, 0),
                    children: vec![3],
                },
                CpuNode {
                    id: 3,
                    call_frame: frame("_M0FP26mizchi5bench3fib", "wasm", "42", 1, 0),
                    children: vec![],
                },
            ],
            samples: vec![3, 3, 2],
            time_deltas: vec![1000, 1000, 500],
            start_time: 0,
            end_time: 2500,
        }
    }

    #[test]
    fn builds_and_demangles() {
        let out = Builder::new(synth_profile()).encode().unwrap();
        assert_eq!(out.stats.samples, 2);
        assert_eq!(out.stats.locations, 3);
        assert_eq!(out.stats.functions, 3);
        assert!(!out.encoded.is_empty());
        // The payload is gzip-compressed: it starts with the gzip magic bytes.
        assert_eq!(&out.encoded[..2], &[0x1f, 0x8b]);
    }

    #[test]
    fn identity_demangler_passes_raw_names() {
        let out = Builder::new(synth_profile())
            .demangle_with(|s| s.to_string())
            .encode()
            .unwrap();
        assert!(out.encoded.len() > 50);

        // Check the actual names rather than just the payload size.
        let (profile, _) = Builder::new(synth_profile())
            .demangle_with(|s| s.to_string())
            .build();
        for raw in [
            "(root)",
            "_M0FP26mizchi5bench9ackermann",
            "_M0FP26mizchi5bench3fib",
        ] {
            assert!(
                profile.string_table.iter().any(|s| s == raw),
                "missing raw name {raw:?} in string table"
            );
        }
    }

    #[test]
    fn aggregates_samples_per_node() {
        let (profile, stats) = Builder::new(synth_profile()).build();
        assert_eq!(stats.samples, profile.sample.len());

        // (stack depth, [count, nanoseconds]) per sample, order-independent.
        let mut seen: Vec<(usize, Vec<i64>)> = profile
            .sample
            .iter()
            .map(|s| (s.location_id.len(), s.value.clone()))
            .collect();
        seen.sort();
        assert_eq!(
            seen,
            vec![(2, vec![1, 500_000]), (3, vec![2, 2_000_000])]
        );

        // Mean delta is 2500 / 3 ≈ 833 µs.
        assert_eq!(profile.period, 833_000);
        assert_eq!(profile.time_nanos, 0);
        assert_eq!(profile.duration_nanos, 2_500_000);
    }

    #[test]
    fn mapping_filename_is_interned() {
        let (profile, _) = Builder::new(synth_profile())
            .mapping_filename("bench.wasm")
            .build();
        let idx = profile.mapping[0].filename as usize;
        assert_eq!(profile.string_table[idx], "bench.wasm");
        assert_eq!(profile.string_table[0], "");
    }

    #[test]
    fn deserializes_camel_case_keys_and_defaults() {
        let json = r#"{
            "nodes": [{"id": 1, "callFrame": {"functionName": "f", "scriptId": 7}}],
            "samples": [1],
            "timeDeltas": [10],
            "startTime": 5,
            "endTime": 15
        }"#;
        let p: CpuProfile = serde_json::from_str(json).unwrap();
        assert_eq!(p.samples, vec![1]);
        assert_eq!(p.time_deltas, vec![10]);
        assert_eq!((p.start_time, p.end_time), (5, 15));

        let node = &p.nodes[0];
        assert!(node.children.is_empty());
        assert_eq!(node.call_frame.function_name, "f");
        assert_eq!(node.call_frame.url, "");
        assert_eq!(node.call_frame.script_id, serde_json::Value::from(7));
        assert_eq!(node.call_frame.line_number, -1);
        assert_eq!(node.call_frame.column_number, -1);
    }

    #[test]
    fn empty_profile_uses_defaults() {
        let p: CpuProfile = serde_json::from_str(r#"{"nodes": [], "samples": []}"#).unwrap();
        assert!(p.time_deltas.is_empty());
        assert_eq!((p.start_time, p.end_time), (0, 0));

        let (profile, stats) = Builder::new(p).build();
        assert_eq!(stats.samples, 0);
        assert_eq!(stats.functions, 0);
        assert_eq!(stats.locations, 0);
        assert_eq!(profile.period, 1);
    }
}