v8_heap_parser/
decoder.rs

1use std::{borrow::Cow, fmt, fmt::Display, rc::Rc};
2
3#[cfg(target_arch = "wasm32")]
4use wasm_bindgen::prelude::*;
5
6use crate::{graph::Graph, perf::PerfCounter};
7
8use serde::{
9    de::{self, DeserializeSeed, SeqAccess, Visitor},
10    Deserialize, Deserializer,
11};
12
13#[allow(dead_code)]
14pub(crate) struct Root {
15    pub snapshot: Snapshot,
16    pub graph: PetGraph,
17    pub strings: Rc<Vec<String>>,
18    pub trace_function_infos: Vec<u32>,
19    pub trace_tree: Vec<u32>,
20    pub samples: Vec<u32>,
21    pub locations: Vec<u32>,
22}
23
24#[derive(Deserialize)]
25pub(crate) struct Snapshot {
26    pub meta: Meta,
27    pub root_index: Option<usize>,
28    // unused:
29    // pub node_count: u64,
30    // pub edge_count: u64,
31    // pub trace_function_count: u64,
32}
33
34#[derive(Deserialize)]
35#[serde(untagged)]
36pub(crate) enum StringOrArray {
37    Single(String),
38    Arr(Vec<String>),
39}
40
41#[derive(Deserialize)]
42#[allow(dead_code)]
43pub(crate) struct Meta {
44    pub node_fields: Vec<String>,
45    pub node_types: Vec<StringOrArray>,
46    pub edge_fields: Vec<String>,
47    pub edge_types: Vec<StringOrArray>,
48    pub trace_function_info_fields: Vec<String>,
49    pub trace_node_fields: Vec<String>,
50    pub sample_fields: Vec<String>,
51}
52
53impl<'de> Deserialize<'de> for Root {
54    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
55    where
56        D: Deserializer<'de>,
57    {
58        deserializer.deserialize_map(RootVisitor)
59    }
60}
61
62struct RootVisitor;
63
64impl<'de> Visitor<'de> for RootVisitor {
65    // This Visitor constructs a single Vec<T> to hold the flattened
66    // contents of the inner arrays.
67    type Value = Root;
68
69    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
70        write!(formatter, "an object map")
71    }
72
73    fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
74    where
75        A: de::MapAccess<'de>,
76    {
77        let mut snapshot: Option<Snapshot> = None;
78        let mut graph: Option<PetGraph> = None;
79        let mut has_edges = false;
80
81        let mut trace_function_infos = None;
82        let mut trace_tree = None;
83        let mut samples = None;
84        let mut locations = None;
85        let mut strings: Option<Vec<String>> = None;
86
87        while let Some(key) = map.next_key::<Cow<'_, str>>()? {
88            match key.as_ref() {
89                "snapshot" => {
90                    snapshot = map.next_value()?;
91                }
92                "nodes" => {
93                    let snapshot = snapshot.as_ref().ok_or_else(|| {
94                        de::Error::custom("expected 'snapshot' before 'nodes' field")
95                    })?;
96
97                    graph = Some(map.next_value_seed(NodesDeserializer(snapshot))?);
98                }
99                "edges" => {
100                    let snapshot = snapshot.as_ref().ok_or_else(|| {
101                        de::Error::custom("expected 'snapshot' before 'edges' field")
102                    })?;
103                    let graph = graph.as_mut().ok_or_else(|| {
104                        de::Error::custom("expected 'nodes' before 'edges' field")
105                    })?;
106
107                    map.next_value_seed(EdgesDeserializer(snapshot, graph))?;
108                    has_edges = true;
109                }
110
111                "trace_function_infos" => {
112                    trace_function_infos = Some(map.next_value()?);
113                }
114                "trace_tree" => {
115                    trace_tree = Some(map.next_value()?);
116                }
117                "samples" => {
118                    samples = Some(map.next_value()?);
119                }
120                "locations" => {
121                    locations = Some(map.next_value()?);
122                }
123                "strings" => {
124                    strings = Some(map.next_value()?);
125                }
126                _ => {}
127            }
128        }
129
130        if !has_edges {
131            return Err(de::Error::missing_field("edges"));
132        }
133        let snapshot = snapshot.ok_or_else(|| de::Error::missing_field("snapshot"))?;
134        let mut graph = graph.ok_or_else(|| de::Error::missing_field("nodes"))?;
135        let strings = Rc::new(strings.ok_or_else(|| de::Error::missing_field("strings"))?);
136
137        for node in graph.node_weights_mut() {
138            node.strings = Some(strings.clone());
139        }
140
141        Ok(Root {
142            snapshot,
143            graph,
144            trace_function_infos: trace_function_infos.unwrap_or_default(),
145            trace_tree: trace_tree.unwrap_or_default(),
146            samples: samples.unwrap_or_default(),
147            locations: locations.unwrap_or_default(),
148            strings,
149        })
150    }
151}
152struct NodesDeserializer<'a>(&'a Snapshot);
153
154impl<'de, 'a> DeserializeSeed<'de> for NodesDeserializer<'a> {
155    // The return type of the `deserialize` method. This implementation
156    // appends onto an existing vector but does not create any new data
157    // structure, so the return type is ().
158    type Value = PetGraph;
159
160    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
161    where
162        D: Deserializer<'de>,
163    {
164        // Visitor implementation that will walk an inner array of the JSON
165        // input.
166        struct NodesVisitor<'a>(&'a Snapshot);
167
168        impl<'de, 'a> Visitor<'de> for NodesVisitor<'a> {
169            type Value = PetGraph;
170
171            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
172                write!(formatter, "an array of integers")
173            }
174
175            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
176            where
177                A: SeqAccess<'de>,
178            {
179                let mut name_offset = None;
180                let mut id_offset = None;
181                let mut self_size_offset = None;
182                let mut edge_count_offset = None;
183                let mut trace_node_id_offset = None;
184                let mut detachedness_offset = None;
185                let mut type_offset = None;
186
187                for (i, field) in self.0.meta.node_fields.iter().enumerate() {
188                    match field.as_str() {
189                        "name" => name_offset = Some(i),
190                        "id" => id_offset = Some(i),
191                        "self_size" => self_size_offset = Some(i),
192                        "edge_count" => edge_count_offset = Some(i),
193                        "trace_node_id" => trace_node_id_offset = Some(i),
194                        "detachedness" => detachedness_offset = Some(i),
195                        "type" => type_offset = Some(i),
196                        _ => {}
197                    }
198                }
199
200                let name_offset = name_offset.ok_or(de::Error::missing_field("name"))?;
201                let type_offset = type_offset.ok_or(de::Error::missing_field("type"))?;
202                let type_types = match self.0.meta.node_types.get(type_offset) {
203                    None => return Err(de::Error::missing_field("type")),
204                    Some(StringOrArray::Single(_)) => {
205                        return Err(de::Error::custom("node `type` should be an array"))
206                    }
207                    Some(StringOrArray::Arr(a)) => a,
208                };
209
210                let row_size = self.0.meta.node_fields.len();
211
212                let mut graph: PetGraph = petgraph::Graph::new();
213                let mut buf: Vec<u64> = vec![0; row_size];
214                let mut buf_i /* the vampire slayer */ = 0;
215                while let Some(elem) = seq.next_element()? {
216                    buf[buf_i] = elem;
217                    buf_i += 1;
218
219                    if buf_i == row_size {
220                        buf_i = 0;
221                        graph.add_node(Node {
222                            strings: None,
223                            name_index: buf[name_offset] as usize,
224                            typ: NodeType::from_str(type_types, buf[type_offset] as usize),
225                            self_size: self_size_offset.map(|o| buf[o]).unwrap_or_default(),
226                            edge_count: edge_count_offset
227                                .map(|o| buf[o] as usize)
228                                .unwrap_or_default(),
229                            trace_node_id: trace_node_id_offset.map(|o| buf[o]).unwrap_or_default(),
230                            detachedness: detachedness_offset
231                                .map(|o| buf[o] as u32)
232                                .unwrap_or_default(),
233                            id: id_offset.map(|o| buf[o] as u32).unwrap_or_default(),
234                        });
235                    }
236                }
237
238                Ok(graph)
239            }
240        }
241
242        deserializer.deserialize_seq(NodesVisitor(self.0))
243    }
244}
245
246struct EdgesDeserializer<'a>(&'a Snapshot, &'a mut PetGraph);
247
248impl<'de, 'a> DeserializeSeed<'de> for EdgesDeserializer<'a> {
249    // The return type of the `deserialize` method. This implementation
250    // appends onto an existing vector but does not create any new data
251    // structure, so the return type is ().
252    type Value = ();
253
254    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
255    where
256        D: Deserializer<'de>,
257    {
258        // Visitor implementation that will walk an inner array of the JSON
259        // input.
260        struct EdgesVisitor<'a>(&'a Snapshot, &'a mut PetGraph);
261
262        impl<'de, 'a> Visitor<'de> for EdgesVisitor<'a> {
263            type Value = ();
264
265            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
266                write!(formatter, "an array of integers")
267            }
268
269            fn visit_seq<A>(self, mut seq: A) -> Result<(), A::Error>
270            where
271                A: SeqAccess<'de>,
272            {
273                let mut to_node_offset = None;
274                let mut name_or_index_offset = None;
275                let mut type_offset = None;
276
277                for (i, field) in self.0.meta.edge_fields.iter().enumerate() {
278                    match field.as_str() {
279                        "to_node" => to_node_offset = Some(i),
280                        "name_or_index" => name_or_index_offset = Some(i),
281                        "type" => type_offset = Some(i),
282                        _ => {}
283                    }
284                }
285
286                let to_node_offset = to_node_offset.ok_or(de::Error::missing_field("to_node"))?;
287                let type_offset = type_offset.ok_or(de::Error::missing_field("type"))?;
288                let name_or_index_offset =
289                    name_or_index_offset.ok_or(de::Error::missing_field("name_or_index"))?;
290                let type_types = match self.0.meta.edge_types.get(type_offset) {
291                    None => return Err(de::Error::missing_field("type")),
292                    Some(StringOrArray::Single(_)) => {
293                        return Err(de::Error::custom("edge `type` should be an array"))
294                    }
295                    Some(StringOrArray::Arr(a)) => a,
296                };
297
298                let row_size = self.0.meta.edge_fields.len();
299                let node_row_size = self.0.meta.node_fields.len();
300
301                // Each node own the next "edge_count" edges in the array.
302                let unexpected_end = || de::Error::custom("unexpected end of edges");
303                let nodes_len = self.1.raw_nodes().len();
304                for from_index in 0..nodes_len {
305                    let edge_count = self.1.raw_nodes()[from_index].weight.edge_count;
306                    let from_index = petgraph::graph::NodeIndex::new(from_index);
307                    for _ in 0..edge_count {
308                        // we know that all the offsets exists and are within
309                        // the row_size, so they must be assigned before getting
310                        // to the add_edge method.
311                        let mut typ: usize = unsafe { std::mem::zeroed() };
312                        let mut to_index: usize = unsafe { std::mem::zeroed() };
313                        let mut name_or_index: NameOrIndex = unsafe { std::mem::zeroed() };
314
315                        for i in 0..row_size {
316                            match i {
317                                i if i == to_node_offset => {
318                                    to_index = seq.next_element()?.ok_or_else(unexpected_end)?;
319                                }
320                                i if i == name_or_index_offset => {
321                                    name_or_index =
322                                        seq.next_element()?.ok_or_else(unexpected_end)?;
323                                }
324                                i if i == type_offset => {
325                                    typ = seq.next_element()?.ok_or_else(unexpected_end)?;
326                                }
327                                _ => {}
328                            }
329                        }
330
331                        self.1.add_edge(
332                            from_index,
333                            petgraph::graph::NodeIndex::new(to_index / node_row_size),
334                            PGNodeEdge {
335                                typ: EdgeType::from_str(type_types, typ),
336                                name: name_or_index,
337                            },
338                        );
339                    }
340                }
341
342                Ok(())
343            }
344        }
345
346        deserializer.deserialize_seq(EdgesVisitor(self.0, self.1))
347    }
348}
349
350pub(crate) type PetGraph = petgraph::Graph<Node, PGNodeEdge>;
351
352#[derive(Debug)]
353pub struct Node {
354    name_index: usize,
355    strings: Option<Rc<Vec<String>>>,
356
357    pub typ: NodeType,
358    pub id: u32,
359    pub self_size: u64,
360    pub edge_count: usize,
361    pub trace_node_id: u64,
362    pub detachedness: u32,
363}
364
365impl Node {
366    pub fn name(&self) -> &str {
367        let strs = unsafe { self.strings.as_ref().unwrap_unchecked() };
368        &strs[self.name_index]
369    }
370
371    pub(crate) fn is_document_dom_trees_root(&self) -> bool {
372        self.typ == NodeType::Syntheic && self.name() == "(Document DOM trees)'"
373    }
374
375    pub fn class_name(&self) -> &str {
376        match &self.typ {
377            NodeType::Object | NodeType::Native => self.name(),
378            t => t.as_class_name(),
379        }
380    }
381}
382
/// Kind of a heap node, resolved from the snapshot's node type table.
///
/// `Other` carries the raw type index for names this decoder does not know.
/// (`Syntheic` is the existing public spelling of the "synthetic" variant.)
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
#[non_exhaustive]
pub enum NodeType {
    Hidden,
    Array,
    String,
    Object,
    Code,
    Closure,
    RegExp,
    Number,
    Native,
    Syntheic,
    ConcatString,
    SliceString,
    BigInt,
    Other(usize),
}

impl NodeType {
    /// Fixed, parenthesised label used as the class name for this node type.
    fn as_class_name(&self) -> &'static str {
        match *self {
            NodeType::Hidden => "(system)",
            NodeType::Array => "(array)",
            NodeType::String => "(string)",
            NodeType::Object => "(object)",
            NodeType::Code => "(compiled code)",
            NodeType::Closure => "(closure)",
            NodeType::RegExp => "(regexp)",
            NodeType::Number => "(number)",
            NodeType::Native => "(native)",
            NodeType::Syntheic => "(synthetic)",
            NodeType::ConcatString => "(concatenated string)",
            NodeType::SliceString => "(sliced string)",
            NodeType::BigInt => "(bigint)",
            NodeType::Other(_) => "(unknown)",
        }
    }

    /// Resolves entry `typ` of the type-name table `strings` to a variant.
    ///
    /// An out-of-range index or an unrecognised name yields `Other(typ)`.
    fn from_str(strings: &[String], typ: usize) -> Self {
        let label = match strings.get(typ) {
            Some(entry) => entry.as_str(),
            None => return NodeType::Other(typ),
        };

        match label {
            "hidden" => NodeType::Hidden,
            "array" => NodeType::Array,
            "string" => NodeType::String,
            "object" => NodeType::Object,
            "code" => NodeType::Code,
            "closure" => NodeType::Closure,
            "regexp" => NodeType::RegExp,
            "number" => NodeType::Number,
            "native" => NodeType::Native,
            "synthetic" => NodeType::Syntheic,
            "concatenated string" => NodeType::ConcatString,
            "sliced string" => NodeType::SliceString,
            "bigint" => NodeType::BigInt,
            _ => NodeType::Other(typ),
        }
    }
}
441
442#[derive(Debug)]
443pub struct NodeEdge {
444    pub typ: EdgeType,
445    pub from_index: usize,
446    pub to_index: usize,
447    pub name: NameOrIndex,
448}
449
450#[derive(Debug)]
451pub struct PGNodeEdge {
452    pub typ: EdgeType,
453    pub name: NameOrIndex,
454}
455
456#[derive(Debug, Deserialize)]
457#[serde(untagged)]
458pub enum NameOrIndex {
459    Index(usize),
460    Name(String),
461}
462
/// Kind of a heap edge, resolved from the snapshot's edge type table.
///
/// `Other` carries the raw type index for names this decoder does not know.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
#[repr(u8)]
pub enum EdgeType {
    Context,
    Element,
    Property,
    Internal,
    Hidden,
    Shortcut,
    Weak,
    Invisible,
    Other(usize),
}

impl From<EdgeType> for u8 {
    /// Maps each known kind to a stable small integer; every `Other(_)`
    /// collapses to 8 regardless of its payload.
    fn from(t: EdgeType) -> u8 {
        use EdgeType::*;

        match t {
            Context => 0,
            Element => 1,
            Property => 2,
            Internal => 3,
            Hidden => 4,
            Shortcut => 5,
            Weak => 6,
            Invisible => 7,
            Other(_) => 8,
        }
    }
}

impl Display for EdgeType {
    /// Writes the lowercase kind name, or `other<N>` for unknown kinds.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            EdgeType::Other(raw) => return write!(f, "other<{}>", raw),
            EdgeType::Context => "context",
            EdgeType::Element => "element",
            EdgeType::Property => "property",
            EdgeType::Internal => "internal",
            EdgeType::Hidden => "hidden",
            EdgeType::Shortcut => "shortcut",
            EdgeType::Weak => "weak",
            EdgeType::Invisible => "invisible",
        };
        f.write_str(label)
    }
}

impl EdgeType {
    /// Resolves entry `index` of the type-name table `strings` to a variant.
    ///
    /// An out-of-range index or an unrecognised name yields `Other(index)`.
    fn from_str(strings: &[String], index: usize) -> Self {
        let label = match strings.get(index) {
            Some(entry) => entry.as_str(),
            None => return EdgeType::Other(index),
        };

        match label {
            "context" => EdgeType::Context,
            "element" => EdgeType::Element,
            "property" => EdgeType::Property,
            "internal" => EdgeType::Internal,
            "hidden" => EdgeType::Hidden,
            "shortcut" => EdgeType::Shortcut,
            "weak" => EdgeType::Weak,
            "invisible" => EdgeType::Invisible,
            _ => EdgeType::Other(index),
        }
    }
}
525
526pub fn decode_reader(input: impl std::io::Read) -> Result<Graph, serde_json::Error> {
527    // todo@connor412: parsing the JSON takes the majority of time when parsing
528    // a graph. We might be faster if we use DeserializeSeed to parse data
529    // directly into the graph structure.
530    // https://docs.rs/serde/latest/serde/de/trait.DeserializeSeed.html
531    let perf = PerfCounter::new("json_decode");
532    serde_json::from_reader(input).map(|b| {
533        drop(perf);
534        to_graph(b)
535    })
536}
537
538pub fn decode_slice(input: &[u8]) -> Result<Graph, serde_json::Error> {
539    let perf = PerfCounter::new("json_decode");
540    serde_json::from_slice(input).map(|b| {
541        drop(perf);
542        to_graph(b)
543    })
544}
545
546pub fn decode_str(input: &str) -> Result<Graph, serde_json::Error> {
547    let perf = PerfCounter::new("json_decode");
548    serde_json::from_str(input).map(|b| {
549        drop(perf);
550        to_graph(b)
551    })
552}
553
/// wasm-bindgen entry point: decodes a snapshot from bytes and reports any
/// failure as the error's string form.
#[cfg(target_arch = "wasm32")]
#[wasm_bindgen]
pub fn decode_bytes(input: &[u8]) -> std::result::Result<Graph, String> {
    match decode_slice(input) {
        Ok(graph) => Ok(graph),
        Err(err) => Err(err.to_string()),
    }
}
559
560fn to_graph(root: Root) -> Graph {
561    let _perf = PerfCounter::new("init_graph");
562    let root_index = root.snapshot.root_index.unwrap_or_default();
563    Graph::new(root.graph, root_index, root.strings)
564}
565
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Smoke test: the bundled fixture must decode without an error.
    #[test]
    fn test_basic_heapsnapshot() {
        let bytes = fs::read("test/basic.heapsnapshot").unwrap();
        let decoded = decode_slice(&bytes);
        decoded.expect("expect no errors");
    }
}