pagegraph/
graph.rs

1use std::collections::HashMap;
2use std::convert::TryFrom;
3
4use petgraph::graphmap::DiGraphMap;
5
6use crate::types::{NodeType, EdgeType, RequestType};
7
8#[derive(Debug)]
9pub struct PageGraphDescriptor {
10    pub version: String,
11    pub about: String,
12    pub url: String,
13    pub is_root: bool,
14    pub frame_id: FrameId,
15    pub time: PageGraphTime,
16}
17
18#[derive(Debug)]
19pub struct PageGraphTime {
20    pub start: u64,
21    pub end: u64,
22}
23
24/// The main PageGraph data structure.
25#[derive(Debug)]
26pub struct PageGraph {
27    pub desc: PageGraphDescriptor,
28    pub edges: HashMap<EdgeId, Edge>,
29    pub nodes: HashMap<NodeId, Node>,
30    pub graph: DiGraphMap<NodeId, Vec<EdgeId>>,
31
32    next_edge_id: std::cell::RefCell<usize>,
33}
34
35impl PageGraph {
36    pub fn new(desc: PageGraphDescriptor, edges: HashMap<EdgeId, Edge>, nodes: HashMap<NodeId, Node>, graph: DiGraphMap<NodeId, Vec<EdgeId>>) -> Self {
37        Self {
38            desc,
39            edges,
40            nodes,
41            graph,
42            next_edge_id: std::cell::RefCell::new(usize::MAX),
43        }
44    }
45
46    /// Returns a new edge id that is guaranteed not to collide with an existing id in the graph.
47    pub(crate) fn new_edge_id(&self) -> EdgeId {
48        let new_id = EdgeId::from(self.next_edge_id.replace_with(|id| *id - 1));
49        assert!(!self.edges.contains_key(&new_id));
50        new_id
51    }
52
53    pub fn source_node<'a>(&'a self, edge: &Edge) -> &'a Node {
54        self.nodes.get(&edge.source).unwrap_or_else(|| panic!("Source node for edge {:?} could not be found in the graph", edge))
55    }
56
57    pub fn target_node<'a>(&'a self, edge: &Edge) -> &'a Node {
58        self.nodes.get(&edge.target).unwrap_or_else(|| panic!("Target node for edge {:?} could not be found in the graph", edge))
59    }
60
61    pub fn outgoing_edges<'a>(&'a self, node: &Node) -> impl Iterator<Item=&'a Edge> {
62        self.edges_iter_directed(node, petgraph::Direction::Outgoing)
63    }
64
65    pub fn incoming_edges<'a>(&'a self, node: &Node) -> impl Iterator<Item=&'a Edge> {
66        self.edges_iter_directed(node, petgraph::Direction::Incoming)
67    }
68
69    fn edges_iter_directed<'a>(&'a self, node: &Node, direction: petgraph::Direction) -> impl Iterator<Item=&'a Edge> {
70        self.graph.edges_directed(node.id, direction).map(move |(_a, _b, edge_ids)| {
71            edge_ids
72        })
73            .flatten()
74            .map(move |edge_id| {
75                self.edges.get(&edge_id).unwrap()
76            })
77    }
78
79    pub fn outgoing_neighbors<'a>(&'a self, node: &Node) -> impl Iterator<Item=&'a Node> {
80        self.nodes_iter_directed(node, petgraph::Direction::Outgoing)
81    }
82
83    pub fn incoming_neighbors<'a>(&'a self, node: &Node) -> impl Iterator<Item=&'a Node> {
84        self.nodes_iter_directed(node, petgraph::Direction::Incoming)
85    }
86
87    fn nodes_iter_directed<'a>(&'a self, node: &Node, direction: petgraph::Direction) -> impl Iterator<Item=&'a Node> {
88        self.graph.neighbors_directed(node.id, direction).map(move |node_id| {
89            self.nodes.get(&node_id).unwrap()
90        })
91    }
92}
93
94#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, PartialOrd, Ord, serde::Serialize)]
95struct GraphItemId {
96    id: usize,
97    frame_id: Option<FrameId>,
98}
99
100impl From<usize> for GraphItemId {
101    fn from(v: usize) -> Self {
102        Self {
103            id: v,
104            frame_id: None
105        }
106    }
107}
108
109impl TryFrom<&str> for GraphItemId {
110    type Error = ParseIdError;
111
112    fn try_from(v: &str) -> Result<Self, Self::Error> {
113        if let Some((id, frame_id)) = v.split_once(':') {
114            let id = id.parse::<usize>()?;
115            Ok(GraphItemId {
116                id,
117                frame_id: Some(FrameId::try_from(frame_id)?),
118            })
119        } else {
120            let id = v.parse::<usize>()?;
121            Ok(Self::from(id))
122        }
123    }
124}
125
126impl GraphItemId {
127    fn copy_for_frame_id(&self, frame_id: &FrameId) -> Self {
128        Self {
129            id: self.id,
130            frame_id: Some(frame_id.clone()),
131        }
132    }
133}
134
135pub trait HasFrameId {
136    fn get_frame_id(&self) -> Option<FrameId>;
137}
138
139pub fn is_same_frame_context<A: HasFrameId, B: HasFrameId>(a: A, b: B) -> bool {
140    a.get_frame_id() == b.get_frame_id()
141}
142
143/// An identifier used to reference a node.
144#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, PartialOrd, Ord, serde::Serialize)]
145pub struct NodeId(GraphItemId);
146
147impl From<usize> for NodeId {
148    fn from(v: usize) -> Self {
149        Self(v.into())
150    }
151}
152
153impl NodeId {
154    pub fn copy_for_frame_id(&self, frame_id: &FrameId) -> Self {
155        Self(self.0.copy_for_frame_id(frame_id))
156    }
157}
158
159impl HasFrameId for NodeId {
160    fn get_frame_id(&self) -> Option<FrameId> {
161        self.0.frame_id
162    }
163}
164
165impl std::fmt::Display for NodeId {
166    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
167        if let Some(frame_id) = self.get_frame_id() {
168            write!(f, "n{}:{}", self.0.id, frame_id)
169        } else {
170            write!(f, "n{}", self.0.id)
171        }
172    }
173}
174
175impl TryFrom<&str> for NodeId {
176    type Error = ParseIdError;
177
178    fn try_from(v: &str) -> Result<Self, Self::Error> {
179        if let Some(("", rest)) = v.split_once('n') {
180            Ok(Self(GraphItemId::try_from(rest)?))
181        } else {
182            Err(ParseIdError::MissingPrefix)
183        }
184    }
185}
186
187/// Downstream requests tree
188#[derive(serde::Serialize)]
189pub struct DownstreamRequests {
190    pub request_id: usize,
191    pub url: String,
192    pub request_type: RequestType,
193    pub node_id: NodeId,
194    pub children: Vec<DownstreamRequests>,
195}
196
197/// A node, representing a side effect of a page load.
198#[derive(Debug, Clone, serde::Serialize)]
199pub struct Node {
200    pub id: NodeId,
201    pub node_timestamp: isize,
202    pub node_type: NodeType,
203}
204
205/// An identifier used to reference an edge.
206#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, PartialOrd, Ord, serde::Serialize)]
207pub struct EdgeId(GraphItemId);
208
209impl From<usize> for EdgeId {
210    fn from(v: usize) -> Self {
211        EdgeId(v.into())
212    }
213}
214
215#[derive(Debug, PartialEq, Eq)]
216pub enum ParseIdError {
217    MissingPrefix,
218    ParseIntError,
219    FrameIdLength,
220}
221
222impl From<std::num::ParseIntError> for ParseIdError {
223    fn from(_: std::num::ParseIntError) -> Self {
224        Self::ParseIntError
225    }
226}
227
228impl TryFrom<&str> for EdgeId {
229    type Error = ParseIdError;
230
231    fn try_from(v: &str) -> Result<Self, Self::Error> {
232        if let Some(("", rest)) = v.split_once('e') {
233            Ok(Self(GraphItemId::try_from(rest)?))
234        } else {
235            Err(ParseIdError::MissingPrefix)
236        }
237    }
238}
239
240impl EdgeId {
241    pub fn copy_for_frame_id(&self, frame_id: &FrameId) -> Self {
242        Self(self.0.copy_for_frame_id(frame_id))
243    }
244}
245
246impl HasFrameId for EdgeId {
247    fn get_frame_id(&self) -> Option<FrameId> {
248        self.0.frame_id
249    }
250}
251
252impl std::fmt::Display for EdgeId {
253    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
254        if let Some(frame_id) = self.get_frame_id() {
255            write!(f, "e{}:{}", self.0.id, frame_id)
256        } else {
257            write!(f, "e{}", self.0.id)
258        }
259    }
260}
261
262/// An edge, representing an action taken during page load.
263#[derive(Debug, Clone, serde::Serialize)]
264pub struct Edge {
265    pub id: EdgeId,
266    pub edge_timestamp: Option<isize>,
267    pub edge_type: EdgeType,
268    pub source: NodeId,
269    pub target: NodeId,
270}
271
272impl PartialEq for Edge {
273    fn eq(&self, rhs: &Self) -> bool {
274        self.id == rhs.id
275    }
276}
277
278#[derive(PartialEq, Eq, Hash, Clone, Copy, PartialOrd, Ord, serde::Serialize)]
279pub struct FrameId(u128);
280
281impl TryFrom<&str> for FrameId {
282    type Error = ParseIdError;
283    /// Chromium formats these 128-bit tokens as 32-character hexadecimal strings.
284    fn try_from(v: &str) -> Result<Self, Self::Error> {
285        if v.len() != 32 {
286            return Err(ParseIdError::FrameIdLength);
287        }
288        Ok(Self(u128::from_str_radix(v, 16)?))
289    }
290}
291
292impl std::fmt::Display for FrameId {
293    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
294        write!(f, "{:0>32X}", self.0)
295    }
296}
297
298impl std::fmt::Debug for FrameId {
299    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
300        write!(f, "\"{:0>32X}\"", self.0)
301    }
302}
303
304#[cfg(test)]
305mod id_parsing_tests {
306    use super::*;
307
308    #[test]
309    fn test_frame_id_parsing() {
310        assert_eq!(FrameId::try_from("00000000000000000000000000000000"), Ok(FrameId(0)));
311        assert_eq!(FrameId::try_from("00000000000000000000000000000001"), Ok(FrameId(1)));
312        assert_eq!(FrameId::try_from("0000000000000000000000000000000f"), Ok(FrameId(15)));
313        assert_eq!(FrameId::try_from("FfFFFFFfFffFFFfFFFFfffFFFfFFFfff"), Ok(FrameId(u128::MAX)));
314
315        assert_eq!(FrameId::try_from(" 00000000000000000000000000000000"), Err(ParseIdError::FrameIdLength));
316        assert_eq!(FrameId::try_from(" 0000000000000000000000000000000"), Err(ParseIdError::ParseIntError));
317        assert_eq!(FrameId::try_from("0000000000000000000000000000000"), Err(ParseIdError::FrameIdLength));
318        assert_eq!(FrameId::try_from("000000000000000000000000000000000"), Err(ParseIdError::FrameIdLength));
319    }
320
321    #[test]
322    fn test_graph_item_id_parsing() {
323        assert_eq!(GraphItemId::try_from("0"), Ok(GraphItemId{ id: 0, frame_id: None }));
324        assert_eq!(GraphItemId::try_from("8"), Ok(GraphItemId{ id: 8, frame_id: None }));
325        assert_eq!(GraphItemId::try_from("200"), Ok(GraphItemId{ id: 200, frame_id: None }));
326        assert_eq!(GraphItemId::try_from("103810150"), Ok(GraphItemId{ id: 103810150, frame_id: None }));
327        assert_eq!(GraphItemId::try_from("0:00000000000000000000000000000000"), Ok(GraphItemId{ id: 0, frame_id: Some(FrameId(0)) }));
328        assert_eq!(GraphItemId::try_from("8:00000000000000000000000000000001"), Ok(GraphItemId{ id: 8, frame_id: Some(FrameId(1)) }));
329        assert_eq!(GraphItemId::try_from("200:0000000000000000000000000000000f"), Ok(GraphItemId{ id: 200, frame_id: Some(FrameId(15)) }));
330        assert_eq!(GraphItemId::try_from("103810150:FfFFFFFfFffFFFfFFFFfffFFFfFFFfff"), Ok(GraphItemId{ id: 103810150, frame_id: Some(FrameId(u128::MAX)) }));
331
332        assert_eq!(GraphItemId::try_from("38 : 00000000000000000000000000000000"), Err(ParseIdError::ParseIntError));
333        assert_eq!(GraphItemId::try_from("f8:00000000000000000000000000000000"), Err(ParseIdError::ParseIntError));
334        assert_eq!(GraphItemId::try_from(":00000000000000000000000000000000"), Err(ParseIdError::ParseIntError));
335        assert_eq!(GraphItemId::try_from("0:0000000000000000000000000000000"), Err(ParseIdError::FrameIdLength));
336        assert_eq!(GraphItemId::try_from("0:000000000000000000000000000000000"), Err(ParseIdError::FrameIdLength));
337    }
338
339    #[test]
340    fn test_edge_id_parsing() {
341        assert_eq!(EdgeId::try_from("e0"), Ok(EdgeId(GraphItemId{ id: 0, frame_id: None })));
342        assert_eq!(EdgeId::try_from("e8"), Ok(EdgeId(GraphItemId{ id: 8, frame_id: None })));
343        assert_eq!(EdgeId::try_from("e200"), Ok(EdgeId(GraphItemId{ id: 200, frame_id: None })));
344        assert_eq!(EdgeId::try_from("e103810150"), Ok(EdgeId(GraphItemId{ id: 103810150, frame_id: None })));
345        assert_eq!(EdgeId::try_from("e0:00000000000000000000000000000000"), Ok(EdgeId(GraphItemId{ id: 0, frame_id: Some(FrameId(0)) })));
346        assert_eq!(EdgeId::try_from("e8:00000000000000000000000000000001"), Ok(EdgeId(GraphItemId{ id: 8, frame_id: Some(FrameId(1)) })));
347        assert_eq!(EdgeId::try_from("e200:0000000000000000000000000000000f"), Ok(EdgeId(GraphItemId{ id: 200, frame_id: Some(FrameId(15)) })));
348        assert_eq!(EdgeId::try_from("e103810150:FfFFFFFfFffFFFfFFFFfffFFFfFFFfff"), Ok(EdgeId(GraphItemId{ id: 103810150, frame_id: Some(FrameId(u128::MAX)) })));
349
350        assert_eq!(EdgeId::try_from("n0"), Err(ParseIdError::MissingPrefix));
351        assert_eq!(EdgeId::try_from("8"), Err(ParseIdError::MissingPrefix));
352        assert_eq!(EdgeId::try_from("e 200"), Err(ParseIdError::ParseIntError));
353        assert_eq!(EdgeId::try_from("e103810150:"), Err(ParseIdError::FrameIdLength));
354        assert_eq!(EdgeId::try_from("n0:00000000000000000000000000000000"), Err(ParseIdError::MissingPrefix));
355        assert_eq!(EdgeId::try_from("0:00000000000000000000000000000000"), Err(ParseIdError::MissingPrefix));
356        assert_eq!(EdgeId::try_from("0e:00000000000000000000000000000000"), Err(ParseIdError::MissingPrefix));
357        assert_eq!(EdgeId::try_from("e:00000000000000000000000000000000"), Err(ParseIdError::ParseIntError));
358        assert_eq!(EdgeId::try_from(":00000000000000000000000000000000"), Err(ParseIdError::MissingPrefix));
359        assert_eq!(EdgeId::try_from("e38 : 00000000000000000000000000000000"), Err(ParseIdError::ParseIntError));
360        assert_eq!(EdgeId::try_from("ef8:00000000000000000000000000000000"), Err(ParseIdError::ParseIntError));
361        assert_eq!(EdgeId::try_from("e:00000000000000000000000000000000"), Err(ParseIdError::ParseIntError));
362        assert_eq!(EdgeId::try_from("e0:0000000000000000000000000000000"), Err(ParseIdError::FrameIdLength));
363        assert_eq!(EdgeId::try_from("e0:000000000000000000000000000000000"), Err(ParseIdError::FrameIdLength));
364    }
365
366    #[test]
367    fn test_node_id_parsing() {
368        assert_eq!(NodeId::try_from("n0"), Ok(NodeId(GraphItemId{ id: 0, frame_id: None })));
369        assert_eq!(NodeId::try_from("n8"), Ok(NodeId(GraphItemId{ id: 8, frame_id: None })));
370        assert_eq!(NodeId::try_from("n200"), Ok(NodeId(GraphItemId{ id: 200, frame_id: None })));
371        assert_eq!(NodeId::try_from("n103810150"), Ok(NodeId(GraphItemId{ id: 103810150, frame_id: None })));
372        assert_eq!(NodeId::try_from("n0:00000000000000000000000000000000"), Ok(NodeId(GraphItemId{ id: 0, frame_id: Some(FrameId(0)) })));
373        assert_eq!(NodeId::try_from("n8:00000000000000000000000000000001"), Ok(NodeId(GraphItemId{ id: 8, frame_id: Some(FrameId(1)) })));
374        assert_eq!(NodeId::try_from("n200:0000000000000000000000000000000f"), Ok(NodeId(GraphItemId{ id: 200, frame_id: Some(FrameId(15)) })));
375        assert_eq!(NodeId::try_from("n103810150:FfFFFFFfFffFFFfFFFFfffFFFfFFFfff"), Ok(NodeId(GraphItemId{ id: 103810150, frame_id: Some(FrameId(u128::MAX)) })));
376
377        assert_eq!(NodeId::try_from("e0"), Err(ParseIdError::MissingPrefix));
378        assert_eq!(NodeId::try_from("8"), Err(ParseIdError::MissingPrefix));
379        assert_eq!(NodeId::try_from("n 200"), Err(ParseIdError::ParseIntError));
380        assert_eq!(NodeId::try_from("n103810150:"), Err(ParseIdError::FrameIdLength));
381        assert_eq!(NodeId::try_from("e0:00000000000000000000000000000000"), Err(ParseIdError::MissingPrefix));
382        assert_eq!(NodeId::try_from("0:00000000000000000000000000000000"), Err(ParseIdError::MissingPrefix));
383        assert_eq!(NodeId::try_from("0n:00000000000000000000000000000000"), Err(ParseIdError::MissingPrefix));
384        assert_eq!(NodeId::try_from("n:00000000000000000000000000000000"), Err(ParseIdError::ParseIntError));
385        assert_eq!(NodeId::try_from(":00000000000000000000000000000000"), Err(ParseIdError::MissingPrefix));
386        assert_eq!(NodeId::try_from("n38 : 00000000000000000000000000000000"), Err(ParseIdError::ParseIntError));
387        assert_eq!(NodeId::try_from("nf8:00000000000000000000000000000000"), Err(ParseIdError::ParseIntError));
388        assert_eq!(NodeId::try_from("n:00000000000000000000000000000000"), Err(ParseIdError::ParseIntError));
389        assert_eq!(NodeId::try_from("n0:0000000000000000000000000000000"), Err(ParseIdError::FrameIdLength));
390        assert_eq!(NodeId::try_from("n0:000000000000000000000000000000000"), Err(ParseIdError::FrameIdLength));
391    }
392
393    #[test]
394    fn test_round_trip() {
395        fn test_str(id_str: &str) {
396            assert_eq!(format!("{}", NodeId::try_from(id_str).unwrap()), id_str);
397
398            let node_id = NodeId::try_from(id_str).unwrap();
399
400            assert_eq!(NodeId::try_from(format!("{}", node_id).as_str()).unwrap(), node_id);
401        }
402
403        test_str("n0");
404        test_str("n8");
405        test_str("n200");
406        test_str("n103810150");
407        test_str("n0:00000000000000000000000000000000");
408        test_str("n8:00000000000000000000000000000001");
409        test_str("n200:0000000000000000000000000000000F");
410        test_str("n103810150:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF");
411        test_str("n99999:0123456789ABCDEF0123456789ABCDEF");
412    }
413}