pagegraph/
types.rs

1use crate::graph::FrameId;
2
/// HtmlElementId represents the unsigned integer identifier that Blink uses
/// internally for each HTML element created during the execution of a Web page.
/// Note that values are unique (and monotonically increasing) for all
/// documents executing in the
/// [same process](https://developer.chrome.com/blog/site-isolation/).
/// This set of values is shared across HTML elements (e.g., `<a>`, `<img>`),
/// text nodes, and white space nodes.
pub type HtmlElementId = usize;

/// ScriptId represents the unsigned integer identifier V8 uses
/// for tracking each JavaScript code unit compiled and executed during
/// the execution of a page. Note that values are monotonically increasing for
/// all scripts executing, including scripts defined inline in `<script>`
/// tags, "classic" and "module" scripts fetched via the `<script>` element's
/// `src` property, or scripts otherwise passed to the JavaScript compiler
/// (e.g., `eval`, strings provided as the first argument to `setInterval`,
/// attributes like "onclick" defined in HTML elements, etc.).
pub type ScriptId = usize;

/// A string encoding a URL. May either be a full URL (protocol, host, port,
/// path, etc.) or a relative one, depending on the context in the graph.
pub type Url = String;

/// A string encoding the name of an HTML tag (e.g., `"a"` for an anchor tag,
/// or `"img"` for an image tag).
pub type HtmlTag = String;

/// A string encoding the name of an attribute on an HTML tag (e.g., `"href"`
/// for the target of an anchor tag, or `"src"` for the source URL of the image
/// presented in an image tag).
pub type HtmlAttr = String;
34
35/// Represents the type of any PageGraph node, along with any associated type-specific data.
36/// Nodes in PageGraph (mostly) represent either Actors (things that do things)
37/// or Actees (things that have things done to them).
38///
39/// For example, if JavaScript code creates a HTML element and injects it into
40/// a document, that would be represented in PageGraph with three nodes:
41///
42/// 1. a node representing the JavaScript code unit
43/// 2. a node representing the HTML element that was created, and
44/// 3. a third node representing the existing HTML element the just created
45///    HTML element is inserted below in the DOM.
46#[derive(Clone, PartialEq, Debug, serde::Serialize)]
47pub enum NodeType {
48    /// Resource nodes record URLs that are requested from network. Each
49    /// URL requested is represented with its own Resource node. Each
50    /// request is denoted with a [`RequestStart`](EdgeType::RequestStart) edge, and each
51    /// response is denoted with either a [`RequestComplete`](EdgeType::RequestComplete) or
52    /// [`RequestError`](EdgeType::RequestError) edge (which record whether the request to the
53    /// URL succeeded, and if so, what was returned).
54    Resource {
55        /// The URL represented by this node.
56        url: String
57    },
58    /// WebApi nodes represent [Web APIs](https://developer.mozilla.org/en-US/docs/Web/API)
59    /// provided by the browser that JavaScript code can call. There will be at
60    /// most one WebApi node for each Web API called during the execution
61    /// of the page.
62    ///
63    /// Each call to the method represented by this node is encoded
64    /// with the following:
65    /// 1. a [`Script`](NodeType::Script) node, recording the JavaScript code unit
66    ///    calling the Web API.
67    /// 2. an outgoing [`JsCall`](EdgeType::JsCall) edge, recording the arguments
68    ///    provided when calling the API), if any.
69    /// 3. a [`WebApi`](NodeType::WebApi) node , recording the Web API method being
70    ///    called
71    /// 4. a returning (incoming) [`JsResult`](EdgeType::JsResult) edge, recording the
72    ///    value returned by the WebApi, if any
73    ///
74    /// Note that for performance reasons, only Web APIs specified during a
75    /// Brave build are recorded in PageGraph. The list of APIs Brave
76    /// records by default in Nightly and Beta builds can be found
77    /// [in the Brave source code](https://github.com/brave/brave-core/blob/master/chromium_src/third_party/blink/renderer/bindings/scripts/bind_gen/interface.py#L20).
78    /// Note further that no WebAPIs are recorded in PageGraph in Stable Brave
79    /// builds. If you want to record other WebApis in PageGraph, you
80    /// should modify [the PageGraph interface.py script](https://github.com/brave/brave-core/blob/master/chromium_src/third_party/blink/renderer/bindings/scripts/bind_gen/interface.py#L20)
81    /// and then rebuild Brave.
82    WebApi {
83        /// The path to the Web API function this node represents.
84        /// For example, a value of
85        /// [`Performance.now`](https://developer.mozilla.org/en-US/docs/Web/API/Performance/now)
86        /// denotes the JavaScript method provided in most browsers at
87        /// `window.performance.now()`.
88        method: String
89    },
90    JsBuiltin {
91        method: String
92    },
93    /// HTMLElement nodes represent the elements that make up the structure
94    /// of a Web page. They map to things like `<a>`, `<img>`, `<div>`, etc.
95    /// PageGraph creates one for each HTML element that exists at any point
96    /// during the lifetime of the web page (even those that are not
97    /// inserted into the DOM).
98    HtmlElement {
99        /// Stores the tag name of the HTML element, similar to the
100        /// [`tagName`](https://developer.mozilla.org/en-US/docs/Web/API/Element/tagName)
101        /// JavaScript attribute. For an image tag, this value will be "img",
102        /// for a unordered list element this value will be "ul", etc.
103        tag_name: HtmlTag,
104        /// Records whether the node is alive (and not garbage collected)
105        /// at the point in time when the PageGraph document was serialized.
106        is_deleted: bool,
107        /// The integer identifier Blink assigns to each element in the DOM
108        /// of a web page. Will be unique for all
109        /// [`HtmlElements`](NodeType::HtmlElement),
110        /// [TextNodes](NodeType.TextNode), and [DomRoots](NodeType.DomRoot)
111        /// within the same process (in terms of process isolation).
112        ///
113        /// This value referenced from the attributes of several actions (i.e.,
114        /// [events][EdgeType]) recorded in PageGraph. For example, when a
115        /// a [JavaScript unit](NodeType::Script) [inserts](EdgeType::InsertNode)
116        /// a [`HtmlElement`](NodeType::HtmlElement) into a document, the
117        /// [`InsertNode`](EdgeType::InsertNode) edge's
118        /// [`parent`](EdgeType::InsertNode::parent) attribute records the
119        /// [`node_id`](NodeType::HtmlElement::node_id) of the HtmlElement the
120        /// node was inserted below.
121        node_id: HtmlElementId,
122    },
123    /// TextNode nodes represent [text nodes](https://developer.mozilla.org/en-US/docs/Web/API/Text)
124    /// in the DOM tree. PageGraph uses this node type to also represent
125    /// white space, so (unfortunately) all text nodes don't necessarily
126    /// include text; some are just white space.
127    TextNode {
128        /// The text string, if any, in the DOM tree, represented by this node.
129        text: Option<String>,
130        /// Records whether the node is alive (and not garbage collected)
131        /// at the point in time when the PageGraph document was serialized.
132        is_deleted: bool,
133        /// The integer identifier Blink assigns to each element in the DOM
134        /// of a web page. Will be unique for all
135        /// [`HtmlElements`](NodeType::HtmlElement),
136        /// [TextNodes](NodeType.TextNode), and [DomRoots](NodeType.DomRoot)
137        /// within the same process (in terms of process isolation).
138        ///
139        /// This value referenced from the attributes of several actions (i.e.,
140        /// [events][EdgeType]) recorded in PageGraph. For example, when a
141        /// a [JavaScript unit](NodeType::Script) [inserts](EdgeType::InsertNode)
142        /// a [text](NodeType::TextNode) into a document, the
143        /// [`InsertNode`](EdgeType::InsertNode) edge's
144        /// [`parent`](EdgeType::InsertNode::parent) attribute records the
145        /// [`node_id`](NodeType::TextNode::node_id) of the TextNode the
146        /// node was inserted below.
147        node_id: HtmlElementId,
148    },
149    DomRoot {
150        url: Option<Url>,
151        tag_name: HtmlTag,
152        is_deleted: bool,
153        node_id: HtmlElementId,
154    },
155    FrameOwner {
156        tag_name: HtmlTag,
157        is_deleted: bool,
158        node_id: HtmlElementId,
159    },
160    /// Singleton node that represents the [`window.localStorage`](https://developer.mozilla.org/en-US/docs/Web/API/Window/localStorage)
161    /// storage area read from, and written to, during the execution of the Web
162    /// site.
163    ///
164    /// Note that this node does not record what values are in the storage
165    /// area at any given point. To observe what keys are written to and read
166    /// from local storage, consider instead the incoming and outgoing edges to
167    /// this node.
168    ///
169    /// The following edges record [JavaScript code](NodeType::Script)
170    /// interacting with the storage area:
171    /// - [`StorageSet`](EdgeType::StorageSet): records when a script writes
172    ///   a value to the storage area. This maps to calling [`setItem`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/setItem)
173    ///   (or equivalent) on the storage area.
174    /// - [`ReadStorageCall`](EdgeType::ReadStorageCall) records when a script
175    ///   attempts to read a value from the storage area. This corresponds to
176    ///   a script calling [`getItem`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/getItem)
177    ///   (or equivalent) on the storage area.
178    /// - [`DeleteStorage`](EdgeType::DeleteStorage) records when a script
179    ///   deletes a key from the storage area. This corresponds to a
180    ///   script calling [`removeItem`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/removeItem)
181    ///   on the storage area.
182    /// - [`ClearStorage`](EdgeType::ClearStorage) records when a script
183    ///   clears all values from the storage area. This corresponds to a script
184    ///   calling [`clear`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/clear)
185    ///   on the storage area.
186    ///
187    /// The following edges record the results of [JavaScript code](NodeType::Script)
188    /// receive when interacting with the storage area:
189    /// - [`StorageReadResult`](EdgeType::StorageReadResult) records the
190    ///   value returned to the script that read a value from the storage
191    ///   area.
192    LocalStorage {},
193    /// Singleton node that represents the [`window.sessionStorage`](https://developer.mozilla.org/en-US/docs/Web/API/Window/sessionStorage)
194    /// storage area read from, and written to, during the execution of the Web
195    /// site.
196    ///
197    /// Note that this node does not record what values are in the storage
198    /// area at any given point. To observe what keys are written to and read
199    /// from session storage, consider instead the incoming and outgoing edges to
200    /// this node.
201    ///
202    /// The following edges record [JavaScript code](NodeType::Script)
203    /// interacting with the storage area:
204    /// - [`StorageSet`](EdgeType::StorageSet): records when a script writes
205    ///   a value to the storage area. This maps to calling [`setItem`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/setItem)
206    ///   (or equivalent) on the storage area.
207    /// - [`ReadStorageCall`](EdgeType::ReadStorageCall) records when a script
208    ///   attempts to read a value from the storage area. This corresponds to
209    ///   a script calling [`getItem`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/getItem)
210    ///   (or equivalent) on the storage area.
211    /// - [`DeleteStorage`](EdgeType::DeleteStorage) records when a script
212    ///   deletes a key from the storage area. This corresponds to a
213    ///   script calling [`removeItem`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/removeItem)
214    ///   on the storage area.
215    /// - [`ClearStorage`](EdgeType::ClearStorage) records when a script
216    ///   clears all values from the storage area. This corresponds to a script
217    ///   calling [`clear`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/clear)
218    ///   on the storage area.
219    ///
220    /// The following edges record the results of [JavaScript code](NodeType::Script)
221    /// receive when interacting with the storage area:
222    /// - [`StorageReadResult`](EdgeType::StorageReadResult) records the
223    ///   value returned to the script that read a value from the storage
224    ///   area.
225    SessionStorage {},
226    /// Singleton node that represents the [`document.cookie`]()
227    /// property read from, and written to, during the execution of the Web
228    /// site. Note that this property *does not* encode cookies set or
229    /// read through HTTP headers, only cookie activities from scripts.
230    ///
231    /// The following edges record [JavaScript code](NodeType::Script)
232    /// reading and writing cookies during page execution:
233    /// - [`StorageSet`](EdgeType::StorageSet): records when a script writes
234    ///   assigns a value to the `document.cookie` property. Note though that
235    ///   because of the how the JavaScript cookie API works, assigning a
236    ///   value to the `document.cookie` property might actually delete
237    ///   or modify a cookie in the site's cookie jar.
238    /// - [`ReadStorageCall`](EdgeType::ReadStorageCall) records when a script
239    ///   is reading the value of `document.cookie`.
240    ///
241    /// The following edges record value returned to [JavaScript code](NodeType::Script)
242    /// reading the state of the `document.cookie` property.
243    /// - [`StorageReadResult`](EdgeType::StorageReadResult) records the
244    ///   value returned to the script that read the value of `document.cookie`.
245    CookieJar {},
246    /// Script nodes represent JavaScript code units compiled by V8
247    /// and executed during the lifetime of the page. Script nodes
248    /// encode any kind of script that can run during the page's execution
249    /// (e.g., fetched or inlined "classic" scripts or module scripts,
250    /// or eval'ed scripts).
251    Script {
252        /// The URL this script was fetched from, in the case that the script
253        /// was fetched from a URL via a `<script>` element's `src` attribute,
254        /// or a dynamically fetched module script.
255        url: Option<Url>,
256        /// The type of script being executed, either a "module" script
257        /// or a "classic" script.
258        script_type: String,
259        /// The V8 identifier for this JavaScript code unit.
260        script_id: ScriptId,
261        /// The text of the script as passed to the v8 compiler.
262        source: String,
263    },
264    /// Singleton node representing Blink parser, responsible for parsing
265    /// HTML text and generating page elements.
266    Parser {},
267    Binding {
268        binding: String,
269        binding_type: String,
270    },
271    BindingEvent {
272        binding_event: String,
273    },
274    RemoteFrame {
275        frame_id: FrameId,
276    },
277    AdFilter {
278        rule: String
279    },
280    TrackerFilter,  // TODO
281    FingerprintingFilter,   // TODO
282    Storage {},
283    BraveShields {},
284    AdsShield {},
285    TrackersShield {},
286    JavascriptShield {},
287    FingerprintingShield {},
288    FingerprintingV2Shield {},
289    Extensions {},
290}
291
292#[derive(Clone, PartialEq, Debug)]
293#[derive(serde::Serialize)]
294pub enum RequestType {
295    Image,
296    Script,
297    CSS,
298    AJAX,
299    Unknown,
300}
301
302impl From<&str> for RequestType {
303    fn from(v: &str) -> Self {
304        match v {
305            "Image" => Self::Image,
306            "Script" => Self::Script,
307            "CSS" => Self::CSS,
308            "AJAX" => Self::AJAX,
309            "Unknown" => Self::Unknown,
310            _ => Self::Unknown,
311        }
312    }
313}
314
315impl RequestType {
316    pub fn as_str(&self) -> &'static str {
317        match self {
318            Self::Image => "image",
319            Self::Script => "script",
320            Self::CSS => "stylesheet",
321            Self::AJAX => "xhr",
322            Self::Unknown => "unknown",
323        }
324    }
325}
326
327/// Represents the type of any PageGraph edge, along with any associated type-specific data.
328/// Edges in PageGraph represent actions taken by some actor in the
329/// page (e.g., a JavaScript code unit), being performed on some other element
330/// in the page (e.g., a resource being fetched). Edges are outgoing from
331/// the actor, and incoming to the actee.
332#[derive(Clone, PartialEq, Debug)]
333#[derive(serde::Serialize)]
334pub enum EdgeType {
335    CrossDom {},
336    TextChange {},
337    /// `RemoveNode` edges encode a HTML element being removed from the DOM
338    /// tree.
339    ///
340    /// The actor node will be the [`Script`](NodeType::Script) node
341    /// that is removing a HTML element from the document.
342    ///
343    /// The actee node will be the `HtmlElement`](NodeType::HtmlElement) node
344    /// being removed from the document.
345    RemoveNode {},
346    /// `DeleteNode` edges encode a HTML element being deleted by JavaScript
347    /// code. Note that this is a distinct action from merely removing an
348    /// HTML element from a document (which is encoded with a
349    /// [`RemoveNode`](EdgeType::RemoveNode) edge).
350    ///
351    /// The actor node will be the [`Script`](NodeType::Script) node
352    /// that is delete a HTML element.
353    ///
354    /// The actee node will be the `HtmlElement`](NodeType::HtmlElement) node
355    /// being deleted.
356    DeleteNode {},
357    /// `InsertNode` edges encode a HTML element being inserted into a DOM
358    /// tree.
359    ///
360    /// The actor node will either be the [`Parser`](NodeType::Parser)
361    /// (indicating that the element was inserted into the document because
362    /// of text being parsed, most often from the initial HTML document)
363    /// or a [`Script`](NodeType::Script) node (indicating that the element
364    /// was inserted into the document dynamically).
365    ///
366    /// The actee node will be a [`HtmlElement`](NodeType::HtmlElement) node
367    /// depicting the HTML element being inserted into the document.
368    InsertNode {
369        /// The identifier of the DOM element the actee
370        /// [`HtmlElement`](NodeType::HtmlElement) node is being inserted
371        /// beneath in the document.
372        parent: HtmlElementId,
373        /// The identifier of the prior sibling DOM element the actee
374        /// [`HtmlElement`](NodeType::HtmlElement) node is being inserted
375        /// before in the document. If this value is not present, it indicates
376        /// that the actee node was the first child of the parent node at
377        /// insertion time,
378        before: Option<HtmlElementId>,
379    },
380    /// `CreateNode` edges encode that an HTML element that was created during
381    /// the execution of the page.
382    ///
383    /// The actor node will either be the [`Parser`](NodeType::Parser)
384    /// (indicating that the element was created because it was defined in
385    /// text parsed by the blink parser) or a [`Script`](NodeType::Script) node
386    /// (indicating that the element was dynamically created by a JavaScript
387    /// code unit (e.g., `document.createElement`).
388    ///
389    /// The actee node will be a [`HtmlElement`](NodeType::HtmlElement) node
390    /// depicting the HTML element that was created.
391    CreateNode {},
392    /// `JsResult` edges encode a value being returned from a property read
393    /// or a function call in JavaScript code.
394    ///
395    /// The actor node will be either a [`WebApi`](NodeType::WebApi) node
396    /// (representing the WebAPI method or property that was called) or
397    /// a [`JsBuiltin`](NodeType::JsBuiltin) node (representing
398    /// an instrumented method or function thats defined as part of
399    /// ECMAScript).
400    ///
401    /// The actee node will be a [`Script`](NodeType::Script) node
402    /// representing the JavaScript code unit that the value is being
403    /// returned to.
404    JsResult {
405        /// The value being returned from an API to a JavaScript code unit.
406        /// Note that this will not be present for APIs that do not return
407        /// a value when called, such as `addEventListener`).
408        value: Option<String>
409    },
410    /// `JsCall` edges encode a JavaScript function/method being called
411    /// by JavaScript code.
412    ///
413    /// The actor node will be [`Script`](NodeType::Script) node, representing
414    /// the JavaScript code unit calling the property, function, or method.
415    ///
416    /// The actee node will be either a [`WebApi`](NodeType::WebApi) node
417    /// (representing the WebAPI method or property being called) or
418    /// a [`JsBuiltin`](NodeType::JsBuiltin) node (representing
419    /// an instrumented method or function thats defined as part of
420    /// ECMAScript being called).
421    JsCall {
422        /// An serialized version of any arguments provided when the method or
423        /// function being called (if any).
424        args: Option<String>,
425        /// The character offset in the JavaScript text where this JavaScript
426        /// call occurred.
427        script_position: usize,
428    },
429    RequestComplete {
430        resource_type: String,
431        status: String,
432        value: Option<String>,
433        response_hash: Option<String>,
434        request_id: usize,
435        headers: String,
436        size: String,
437    },
438    RequestError {
439        status: String,
440        request_id: usize,
441        value: Option<String>,
442        headers: String,
443        size: String,
444    },
445    RequestStart {
446        request_type: RequestType,
447        status: String,
448        request_id: usize,
449    },
450    RequestResponse, // TODO
451    AddEventListener {
452        key: String,
453        event_listener_id: usize,
454        script_id: ScriptId,
455    },
456    RemoveEventListener {
457        key: String,
458        event_listener_id: usize,
459        script_id: ScriptId,
460    },
461    EventListener {
462        key: String,
463        event_listener_id: usize,
464    },
465    StorageSet {
466        key: String,
467        value: Option<String>,
468    },
469    StorageReadResult {
470        key: String,
471        value: Option<String>,
472    },
473    DeleteStorage {
474        key: String
475    },
476    ReadStorageCall {
477        key: String
478    },
479    ClearStorage {
480        key: String
481    },
482    ExecuteFromAttribute {
483        attr_name: HtmlAttr
484    },
485    Execute {},
486    /// `SetAttribute` edges encode JavaScript code setting an attribute
487    /// on a HTML element.
488    ///
489    /// The actor node will be [`Script`](NodeType::Script) node, representing
490    /// the JavaScript code setting the attribute.
491    ///
492    /// The actee node will be a [`HtmlElement`](NodeType::HtmlElement) node,
493    /// representing the HTML element that is having an attribute set on it.
494    SetAttribute {
495        /// The name of the HTML attribute being set (e.g., `height`,
496        /// `href`, `src`).
497        key: HtmlAttr,
498        /// The value being assigned to the HTML attribute (if any).
499        value: Option<String>,
500        /// If the attribute being set is part of the the element's
501        /// CSS style definition.
502        is_style: bool,
503    },
504    /// `DeleteAttribute` edges encode JavaScript code deleting an attribute
505    /// from a HTML element.
506    ///
507    /// The actor node will be [`Script`](NodeType::Script) node, representing
508    /// the JavaScript code deleting the attribute.
509    ///
510    /// The actee node will be a [`HtmlElement`](NodeType::HtmlElement) node,
511    /// representing the HTML element that is having the attribute deleted.
512    DeleteAttribute {
513        /// The name of the HTML attribute being deleted (e.g., `height`,
514        /// `href`, `src`).
515        key: HtmlAttr,
516        /// If the attribute being deleted is part of the the element's
517        /// CSS style definition.
518        is_style: bool,
519    },
520    Binding {},
521    BindingEvent {
522        script_position: usize,
523    },
524    Filter {},
525    Structure {},
526    Shield {},
527    ResourceBlock {},
528    StorageBucket {},
529}