pagegraph/types.rs
1use crate::graph::FrameId;
2
/// HtmlElementId represents the unsigned integer identifier that Blink uses
/// internally for each HTML element created during the execution of a Web page.
/// Note that values are unique (and monotonically increasing) for all
/// documents executing in the
/// [same process](https://developer.chrome.com/blog/site-isolation/).
/// This set of values is shared across HTML elements (e.g., `<a>`, `<img>`),
/// text nodes, and white space nodes.
///
/// This is a plain alias for `usize`, so the compiler will not stop an
/// `HtmlElementId` from being mixed up with a [`ScriptId`] or any other
/// `usize` value.
pub type HtmlElementId = usize;
11
/// ScriptId represents the unsigned integer identifier V8 uses
/// for tracking each JavaScript code unit compiled and executed during
/// the execution of a page. Note that values are monotonically increasing for
/// all scripts executing, including scripts defined inline in `<script>`
/// tags, "classic" and "module" scripts fetched via the `<script>` element's
/// `src` property, or scripts otherwise passed to the JavaScript compiler
/// (e.g., `eval`, strings provided as the first argument to `setInterval`,
/// attributes like "onclick" defined in HTML elements, etc.).
///
/// Stored in the `script_id` field of [`NodeType::Script`], and in the
/// `script_id` fields of [`EdgeType::AddEventListener`] and
/// [`EdgeType::RemoveEventListener`]. This is a plain alias for `usize`, not
/// a distinct type.
pub type ScriptId = usize;
21
/// A string encoding a URL. May either be a full URL (protocol, host, port,
/// path, etc.) or a relative one, depending on the context in the graph.
///
/// This is a plain alias for `String`; no parsing or validation is implied
/// by the type itself.
pub type Url = String;
25
/// A string encoding the name of an HTML tag (e.g., `"a"` for an anchor tag,
/// or `"img"` for an image tag).
///
/// Used for the `tag_name` field of [`NodeType::HtmlElement`],
/// [`NodeType::DomRoot`], and [`NodeType::FrameOwner`]. This is a plain alias
/// for `String`.
pub type HtmlTag = String;
29
/// A string encoding the name of an attribute on an HTML tag (e.g., `"href"`
/// for the target of an anchor tag, or `"src"` for the source URL of the image
/// presented in an image tag).
///
/// Used for the `key` field of [`EdgeType::SetAttribute`] and
/// [`EdgeType::DeleteAttribute`], and for the `attr_name` field of
/// [`EdgeType::ExecuteFromAttribute`]. This is a plain alias for `String`.
pub type HtmlAttr = String;
34
/// Represents the type of any PageGraph node, along with any associated type-specific data.
/// Nodes in PageGraph (mostly) represent either Actors (things that do things)
/// or Actees (things that have things done to them).
///
/// For example, if JavaScript code creates an HTML element and injects it into
/// a document, that would be represented in PageGraph with three nodes:
///
/// 1. a node representing the JavaScript code unit,
/// 2. a node representing the HTML element that was created, and
/// 3. a third node representing the existing HTML element the just created
///    HTML element is inserted below in the DOM.
#[derive(Clone, PartialEq, Debug, serde::Serialize)]
pub enum NodeType {
    /// Resource nodes record URLs that are requested from the network. Each
    /// URL requested is represented with its own Resource node. Each
    /// request is denoted with a [`RequestStart`](EdgeType::RequestStart) edge, and each
    /// response is denoted with either a [`RequestComplete`](EdgeType::RequestComplete) or
    /// [`RequestError`](EdgeType::RequestError) edge (which record whether the request to the
    /// URL succeeded, and if so, what was returned).
    Resource {
        /// The URL represented by this node.
        url: String
    },
    /// WebApi nodes represent [Web APIs](https://developer.mozilla.org/en-US/docs/Web/API)
    /// provided by the browser that JavaScript code can call. There will be at
    /// most one WebApi node for each Web API called during the execution
    /// of the page.
    ///
    /// Each call to the method represented by this node is encoded
    /// with the following:
    /// 1. a [`Script`](NodeType::Script) node, recording the JavaScript code unit
    ///    calling the Web API.
    /// 2. an outgoing [`JsCall`](EdgeType::JsCall) edge, recording the arguments
    ///    provided when calling the API (if any).
    /// 3. a [`WebApi`](NodeType::WebApi) node, recording the Web API method being
    ///    called.
    /// 4. a returning (incoming) [`JsResult`](EdgeType::JsResult) edge, recording the
    ///    value returned by the WebApi (if any).
    ///
    /// Note that for performance reasons, only Web APIs specified during a
    /// Brave build are recorded in PageGraph. The list of APIs Brave
    /// records by default in Nightly and Beta builds can be found
    /// [in the Brave source code](https://github.com/brave/brave-core/blob/master/chromium_src/third_party/blink/renderer/bindings/scripts/bind_gen/interface.py#L20).
    /// Note further that no WebAPIs are recorded in PageGraph in Stable Brave
    /// builds. If you want to record other WebApis in PageGraph, you
    /// should modify [the PageGraph interface.py script](https://github.com/brave/brave-core/blob/master/chromium_src/third_party/blink/renderer/bindings/scripts/bind_gen/interface.py#L20)
    /// and then rebuild Brave.
    WebApi {
        /// The path to the Web API function this node represents.
        /// For example, a value of
        /// [`Performance.now`](https://developer.mozilla.org/en-US/docs/Web/API/Performance/now)
        /// denotes the JavaScript method provided in most browsers at
        /// `window.performance.now()`.
        method: String
    },
    /// JsBuiltin nodes represent an instrumented method or function that is
    /// defined as part of ECMAScript (as opposed to a Web API provided by the
    /// browser, which is represented by a [`WebApi`](NodeType::WebApi) node).
    ///
    /// Like WebApi nodes, a JsBuiltin node is the actee of
    /// [`JsCall`](EdgeType::JsCall) edges and the actor of
    /// [`JsResult`](EdgeType::JsResult) edges.
    JsBuiltin {
        /// NOTE(review): presumably the name of the builtin being called,
        /// mirroring [`WebApi`](NodeType::WebApi)'s `method` field — confirm.
        method: String
    },
    /// HtmlElement nodes represent the elements that make up the structure
    /// of a Web page. They map to things like `<a>`, `<img>`, `<div>`, etc.
    /// PageGraph creates one for each HTML element that exists at any point
    /// during the lifetime of the web page (even those that are not
    /// inserted into the DOM).
    HtmlElement {
        /// Stores the tag name of the HTML element, similar to the
        /// [`tagName`](https://developer.mozilla.org/en-US/docs/Web/API/Element/tagName)
        /// JavaScript attribute. For an image tag, this value will be "img",
        /// for an unordered list element this value will be "ul", etc.
        tag_name: HtmlTag,
        /// Records whether the node is alive (and not garbage collected)
        /// at the point in time when the PageGraph document was serialized.
        is_deleted: bool,
        /// The integer identifier Blink assigns to each element in the DOM
        /// of a web page. Will be unique for all
        /// [`HtmlElements`](NodeType::HtmlElement),
        /// [TextNodes](NodeType::TextNode), and [DomRoots](NodeType::DomRoot)
        /// within the same process (in terms of process isolation).
        ///
        /// This value is referenced from the attributes of several actions
        /// (i.e., [events][EdgeType]) recorded in PageGraph. For example, when
        /// a [JavaScript unit](NodeType::Script) [inserts](EdgeType::InsertNode)
        /// a [`HtmlElement`](NodeType::HtmlElement) into a document, the
        /// [`InsertNode`](EdgeType::InsertNode) edge's
        /// [`parent`](EdgeType::InsertNode::parent) attribute records the
        /// [`node_id`](NodeType::HtmlElement::node_id) of the HtmlElement the
        /// node was inserted below.
        node_id: HtmlElementId,
    },
    /// TextNode nodes represent [text nodes](https://developer.mozilla.org/en-US/docs/Web/API/Text)
    /// in the DOM tree. PageGraph uses this node type to also represent
    /// white space, so (unfortunately) not all text nodes include text; some
    /// are just white space.
    TextNode {
        /// The text string, if any, in the DOM tree, represented by this node.
        text: Option<String>,
        /// Records whether the node is alive (and not garbage collected)
        /// at the point in time when the PageGraph document was serialized.
        is_deleted: bool,
        /// The integer identifier Blink assigns to each element in the DOM
        /// of a web page. Will be unique for all
        /// [`HtmlElements`](NodeType::HtmlElement),
        /// [TextNodes](NodeType::TextNode), and [DomRoots](NodeType::DomRoot)
        /// within the same process (in terms of process isolation).
        ///
        /// This value is referenced from the attributes of several actions
        /// (i.e., [events][EdgeType]) recorded in PageGraph. For example, when
        /// a [JavaScript unit](NodeType::Script) [inserts](EdgeType::InsertNode)
        /// a [text node](NodeType::TextNode) into a document, the
        /// [`InsertNode`](EdgeType::InsertNode) edge's
        /// [`parent`](EdgeType::InsertNode::parent) attribute records the
        /// `node_id` of the node the text node was inserted below.
        node_id: HtmlElementId,
    },
    /// DomRoot nodes share the identifier space described on
    /// [`HtmlElement`](NodeType::HtmlElement)'s `node_id` field.
    ///
    /// NOTE(review): undocumented upstream; presumably the root node of a
    /// document's DOM tree — confirm.
    DomRoot {
        /// NOTE(review): presumably the URL of the document this root belongs
        /// to, when one is known — confirm.
        url: Option<Url>,
        /// The tag name recorded for this node.
        tag_name: HtmlTag,
        /// NOTE(review): presumably the same meaning as `is_deleted` on
        /// [`HtmlElement`](NodeType::HtmlElement) — confirm.
        is_deleted: bool,
        /// Identifier drawn from the same space as the `node_id` of
        /// [`HtmlElement`](NodeType::HtmlElement) and
        /// [`TextNode`](NodeType::TextNode) nodes.
        node_id: HtmlElementId,
    },
    /// NOTE(review): undocumented upstream; presumably an element (such as an
    /// `<iframe>`) that hosts a child frame — confirm. Carries the same
    /// `tag_name` / `is_deleted` / `node_id` fields as
    /// [`HtmlElement`](NodeType::HtmlElement).
    FrameOwner {
        /// The tag name recorded for this node.
        tag_name: HtmlTag,
        /// NOTE(review): presumably the same meaning as `is_deleted` on
        /// [`HtmlElement`](NodeType::HtmlElement) — confirm.
        is_deleted: bool,
        /// NOTE(review): typed as [`HtmlElementId`], so presumably drawn from
        /// the same identifier space as other DOM nodes — confirm.
        node_id: HtmlElementId,
    },
    /// Singleton node that represents the [`window.localStorage`](https://developer.mozilla.org/en-US/docs/Web/API/Window/localStorage)
    /// storage area read from, and written to, during the execution of the Web
    /// site.
    ///
    /// Note that this node does not record what values are in the storage
    /// area at any given point. To observe what keys are written to and read
    /// from local storage, consider instead the incoming and outgoing edges to
    /// this node.
    ///
    /// The following edges record [JavaScript code](NodeType::Script)
    /// interacting with the storage area:
    /// - [`StorageSet`](EdgeType::StorageSet): records when a script writes
    ///   a value to the storage area. This maps to calling [`setItem`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/setItem)
    ///   (or equivalent) on the storage area.
    /// - [`ReadStorageCall`](EdgeType::ReadStorageCall): records when a script
    ///   attempts to read a value from the storage area. This corresponds to
    ///   a script calling [`getItem`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/getItem)
    ///   (or equivalent) on the storage area.
    /// - [`DeleteStorage`](EdgeType::DeleteStorage): records when a script
    ///   deletes a key from the storage area. This corresponds to a
    ///   script calling [`removeItem`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/removeItem)
    ///   on the storage area.
    /// - [`ClearStorage`](EdgeType::ClearStorage): records when a script
    ///   clears all values from the storage area. This corresponds to a script
    ///   calling [`clear`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/clear)
    ///   on the storage area.
    ///
    /// The following edges record the results [JavaScript code](NodeType::Script)
    /// receives when interacting with the storage area:
    /// - [`StorageReadResult`](EdgeType::StorageReadResult): records the
    ///   value returned to the script that read a value from the storage
    ///   area.
    LocalStorage {},
    /// Singleton node that represents the [`window.sessionStorage`](https://developer.mozilla.org/en-US/docs/Web/API/Window/sessionStorage)
    /// storage area read from, and written to, during the execution of the Web
    /// site.
    ///
    /// Note that this node does not record what values are in the storage
    /// area at any given point. To observe what keys are written to and read
    /// from session storage, consider instead the incoming and outgoing edges to
    /// this node.
    ///
    /// The following edges record [JavaScript code](NodeType::Script)
    /// interacting with the storage area:
    /// - [`StorageSet`](EdgeType::StorageSet): records when a script writes
    ///   a value to the storage area. This maps to calling [`setItem`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/setItem)
    ///   (or equivalent) on the storage area.
    /// - [`ReadStorageCall`](EdgeType::ReadStorageCall): records when a script
    ///   attempts to read a value from the storage area. This corresponds to
    ///   a script calling [`getItem`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/getItem)
    ///   (or equivalent) on the storage area.
    /// - [`DeleteStorage`](EdgeType::DeleteStorage): records when a script
    ///   deletes a key from the storage area. This corresponds to a
    ///   script calling [`removeItem`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/removeItem)
    ///   on the storage area.
    /// - [`ClearStorage`](EdgeType::ClearStorage): records when a script
    ///   clears all values from the storage area. This corresponds to a script
    ///   calling [`clear`](https://developer.mozilla.org/en-US/docs/Web/API/Storage/clear)
    ///   on the storage area.
    ///
    /// The following edges record the results [JavaScript code](NodeType::Script)
    /// receives when interacting with the storage area:
    /// - [`StorageReadResult`](EdgeType::StorageReadResult): records the
    ///   value returned to the script that read a value from the storage
    ///   area.
    SessionStorage {},
    /// Singleton node that represents the [`document.cookie`](https://developer.mozilla.org/en-US/docs/Web/API/Document/cookie)
    /// property read from, and written to, during the execution of the Web
    /// site. Note that this property *does not* encode cookies set or
    /// read through HTTP headers, only cookie activities from scripts.
    ///
    /// The following edges record [JavaScript code](NodeType::Script)
    /// reading and writing cookies during page execution:
    /// - [`StorageSet`](EdgeType::StorageSet): records when a script
    ///   assigns a value to the `document.cookie` property. Note though that
    ///   because of how the JavaScript cookie API works, assigning a
    ///   value to the `document.cookie` property might actually delete
    ///   or modify a cookie in the site's cookie jar.
    /// - [`ReadStorageCall`](EdgeType::ReadStorageCall): records when a script
    ///   is reading the value of `document.cookie`.
    ///
    /// The following edges record the value returned to
    /// [JavaScript code](NodeType::Script) reading the state of the
    /// `document.cookie` property:
    /// - [`StorageReadResult`](EdgeType::StorageReadResult): records the
    ///   value returned to the script that read the value of `document.cookie`.
    CookieJar {},
    /// Script nodes represent JavaScript code units compiled by V8
    /// and executed during the lifetime of the page. Script nodes
    /// encode any kind of script that can run during the page's execution
    /// (e.g., fetched or inlined "classic" scripts or module scripts,
    /// or eval'ed scripts).
    Script {
        /// The URL this script was fetched from, in the case that the script
        /// was fetched from a URL via a `<script>` element's `src` attribute,
        /// or a dynamically fetched module script.
        url: Option<Url>,
        /// The type of script being executed, either a "module" script
        /// or a "classic" script.
        script_type: String,
        /// The V8 identifier for this JavaScript code unit.
        script_id: ScriptId,
        /// The text of the script as passed to the V8 compiler.
        source: String,
    },
    /// Singleton node representing the Blink parser, responsible for parsing
    /// HTML text and generating page elements.
    Parser {},
    /// NOTE(review): undocumented upstream. Carries a `binding` and a
    /// `binding_type`, both free-form strings whose vocabulary is not visible
    /// in this file — confirm against the PageGraph emitter.
    Binding {
        binding: String,
        binding_type: String,
    },
    /// NOTE(review): undocumented upstream. Carries a single free-form
    /// `binding_event` string; its meaning is not visible in this file.
    BindingEvent {
        binding_event: String,
    },
    /// NOTE(review): undocumented upstream; presumably a frame rendered in a
    /// different process, identified by `frame_id` — confirm.
    RemoteFrame {
        frame_id: FrameId,
    },
    /// NOTE(review): undocumented upstream; presumably a content-filtering
    /// rule that matched during page execution — confirm.
    AdFilter {
        /// NOTE(review): presumably the text of the filter rule — confirm.
        rule: String
    },
    /// Placeholder (see the TODO). Note this is a unit variant, unlike the
    /// `{}`-style data-less variants elsewhere in this enum.
    TrackerFilter, // TODO
    /// Placeholder (see the TODO). Note this is a unit variant, unlike the
    /// `{}`-style data-less variants elsewhere in this enum.
    FingerprintingFilter, // TODO
    /// NOTE(review): undocumented upstream; carries no data. Presumably a
    /// structural node grouping the storage-area nodes — confirm.
    Storage {},
    /// NOTE(review): undocumented upstream; carries no data. Presumably
    /// represents Brave Shields as a whole — confirm.
    BraveShields {},
    /// NOTE(review): undocumented upstream; carries no data. Presumably the
    /// ad-blocking shield — confirm.
    AdsShield {},
    /// NOTE(review): undocumented upstream; carries no data. Presumably the
    /// tracker-blocking shield — confirm.
    TrackersShield {},
    /// NOTE(review): undocumented upstream; carries no data. Presumably the
    /// JavaScript-blocking shield — confirm.
    JavascriptShield {},
    /// NOTE(review): undocumented upstream; carries no data. Presumably the
    /// fingerprinting-protection shield — confirm.
    FingerprintingShield {},
    /// NOTE(review): undocumented upstream; carries no data. Presumably a
    /// second-generation fingerprinting-protection shield — confirm.
    FingerprintingV2Shield {},
    /// NOTE(review): undocumented upstream; carries no data. Presumably
    /// represents browser extensions — confirm.
    Extensions {},
}
291
292#[derive(Clone, PartialEq, Debug)]
293#[derive(serde::Serialize)]
294pub enum RequestType {
295 Image,
296 Script,
297 CSS,
298 AJAX,
299 Unknown,
300}
301
302impl From<&str> for RequestType {
303 fn from(v: &str) -> Self {
304 match v {
305 "Image" => Self::Image,
306 "Script" => Self::Script,
307 "CSS" => Self::CSS,
308 "AJAX" => Self::AJAX,
309 "Unknown" => Self::Unknown,
310 _ => Self::Unknown,
311 }
312 }
313}
314
315impl RequestType {
316 pub fn as_str(&self) -> &'static str {
317 match self {
318 Self::Image => "image",
319 Self::Script => "script",
320 Self::CSS => "stylesheet",
321 Self::AJAX => "xhr",
322 Self::Unknown => "unknown",
323 }
324 }
325}
326
/// Represents the type of any PageGraph edge, along with any associated type-specific data.
/// Edges in PageGraph represent actions taken by some actor in the
/// page (e.g., a JavaScript code unit), being performed on some other element
/// in the page (e.g., a resource being fetched). Edges are outgoing from
/// the actor, and incoming to the actee.
#[derive(Clone, PartialEq, Debug)]
#[derive(serde::Serialize)]
pub enum EdgeType {
    /// NOTE(review): undocumented upstream; carries no data. Meaning is not
    /// evident from this file.
    CrossDom {},
    /// NOTE(review): undocumented upstream; carries no data. Meaning is not
    /// evident from this file.
    TextChange {},
    /// `RemoveNode` edges encode an HTML element being removed from the DOM
    /// tree.
    ///
    /// The actor node will be the [`Script`](NodeType::Script) node
    /// that is removing an HTML element from the document.
    ///
    /// The actee node will be the [`HtmlElement`](NodeType::HtmlElement) node
    /// being removed from the document.
    RemoveNode {},
    /// `DeleteNode` edges encode an HTML element being deleted by JavaScript
    /// code. Note that this is a distinct action from merely removing an
    /// HTML element from a document (which is encoded with a
    /// [`RemoveNode`](EdgeType::RemoveNode) edge).
    ///
    /// The actor node will be the [`Script`](NodeType::Script) node
    /// that is deleting an HTML element.
    ///
    /// The actee node will be the [`HtmlElement`](NodeType::HtmlElement) node
    /// being deleted.
    DeleteNode {},
    /// `InsertNode` edges encode an HTML element being inserted into a DOM
    /// tree.
    ///
    /// The actor node will either be the [`Parser`](NodeType::Parser)
    /// (indicating that the element was inserted into the document because
    /// of text being parsed, most often from the initial HTML document)
    /// or a [`Script`](NodeType::Script) node (indicating that the element
    /// was inserted into the document dynamically).
    ///
    /// The actee node will be a [`HtmlElement`](NodeType::HtmlElement) node
    /// depicting the HTML element being inserted into the document.
    InsertNode {
        /// The identifier of the DOM element the actee
        /// [`HtmlElement`](NodeType::HtmlElement) node is being inserted
        /// beneath in the document.
        parent: HtmlElementId,
        /// The identifier of the prior sibling DOM element the actee
        /// [`HtmlElement`](NodeType::HtmlElement) node is being inserted
        /// before in the document. If this value is not present, it indicates
        /// that the actee node was the first child of the parent node at
        /// insertion time.
        before: Option<HtmlElementId>,
    },
    /// `CreateNode` edges encode that an HTML element was created during
    /// the execution of the page.
    ///
    /// The actor node will either be the [`Parser`](NodeType::Parser)
    /// (indicating that the element was created because it was defined in
    /// text parsed by the Blink parser) or a [`Script`](NodeType::Script) node
    /// (indicating that the element was dynamically created by a JavaScript
    /// code unit, e.g., via `document.createElement`).
    ///
    /// The actee node will be a [`HtmlElement`](NodeType::HtmlElement) node
    /// depicting the HTML element that was created.
    CreateNode {},
    /// `JsResult` edges encode a value being returned from a property read
    /// or a function call in JavaScript code.
    ///
    /// The actor node will be either a [`WebApi`](NodeType::WebApi) node
    /// (representing the WebAPI method or property that was called) or
    /// a [`JsBuiltin`](NodeType::JsBuiltin) node (representing
    /// an instrumented method or function that's defined as part of
    /// ECMAScript).
    ///
    /// The actee node will be a [`Script`](NodeType::Script) node
    /// representing the JavaScript code unit that the value is being
    /// returned to.
    JsResult {
        /// The value being returned from an API to a JavaScript code unit.
        /// Note that this will not be present for APIs that do not return
        /// a value when called (such as `addEventListener`).
        value: Option<String>
    },
    /// `JsCall` edges encode a JavaScript function/method being called
    /// by JavaScript code.
    ///
    /// The actor node will be a [`Script`](NodeType::Script) node, representing
    /// the JavaScript code unit calling the property, function, or method.
    ///
    /// The actee node will be either a [`WebApi`](NodeType::WebApi) node
    /// (representing the WebAPI method or property being called) or
    /// a [`JsBuiltin`](NodeType::JsBuiltin) node (representing
    /// an instrumented method or function that's defined as part of
    /// ECMAScript being called).
    JsCall {
        /// A serialized version of any arguments provided when the method or
        /// function is called (if any).
        args: Option<String>,
        /// The character offset in the JavaScript text where this JavaScript
        /// call occurred.
        script_position: usize,
    },
    /// `RequestComplete` edges denote a response to a request for the URL
    /// represented by a [`Resource`](NodeType::Resource) node, in the case
    /// that the request succeeded; see the `Resource` documentation.
    ///
    /// NOTE(review): the fields below are undocumented upstream. Note that
    /// `status`, `headers` and `size` are all carried as strings.
    RequestComplete {
        resource_type: String,
        status: String,
        value: Option<String>,
        response_hash: Option<String>,
        /// NOTE(review): presumably pairs this edge with the
        /// [`RequestStart`](EdgeType::RequestStart) edge carrying the same
        /// `request_id` — confirm.
        request_id: usize,
        headers: String,
        size: String,
    },
    /// `RequestError` edges denote a response to a request for the URL
    /// represented by a [`Resource`](NodeType::Resource) node, in the case
    /// that the request did not succeed; see the `Resource` documentation.
    ///
    /// NOTE(review): the fields below are undocumented upstream. Note that
    /// `status`, `headers` and `size` are all carried as strings.
    RequestError {
        status: String,
        /// NOTE(review): presumably pairs this edge with the
        /// [`RequestStart`](EdgeType::RequestStart) edge carrying the same
        /// `request_id` — confirm.
        request_id: usize,
        value: Option<String>,
        headers: String,
        size: String,
    },
    /// `RequestStart` edges denote a request being made for the URL
    /// represented by a [`Resource`](NodeType::Resource) node; see the
    /// `Resource` documentation.
    RequestStart {
        /// The category of the request.
        request_type: RequestType,
        /// NOTE(review): undocumented upstream; carried as a string.
        status: String,
        /// NOTE(review): presumably shared with the matching
        /// [`RequestComplete`](EdgeType::RequestComplete) or
        /// [`RequestError`](EdgeType::RequestError) edge — confirm.
        request_id: usize,
    },
    /// Placeholder (see the TODO). Note this is a unit variant, unlike the
    /// `{}`-style data-less variants elsewhere in this enum.
    RequestResponse, // TODO
    /// NOTE(review): undocumented upstream; presumably records a script
    /// registering an event listener — confirm. `key` is presumably the
    /// event name — confirm.
    AddEventListener {
        key: String,
        event_listener_id: usize,
        script_id: ScriptId,
    },
    /// NOTE(review): undocumented upstream; presumably records a script
    /// unregistering an event listener — confirm. Carries the same fields as
    /// [`AddEventListener`](EdgeType::AddEventListener).
    RemoveEventListener {
        key: String,
        event_listener_id: usize,
        script_id: ScriptId,
    },
    /// NOTE(review): undocumented upstream. Carries the `key` and
    /// `event_listener_id` fields of
    /// [`AddEventListener`](EdgeType::AddEventListener) but no `script_id`.
    EventListener {
        key: String,
        event_listener_id: usize,
    },
    /// `StorageSet` edges record a script writing a value to a storage area
    /// ([`LocalStorage`](NodeType::LocalStorage),
    /// [`SessionStorage`](NodeType::SessionStorage)) or assigning to
    /// `document.cookie` ([`CookieJar`](NodeType::CookieJar)); see those
    /// nodes' documentation.
    StorageSet {
        key: String,
        value: Option<String>,
    },
    /// `StorageReadResult` edges record the value returned to the script
    /// that read a value from a storage area or from `document.cookie`; see
    /// [`LocalStorage`](NodeType::LocalStorage),
    /// [`SessionStorage`](NodeType::SessionStorage) and
    /// [`CookieJar`](NodeType::CookieJar).
    StorageReadResult {
        key: String,
        value: Option<String>,
    },
    /// `DeleteStorage` edges record a script deleting a key from a storage
    /// area; see [`LocalStorage`](NodeType::LocalStorage) and
    /// [`SessionStorage`](NodeType::SessionStorage).
    DeleteStorage {
        key: String
    },
    /// `ReadStorageCall` edges record a script attempting to read a value
    /// from a storage area, or reading `document.cookie`; see
    /// [`LocalStorage`](NodeType::LocalStorage),
    /// [`SessionStorage`](NodeType::SessionStorage) and
    /// [`CookieJar`](NodeType::CookieJar).
    ReadStorageCall {
        key: String
    },
    /// `ClearStorage` edges record a script clearing all values from a
    /// storage area; see [`LocalStorage`](NodeType::LocalStorage) and
    /// [`SessionStorage`](NodeType::SessionStorage).
    ClearStorage {
        /// NOTE(review): what `key` holds when an entire storage area is
        /// cleared is not evident from this file — confirm.
        key: String
    },
    /// NOTE(review): undocumented upstream; presumably records a script
    /// being executed because of an HTML attribute (e.g. "onclick") —
    /// confirm.
    ExecuteFromAttribute {
        /// NOTE(review): presumably the name of the attribute whose value
        /// was executed — confirm.
        attr_name: HtmlAttr
    },
    /// NOTE(review): undocumented upstream; carries no data. Presumably
    /// records a script being executed — confirm.
    Execute {},
    /// `SetAttribute` edges encode JavaScript code setting an attribute
    /// on an HTML element.
    ///
    /// The actor node will be a [`Script`](NodeType::Script) node, representing
    /// the JavaScript code setting the attribute.
    ///
    /// The actee node will be a [`HtmlElement`](NodeType::HtmlElement) node,
    /// representing the HTML element that is having an attribute set on it.
    SetAttribute {
        /// The name of the HTML attribute being set (e.g., `height`,
        /// `href`, `src`).
        key: HtmlAttr,
        /// The value being assigned to the HTML attribute (if any).
        value: Option<String>,
        /// Whether the attribute being set is part of the element's
        /// CSS style definition.
        is_style: bool,
    },
    /// `DeleteAttribute` edges encode JavaScript code deleting an attribute
    /// from an HTML element.
    ///
    /// The actor node will be a [`Script`](NodeType::Script) node, representing
    /// the JavaScript code deleting the attribute.
    ///
    /// The actee node will be a [`HtmlElement`](NodeType::HtmlElement) node,
    /// representing the HTML element that is having the attribute deleted.
    DeleteAttribute {
        /// The name of the HTML attribute being deleted (e.g., `height`,
        /// `href`, `src`).
        key: HtmlAttr,
        /// Whether the attribute being deleted is part of the element's
        /// CSS style definition.
        is_style: bool,
    },
    /// NOTE(review): undocumented upstream; carries no data. Presumably
    /// relates to [`NodeType::Binding`] — confirm.
    Binding {},
    /// NOTE(review): undocumented upstream; presumably relates to
    /// [`NodeType::BindingEvent`] — confirm.
    BindingEvent {
        /// NOTE(review): presumably a character offset into the script text,
        /// as with [`JsCall`](EdgeType::JsCall)'s `script_position` — confirm.
        script_position: usize,
    },
    /// NOTE(review): undocumented upstream; carries no data. Meaning is not
    /// evident from this file.
    Filter {},
    /// NOTE(review): undocumented upstream; carries no data. Meaning is not
    /// evident from this file.
    Structure {},
    /// NOTE(review): undocumented upstream; carries no data. Meaning is not
    /// evident from this file.
    Shield {},
    /// NOTE(review): undocumented upstream; carries no data. Meaning is not
    /// evident from this file.
    ResourceBlock {},
    /// NOTE(review): undocumented upstream; carries no data. Meaning is not
    /// evident from this file.
    StorageBucket {},
}