Skip to main content

pf_world/
browser.rs

1// SPDX-License-Identifier: MIT
2//! Browser DOM capture via Chrome DevTools Protocol.
3//!
4//! Closes the §M2 World layer 'browser DOM via CDP for
5//! Playwright/Puppeteer sandboxes' deliverable. Connects to a
6//! Chromium instance launched with `--remote-debugging-port=<port>`
7//! and snapshots every open page:
8//!
9//! - URL, title, viewport, scroll position, devicePixelRatio
10//! - Page.captureSnapshot (MHTML) — the entire serialised page
11//! - DOMStorage.getDOMStorageItems for localStorage AND sessionStorage
12//! - Network.getCookies (subject to env-scrub regex if the operator wants)
13//!
14//! On restore we emit instructions for re-spawning Chromium with the
15//! same flags and CDP-loading each MHTML; auto-spawn is operator-led
16//! since headless flags vary widely.
17
18use pf_core::cas::BlobStore;
19use pf_core::digest::Digest256;
20use serde::{Deserialize, Serialize};
21use std::sync::Arc;
22
23#[cfg(feature = "cdp-live")]
24mod live;
25#[cfg(feature = "cdp-live")]
26pub use live::CdpClient;
27
28/// Wire format of a captured browser session.
29#[derive(Clone, Debug, Serialize, Deserialize)]
30#[serde(tag = "kind")]
31pub enum BrowserBlob {
32    /// Real CDP capture from a Chromium instance.
33    #[serde(rename = "browser.cdp.v1")]
34    Cdp {
35        /// CDP endpoint we connected to (e.g. http://127.0.0.1:9222).
36        endpoint: String,
37        /// One per open tab/page.
38        pages: Vec<PageSnapshot>,
39    },
40    /// Placeholder for hosts where CDP capture is not available
41    /// (no chromium running, no `cdp-live` feature compiled).
42    #[serde(rename = "browser.unsupported.v1")]
43    Unsupported { reason: String },
44}
45
46/// Snapshot of a single browser page/tab.
47#[derive(Clone, Debug, Serialize, Deserialize)]
48pub struct PageSnapshot {
49    /// CDP page id (a UUID-like string).
50    pub target_id: String,
51    pub url: String,
52    pub title: String,
53    pub viewport_width: u32,
54    pub viewport_height: u32,
55    pub scroll_x: f64,
56    pub scroll_y: f64,
57    pub device_pixel_ratio: f64,
58    /// Digest of the MHTML body (so duplicates dedup like every other
59    /// blob in the .pfimg).
60    pub mhtml_digest: Digest256,
61    /// localStorage entries.
62    pub local_storage: std::collections::BTreeMap<String, String>,
63    /// sessionStorage entries.
64    pub session_storage: std::collections::BTreeMap<String, String>,
65    /// Cookies as a JSON-array of CDP CookieParam objects (we keep the
66    /// shape opaque to avoid refighting the Cookie schema for every
67    /// CDP minor revision).
68    pub cookies_digest: Digest256,
69}
70
/// Captures the (optional) attached-browser DOM.
///
/// Construct with [`BrowserCapture::new`], passing the CDP HTTP endpoint
/// (typically `http://127.0.0.1:9222`), or with
/// [`BrowserCapture::from_env`]. Launch Chromium with
/// `--remote-debugging-port=9222 --headless=new` (or omit `--headless`
/// for a visible browser), then call [`BrowserCapture::capture`].
///
/// The type is `Debug` and `Clone` so it can be logged and embedded in
/// other derivable configuration structs.
#[derive(Clone, Debug)]
pub struct BrowserCapture {
    /// CDP HTTP endpoint to capture from; `None` means capture is skipped.
    endpoint: Option<String>,
}
80
81impl BrowserCapture {
82    /// Construct from an explicit endpoint. `None` = capture is skipped
83    /// and the resulting blob is `BrowserBlob::Unsupported`.
84    #[must_use]
85    pub fn new(endpoint: Option<String>) -> Self {
86        Self { endpoint }
87    }
88
89    /// Construct from `$PF_BROWSER_CDP` env var. Same Unsupported
90    /// fall-through if the env var is unset/empty.
91    #[must_use]
92    pub fn from_env() -> Self {
93        Self {
94            endpoint: std::env::var("PF_BROWSER_CDP")
95                .ok()
96                .filter(|s| !s.is_empty()),
97        }
98    }
99
100    /// Capture the current browser state. Always returns a digest
101    /// pointing at the on-disk JSON serialization of [`BrowserBlob`].
102    pub async fn capture(&self, blobs: &Arc<dyn BlobStore>) -> pf_core::Result<Digest256> {
103        let blob = match (&self.endpoint, cfg!(feature = "cdp-live")) {
104            (Some(endpoint), true) => self.capture_cdp(endpoint, blobs).await?,
105            (Some(_endpoint), false) => BrowserBlob::Unsupported {
106                reason: "pf-world built without `cdp-live` feature".into(),
107            },
108            (None, _) => BrowserBlob::Unsupported {
109                reason:
110                    "no CDP endpoint configured (set PF_BROWSER_CDP or pass to BrowserCapture::new)"
111                        .into(),
112            },
113        };
114        blobs.put(&serde_json::to_vec(&blob)?)
115    }
116
117    #[cfg(feature = "cdp-live")]
118    async fn capture_cdp(
119        &self,
120        endpoint: &str,
121        blobs: &Arc<dyn BlobStore>,
122    ) -> pf_core::Result<BrowserBlob> {
123        let client = live::CdpClient::new(endpoint.to_owned());
124        match client.capture(blobs.clone()).await {
125            Ok(pages) => Ok(BrowserBlob::Cdp {
126                endpoint: endpoint.to_owned(),
127                pages,
128            }),
129            Err(e) => {
130                tracing::warn!(?e, "CDP capture failed; emitting Unsupported placeholder");
131                Ok(BrowserBlob::Unsupported {
132                    reason: format!("CDP capture failed: {e}"),
133                })
134            }
135        }
136    }
137
138    #[cfg(not(feature = "cdp-live"))]
139    #[allow(clippy::unused_async, clippy::unused_self)]
140    async fn capture_cdp(
141        &self,
142        _endpoint: &str,
143        _blobs: &Arc<dyn BlobStore>,
144    ) -> pf_core::Result<BrowserBlob> {
145        unreachable!("capture_cdp only called when cdp-live is on")
146    }
147}
148
#[cfg(test)]
mod tests {
    use super::*;
    use pf_core::cas::MemBlobStore;

    /// With no endpoint configured, `capture` must still store a blob, and
    /// that blob must decode to the `Unsupported` placeholder.
    #[tokio::test]
    async fn browser_capture_no_endpoint_returns_unsupported() {
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let cap = BrowserCapture::new(None);
        let digest = cap.capture(&blobs).await.unwrap();
        let bytes = blobs.get(&digest).unwrap();
        let blob: BrowserBlob = serde_json::from_slice(&bytes).unwrap();
        match blob {
            BrowserBlob::Unsupported { reason } => {
                assert!(reason.contains("CDP endpoint"), "{reason}");
            }
            BrowserBlob::Cdp { .. } => panic!("expected Unsupported, got Cdp"),
        }
    }

    /// A `PageSnapshot` must survive a JSON encode/decode cycle.
    #[test]
    fn page_snapshot_round_trips_through_json() {
        let p = PageSnapshot {
            target_id: "T-1".into(),
            url: "https://example.com".into(),
            title: "Example".into(),
            viewport_width: 1280,
            viewport_height: 800,
            scroll_x: 0.0,
            scroll_y: 42.5,
            device_pixel_ratio: 2.0,
            mhtml_digest: Digest256::of(b"mhtml"),
            local_storage: [("k".to_owned(), "v".to_owned())].into(),
            session_storage: std::collections::BTreeMap::default(),
            cookies_digest: Digest256::of(b"[]"),
        };
        let s = serde_json::to_string(&p).unwrap();
        let p2: PageSnapshot = serde_json::from_str(&s).unwrap();

        // Every check compares the decoded copy (`p2`) with the original;
        // asserting on `p` alone would pass even if decoding lost data.
        assert_eq!(p.target_id, p2.target_id);
        assert_eq!(p.url, p2.url);
        assert_eq!(p.title, p2.title);
        assert_eq!(p.viewport_width, p2.viewport_width);
        assert_eq!(p.viewport_height, p2.viewport_height);
        assert_eq!(p2.local_storage.get("k").map(String::as_str), Some("v"));
        assert_eq!(p.local_storage, p2.local_storage);
        assert_eq!(p.session_storage, p2.session_storage);
    }

    // Note: from_env() is exercised end-to-end by the operator-side
    // CDP integration test (operator sets PF_BROWSER_CDP before
    // running pf snapshot). We deliberately don't test it here
    // because mutating the environment (`std::env::set_var`) is
    // `unsafe` since Rust 1.85 (2024 edition) and pf-world has
    // `#![deny(unsafe_code)]`.
}