Skip to main content

plumb_cdp/
lib.rs

1//! # plumb-cdp
2//!
3//! Chromium DevTools Protocol driver for Plumb.
4//!
5//! This crate owns every interaction with the browser. It is the **only**
6//! Plumb crate where `unsafe` is permitted — and only for FFI-adjacent
7//! hot spots, each with an explicit `// SAFETY:` comment. The walking
8//! skeleton doesn't yet use `unsafe`; the override exists to preempt
9//! future friction when snapshot conversion lands.
10//!
11//! ## Supported Chromium versions
12//!
13//! Plumb accepts Chromium major versions in the inclusive range
14//! <code>[MIN_SUPPORTED_CHROMIUM_MAJOR]..=[MAX_SUPPORTED_CHROMIUM_MAJOR]</code>.
15//! The lower bound is the oldest major Plumb has validated against; the
16//! upper bound is the newest major tested up to. Both are public so
17//! callers can introspect the accepted range. Constraining the browser
18//! to a known range is part of Plumb's determinism guarantee
19//! (`docs/local/prd.md` §9, §16) — DOMSnapshot output stability is
20//! re-verified whenever the upper bound moves.
21//!
22//! ## Behavior
23//!
24//! [`ChromiumDriver::snapshot_all`] launches Chromium exactly once,
25//! validates [`Browser::version`](chromiumoxide::Browser::version),
26//! and then loops over the requested targets — for each it opens a
27//! fresh page, applies the per-target viewport via CDP
28//! `Emulation.setDeviceMetricsOverride`, navigates to the URL, and
29//! calls `DOMSnapshot.captureSnapshot` with the
30//! [`COMPUTED_STYLE_WHITELIST`] from PRD §10.3. Each CDP response is
31//! flattened into a [`PlumbSnapshot`] with deterministic ordering
32//! (nodes sorted by `dom_order`, computed styles inserted in
33//! whitelist order). [`ChromiumDriver::snapshot`] is a thin wrapper
34//! over `snapshot_all` for callers that only want a single target.
35//! The `plumb-fake://` URL scheme in `plumb-cli` is handled by
36//! [`FakeDriver`] from this crate's `test-fake` wiring.
37//!
38//! [`PersistentBrowser`] is the long-lived counterpart for callers
39//! that lint many URLs in one process (the MCP server). It launches
40//! Chromium once, validates the version, and gives each
41//! [`PersistentBrowser::snapshot`] call a fresh incognito
42//! `BrowserContext` so cookies and localStorage from call N do not
43//! leak into call N+1.
44
45#![deny(unsafe_op_in_unsafe_fn)]
46#![doc(
47    html_logo_url = "https://raw.githubusercontent.com/aram-devdocs/plumb/main/assets/brand/plumb-mark.svg",
48    html_favicon_url = "https://raw.githubusercontent.com/aram-devdocs/plumb/main/theme/favicon.svg"
49)]
50#![deny(missing_docs)]
51#![deny(clippy::unwrap_used, clippy::expect_used)]
52
53pub mod chrome_path;
54pub mod fetcher;
55
56use indexmap::IndexMap;
57use plumb_core::report::Rect;
58use plumb_core::snapshot::{SnapshotNode, TextBox};
59use plumb_core::{PlumbSnapshot, ViewportKey};
60use std::io;
61use std::path::{Path, PathBuf};
62use std::sync::{Arc, Mutex};
63
64use chromiumoxide::Page;
65use chromiumoxide::cdp::browser_protocol::browser::CloseParams as BrowserCloseParams;
66use chromiumoxide::cdp::browser_protocol::dom_snapshot::{
67    CaptureSnapshotParams, CaptureSnapshotReturns, DocumentSnapshot,
68};
69use chromiumoxide::cdp::browser_protocol::emulation::SetDeviceMetricsOverrideParams;
70use chromiumoxide::cdp::browser_protocol::network::{
71    CookieParam, Headers, SetCookiesParams, SetExtraHttpHeadersParams,
72};
73use chromiumoxide::cdp::browser_protocol::page::AddScriptToEvaluateOnNewDocumentParams;
74use chromiumoxide::cdp::browser_protocol::target::{
75    CreateBrowserContextParams, CreateTargetParams,
76};
77use chromiumoxide::detection::DetectionOptions;
78use chromiumoxide::{Browser, BrowserConfig, Handler};
79use futures_util::StreamExt;
80use serde::Deserialize;
81use tokio::task::JoinHandle;
82
/// Lowest Chromium major version Plumb has validated against. Plumb
/// refuses to run against a Chromium binary whose major version is
/// smaller than this value (see [`CdpError::UnsupportedChromium`]).
pub const MIN_SUPPORTED_CHROMIUM_MAJOR: u32 = 131;
86
/// Highest Chromium major version Plumb has tested up to. Plumb
/// refuses to run against a Chromium binary whose major version is
/// larger than this value (see [`CdpError::UnsupportedChromium`]);
/// bump this constant after running the e2e suite against the new
/// major.
pub const MAX_SUPPORTED_CHROMIUM_MAJOR: u32 = 150;
91
/// CSS property whitelist passed to `DOMSnapshot.captureSnapshot` as the
/// `computedStyles` argument.
///
/// The list is the canonical source of truth for which computed styles
/// flow into [`PlumbSnapshot`] nodes. Order is significant — Chromium
/// returns per-node style values as a parallel array indexed by this
/// list, so silent reordering would mis-label every value.
///
/// The array length (36) is part of the constant's type, so adding or
/// removing a property requires updating the length in the signature
/// as well; a mismatch is a compile error rather than a silent drift.
///
/// Source of truth: PRD §10.3 (`docs/local/prd.md`).
pub const COMPUTED_STYLE_WHITELIST: &[&str; 36] = &[
    // Typography.
    "font-size",
    "font-family",
    "font-weight",
    "line-height",
    // Colors.
    "color",
    "background-color",
    "border-top-color",
    "border-right-color",
    "border-bottom-color",
    "border-left-color",
    // Border widths.
    "border-top-width",
    "border-right-width",
    "border-bottom-width",
    "border-left-width",
    // Corner radii.
    "border-top-left-radius",
    "border-top-right-radius",
    "border-bottom-right-radius",
    "border-bottom-left-radius",
    // Margins.
    "margin-top",
    "margin-right",
    "margin-bottom",
    "margin-left",
    // Padding.
    "padding-top",
    "padding-right",
    "padding-bottom",
    "padding-left",
    // Gaps.
    "gap",
    "row-gap",
    "column-gap",
    // Layout, stacking, and effects.
    "display",
    "position",
    "box-shadow",
    "opacity",
    "z-index",
    // Box dimensions.
    "width",
    "height",
];
139
/// A snapshot target: URL + viewport + per-target capture knobs.
///
/// The capture knobs (`wait_for_selector`, `wait_ms`,
/// `disable_animations`, `hide_scrollbars`, `pin_dpr`) are documented
/// in PRD §15. They control browser-side behavior between navigation
/// and `DOMSnapshot.captureSnapshot` and never flow into snapshot
/// content — they only affect *when* the snapshot is captured and what
/// CSS state the page is in at that moment.
///
/// Use [`Target::default`] plus struct-update syntax to fill in only
/// the fields that differ from the baseline desktop viewport.
#[derive(Debug, Clone, PartialEq)]
pub struct Target {
    /// URL to navigate to. The `plumb-fake://` scheme is reserved for
    /// deterministic fixtures used by tests and the walking-skeleton CLI.
    pub url: String,
    /// Named viewport.
    pub viewport: ViewportKey,
    /// Viewport width in CSS pixels.
    pub width: u32,
    /// Viewport height in CSS pixels.
    pub height: u32,
    /// Device pixel ratio. Used as the device-scale factor unless
    /// [`Self::pin_dpr`] is set (see [`Self::effective_dpr`]).
    pub device_pixel_ratio: f32,
    /// Optional CSS selector to wait for before capturing the snapshot.
    /// When set, the driver polls the page until at least one matching
    /// element exists. Compatible with [`Self::wait_ms`] — both fire,
    /// in order: selector first, then the additional sleep.
    pub wait_for_selector: Option<String>,
    /// Optional additional milliseconds to sleep before capturing the
    /// snapshot, after navigation (and after [`Self::wait_for_selector`]).
    pub wait_ms: Option<u64>,
    /// Inject CSS that disables animations and transitions before the
    /// page renders. Defaults to `true` — the historical Plumb behavior
    /// (PRD §16) — and the CLI exposes a flag that flips this value.
    pub disable_animations: bool,
    /// Inject CSS that hides page-level scrollbars. Defaults to `true`
    /// to match the Chromium launch arg `--hide-scrollbars`. The CSS
    /// belt-and-suspenders covers cases where the launch arg alone is
    /// not honored (e.g. older Chromium majors on certain platforms).
    pub hide_scrollbars: bool,
    /// Optional explicit device-pixel ratio override applied via
    /// `Emulation.setDeviceMetricsOverride.deviceScaleFactor` instead of
    /// using [`Self::device_pixel_ratio`]. When `None`, the existing
    /// `device_pixel_ratio` is used. The CLI exposes this as `--dpr`.
    ///
    /// Note the type is `f64` while `device_pixel_ratio` is `f32`;
    /// [`Self::effective_dpr`] widens the latter when it is the one
    /// in effect.
    pub pin_dpr: Option<f64>,
}
184
185impl Target {
186    /// Effective device-scale factor for `Emulation.setDeviceMetricsOverride`.
187    ///
188    /// Prefers [`Self::pin_dpr`] when set, otherwise falls back to
189    /// [`Self::device_pixel_ratio`]. Centralizing the choice keeps the
190    /// "pin overrides default" rule in one place.
191    #[must_use]
192    pub fn effective_dpr(&self) -> f64 {
193        self.pin_dpr
194            .unwrap_or_else(|| f64::from(self.device_pixel_ratio))
195    }
196}
197
impl Default for Target {
    /// Baseline target: a 1280×800 `"desktop"` viewport at device pixel
    /// ratio 1.0, with no selector/time waits, animations disabled,
    /// scrollbars hidden, and no DPR pin. The URL is left empty, so
    /// callers are expected to supply one via struct-update syntax.
    fn default() -> Self {
        Self {
            url: String::new(),
            viewport: ViewportKey::new("desktop"),
            width: 1280,
            height: 800,
            device_pixel_ratio: 1.0,
            // No extra waiting unless the caller asks for it.
            wait_for_selector: None,
            wait_ms: None,
            // Both CSS injections are on by default (see the field docs
            // on `Target`).
            disable_animations: true,
            hide_scrollbars: true,
            pin_dpr: None,
        }
    }
}
214
/// Errors returned by drivers.
///
/// The enum is `#[non_exhaustive]`: downstream `match` expressions
/// need a wildcard arm so new variants can be added without a
/// breaking change.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum CdpError {
    /// A URL that the fake driver does not recognize was passed to it.
    /// The payload is the offending URL.
    #[error("FakeDriver does not recognize URL `{0}`")]
    UnknownFakeUrl(String),
    /// No suitable Chromium or Chrome executable could be found.
    #[error("Chromium executable not found. {install_hint}")]
    ChromiumNotFound {
        /// Human-readable installation and override guidance.
        install_hint: String,
    },
    /// The Chromium binary reported a major version outside Plumb's
    /// supported range.
    #[error(
        "Chromium major version {found} is not supported (Plumb supports {min_supported}..={max_supported})"
    )]
    UnsupportedChromium {
        /// Lowest validated major version (see
        /// [`MIN_SUPPORTED_CHROMIUM_MAJOR`]).
        min_supported: u32,
        /// Highest tested major version (see
        /// [`MAX_SUPPORTED_CHROMIUM_MAJOR`]).
        max_supported: u32,
        /// Detected major version.
        found: u32,
    },
    /// The DOMSnapshot CDP response was malformed (missing index,
    /// out-of-range string, empty document list, or any other shape
    /// violation that prevents safe flattening).
    #[error("DOMSnapshot response was malformed: {reason}")]
    MalformedSnapshot {
        /// What was wrong with the response.
        reason: String,
    },
    /// A user-supplied cookie field was malformed or contained illegal
    /// characters (header injection guard — C0 control bytes such as
    /// newlines, and DEL, are refused before reaching the browser).
    #[error("invalid cookie {field} `{input}`: {reason}")]
    InvalidCookie {
        /// Which cookie field failed validation. CLI-style cookies
        /// report `name` or `value`; cookies loaded from a
        /// storage-state file may also report `domain` or `path`.
        field: &'static str,
        /// The offending input.
        input: String,
        /// Reason the input was rejected.
        reason: &'static str,
    },
    /// A user-supplied HTTP header name/value contained illegal
    /// characters (header injection guard — control bytes in either
    /// part, and whitespace or `:` in names, are refused before
    /// reaching the browser).
    #[error("invalid header {field} `{input}`: {reason}")]
    InvalidHeader {
        /// Which header field failed validation (`name` or `value`).
        field: &'static str,
        /// The offending input.
        input: String,
        /// Reason the input was rejected.
        reason: &'static str,
    },
    /// A user-supplied path (auth-script or storage-state) failed the
    /// safe-path check.
    #[error("invalid path `{path}`: {reason}")]
    InvalidPath {
        /// The offending path, as supplied by the caller.
        path: PathBuf,
        /// Reason the path was rejected.
        reason: String,
    },
    /// Failed to read or parse a Playwright storage-state JSON file.
    #[error("malformed storage-state file `{path}`: {reason}")]
    MalformedStorageState {
        /// The file the driver was reading. Empty when the JSON was
        /// parsed from an in-memory string rather than a file.
        path: PathBuf,
        /// What went wrong.
        reason: String,
    },
    /// Any other driver-level failure, carried as a boxed [`std::error::Error`].
    #[error("driver failure: {0}")]
    Driver(#[source] Box<dyn std::error::Error + Send + Sync>),
    /// Auto-fetch (`--auto-fetch-chromium`) failed to download or
    /// install Chromium. Wraps the upstream chromiumoxide fetcher
    /// failure in a typed Plumb error so the CLI can surface a single
    /// "auto-fetch could not produce a working binary" message.
    #[error("Chromium auto-fetch failed: {reason}")]
    AutoFetchFailed {
        /// Human-readable reason (download / unzip / options error).
        reason: String,
    },
    /// A cached Chromium binary's SHA-256 disagrees with the recorded
    /// `.plumb-sha256` sidecar. Plumb refuses to launch the binary so
    /// a tampered cache cannot silently be promoted into an
    /// arbitrary-code-execution path.
    #[error(
        "Chromium binary `{}` failed hash verification: expected {expected}, found {found}",
        path.display()
    )]
    HashMismatch {
        /// Path of the offending binary.
        path: PathBuf,
        /// Hex SHA-256 from the sidecar (the value Plumb originally
        /// trusted).
        expected: String,
        /// Hex SHA-256 of the binary as it currently exists.
        found: String,
    },
    /// Auto-fetch needs a platform cache directory, but the host
    /// environment did not provide enough information to resolve one
    /// (no `HOME` / `LOCALAPPDATA` / `XDG_CACHE_HOME`).
    #[error("could not resolve a Plumb cache directory: {reason}")]
    CacheDirUnavailable {
        /// Human-readable reason (which env var was missing).
        reason: String,
    },
}
330
/// A cookie to install before navigation.
///
/// User-supplied cookies are validated for header-injection-style
/// payloads (any C0 control byte — including CR, LF, NUL, and tab — or
/// DEL) before flowing into a CDP `Network.setCookies` request. A
/// `None` `url` means the cookie is bound to whatever URL the target
/// ends up navigating to.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Cookie {
    /// Cookie name.
    pub name: String,
    /// Cookie value.
    pub value: String,
    /// Optional explicit URL the cookie is associated with. When `None`,
    /// the cookie is associated with the target URL on injection.
    pub url: Option<String>,
    /// Optional cookie domain. Forwarded unchanged to the CDP cookie
    /// parameter.
    pub domain: Option<String>,
    /// Optional cookie path (defaults to `/`). Forwarded unchanged to
    /// the CDP cookie parameter; the default is applied browser-side,
    /// not by this crate.
    pub path: Option<String>,
    /// Optional `Secure` flag.
    pub secure: Option<bool>,
    /// Optional `HttpOnly` flag.
    pub http_only: Option<bool>,
}
355
356impl Cookie {
357    /// Construct a cookie from a `name=value` token. The pre-navigation
358    /// helper attaches the target URL on injection.
359    ///
360    /// # Errors
361    ///
362    /// Returns [`CdpError::InvalidCookie`] when:
363    /// - The token has no `=` separator.
364    /// - The name is empty or contains whitespace / control bytes.
365    /// - The value contains control bytes (header injection).
366    pub fn parse_kv(token: &str) -> Result<Self, CdpError> {
367        let (name, value) = token
368            .split_once('=')
369            .ok_or_else(|| CdpError::InvalidCookie {
370                field: "name",
371                input: token.to_owned(),
372                reason: "expected `name=value`",
373            })?;
374        let name = name.trim().to_owned();
375        let value = value.to_owned();
376        validate_cookie_name(&name)?;
377        validate_cookie_value(&value)?;
378        Ok(Self {
379            name,
380            value,
381            ..Self::default()
382        })
383    }
384
385    fn into_cdp_param(self, default_url: Option<&str>) -> CookieParam {
386        let mut param = CookieParam::new(self.name, self.value);
387        param.url = self.url.or_else(|| default_url.map(str::to_owned));
388        param.domain = self.domain;
389        param.path = self.path;
390        param.secure = self.secure;
391        param.http_only = self.http_only;
392        param
393    }
394}
395
/// Report whether `byte` is a C0 control character (`0x00..=0x1F`) or
/// DEL (`0x7F`).
///
/// Plumb refuses the whole C0 range rather than only the HTTP-specific
/// CR/LF/NUL trio: a cookie or header value carrying any control byte
/// is almost certainly a smuggling attempt and never a legitimate
/// input. That includes tab (`\t`, `0x09`) — HTTP whitespace folding
/// has been deprecated in RFC 7230 §3.2.4 and Plumb has no
/// compatibility need for it on inputs the user types into a shell
/// flag.
fn is_disallowed_ctl(byte: u8) -> bool {
    matches!(byte, 0x00..=0x1F | 0x7F)
}
407
408fn validate_no_ctl(input: &str, field: &'static str, kind: &'static str) -> Result<(), CdpError> {
409    if input.bytes().any(is_disallowed_ctl) {
410        return match kind {
411            "cookie" => Err(CdpError::InvalidCookie {
412                field,
413                input: input.to_owned(),
414                reason: "control characters (C0 / DEL) are not allowed",
415            }),
416            _ => Err(CdpError::InvalidHeader {
417                field,
418                input: input.to_owned(),
419                reason: "control characters (C0 / DEL) are not allowed",
420            }),
421        };
422    }
423    Ok(())
424}
425
426/// Validate an HTTP header name. Rejects empty names, names containing
427/// `:` (the field-line separator), whitespace, or control bytes.
428///
429/// Shared between [`parse_header_kv`] (CLI input parser) and the
430/// pre-injection sweep in `install_extra_headers` (library boundary).
431fn validate_header_name(name: &str) -> Result<(), CdpError> {
432    if name.is_empty() {
433        return Err(CdpError::InvalidHeader {
434            field: "name",
435            input: name.to_owned(),
436            reason: "name must not be empty",
437        });
438    }
439    if name
440        .bytes()
441        .any(|b| b == b':' || b == b' ' || b == b'\t' || is_disallowed_ctl(b))
442    {
443        return Err(CdpError::InvalidHeader {
444            field: "name",
445            input: name.to_owned(),
446            reason: "name must not contain whitespace, `:`, or control bytes",
447        });
448    }
449    Ok(())
450}
451
452/// Validate a cookie name. Rejects empty names, names containing `=`
453/// (the cookie separator), whitespace, or control bytes.
454///
455/// Shared between [`Cookie::parse_kv`] (CLI input parser) and the
456/// pre-injection sweep in `install_cookies` (library boundary). The
457/// rules mirror RFC 6265 token characters minus the bytes Chromium's
458/// `Network.setCookies` would reject.
459fn validate_cookie_name(name: &str) -> Result<(), CdpError> {
460    if name.is_empty() {
461        return Err(CdpError::InvalidCookie {
462            field: "name",
463            input: name.to_owned(),
464            reason: "name must not be empty",
465        });
466    }
467    if name
468        .bytes()
469        .any(|b| b == b'=' || b == b' ' || b == b'\t' || is_disallowed_ctl(b))
470    {
471        return Err(CdpError::InvalidCookie {
472            field: "name",
473            input: name.to_owned(),
474            reason: "name must not contain whitespace, `=`, or control bytes",
475        });
476    }
477    Ok(())
478}
479
480/// Validate a cookie value. Rejects values containing whitespace
481/// (which Chromium normalizes inconsistently) or control bytes.
482///
483/// Shared between [`Cookie::parse_kv`] and `install_cookies`.
484fn validate_cookie_value(value: &str) -> Result<(), CdpError> {
485    if value.bytes().any(is_disallowed_ctl) {
486        return Err(CdpError::InvalidCookie {
487            field: "value",
488            input: value.to_owned(),
489            reason: "control characters (C0 / DEL) are not allowed",
490        });
491    }
492    Ok(())
493}
494
495/// Parse and validate an HTTP header `name: value` token.
496///
497/// # Errors
498///
499/// Returns [`CdpError::InvalidHeader`] when:
500/// - The token has no `:` separator.
501/// - The name is empty or contains whitespace / `:` / control bytes.
502/// - The value contains control bytes (header injection).
503pub fn parse_header_kv(token: &str) -> Result<(String, String), CdpError> {
504    let (name, value) = token
505        .split_once(':')
506        .ok_or_else(|| CdpError::InvalidHeader {
507            field: "name",
508            input: token.to_owned(),
509            reason: "expected `name: value`",
510        })?;
511    let name = name.trim().to_owned();
512    let value = value.trim_start().to_owned();
513    validate_header_name(&name)?;
514    validate_no_ctl(&value, "value", "header")?;
515    Ok((name, value))
516}
517
/// Playwright `storage-state.json` representation.
///
/// Matches the format Playwright writes via
/// [`browserContext.storageState()`](https://playwright.dev/docs/api/class-browsercontext#browser-context-storage-state)
/// — a `cookies` array plus an `origins` array of `{ origin,
/// localStorage }`. Deserialized with `deny_unknown_fields` so a
/// future Playwright addition fails loudly rather than being silently
/// ignored.
///
/// Both top-level arrays are optional in the JSON and default to
/// empty when absent.
#[derive(Debug, Clone, Default, PartialEq, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct StorageState {
    /// Cookies preserved across the session.
    #[serde(default)]
    pub cookies: Vec<StorageStateCookie>,
    /// Per-origin localStorage entries.
    #[serde(default)]
    pub origins: Vec<StorageStateOrigin>,
}
536
/// One cookie entry in a Playwright `storage-state.json`.
///
/// `name`, `value`, `domain`, and `path` are required in the JSON; the
/// remaining fields fall back to their type defaults when absent.
#[derive(Debug, Clone, PartialEq, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct StorageStateCookie {
    /// Cookie name.
    pub name: String,
    /// Cookie value.
    pub value: String,
    /// Cookie domain.
    pub domain: String,
    /// Cookie path.
    pub path: String,
    /// Cookie expiration as a Unix timestamp; Playwright uses `-1` for
    /// session cookies.
    ///
    /// NOTE(review): when the key is missing, `#[serde(default)]`
    /// yields `0.0`, not the `-1` session sentinel — confirm that
    /// consumers of this field treat `0.0` as intended.
    #[serde(default)]
    pub expires: f64,
    /// `HttpOnly` flag (JSON key `httpOnly`).
    #[serde(default, rename = "httpOnly")]
    pub http_only: bool,
    /// `Secure` flag.
    #[serde(default)]
    pub secure: bool,
    /// `SameSite` attribute (typically `"Strict" | "Lax" | "None"`;
    /// JSON key `sameSite`). Kept as a free-form string.
    #[serde(default, rename = "sameSite")]
    pub same_site: Option<String>,
}
563
/// One `origins[]` entry in a Playwright `storage-state.json`.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct StorageStateOrigin {
    /// The origin URL (e.g. `https://example.com`). Required.
    pub origin: String,
    /// `localStorage` entries for the origin (JSON key `localStorage`).
    /// Defaults to empty when the key is absent.
    #[serde(default, rename = "localStorage")]
    pub local_storage: Vec<StorageStateLocalStorageEntry>,
}
574
/// One `localStorage[]` entry in a Playwright `storage-state.json`.
///
/// Both fields are required in the JSON, and unknown keys are rejected.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct StorageStateLocalStorageEntry {
    /// localStorage key.
    pub name: String,
    /// localStorage value.
    pub value: String,
}
584
585impl StorageState {
586    /// Parse a Playwright `storage-state.json` from a string.
587    ///
588    /// Validates every cookie name, value, domain, and path for
589    /// header-injection-style payloads (control bytes) and sorts the
590    /// cookies / origins / localStorage entries for deterministic
591    /// injection order.
592    ///
593    /// # Errors
594    ///
595    /// Returns [`CdpError::MalformedStorageState`] with `path = ""` when
596    /// the JSON cannot be parsed. Returns [`CdpError::InvalidCookie`]
597    /// when a cookie field contains control bytes. Callers that have a
598    /// real path on hand should use [`Self::load_from_path`] instead so
599    /// the error carries the source filename.
600    pub fn parse_str(json: &str) -> Result<Self, CdpError> {
601        let mut state: Self =
602            serde_json::from_str(json).map_err(|err| CdpError::MalformedStorageState {
603                path: PathBuf::new(),
604                reason: err.to_string(),
605            })?;
606        // Validate every cookie name/value/domain/path for
607        // header-injection style payloads — Playwright files are
608        // typically machine-written but Plumb cannot trust their
609        // provenance. `domain` and `path` flow into a CDP
610        // `Network.setCookies` call alongside the name/value, so an
611        // unchecked CR/LF in either field would smuggle just as
612        // effectively as one in the value.
613        for cookie in &state.cookies {
614            validate_no_ctl(&cookie.name, "name", "cookie")?;
615            validate_no_ctl(&cookie.value, "value", "cookie")?;
616            validate_no_ctl(&cookie.domain, "domain", "cookie")?;
617            validate_no_ctl(&cookie.path, "path", "cookie")?;
618        }
619        // Sort cookies and origins for deterministic injection order.
620        state.cookies.sort_by(|a, b| {
621            (a.domain.as_str(), a.name.as_str()).cmp(&(b.domain.as_str(), b.name.as_str()))
622        });
623        state.origins.sort_by(|a, b| a.origin.cmp(&b.origin));
624        for origin in &mut state.origins {
625            origin.local_storage.sort_by(|a, b| a.name.cmp(&b.name));
626        }
627        Ok(state)
628    }
629
630    /// Read and parse a storage-state file from disk.
631    ///
632    /// # Errors
633    ///
634    /// Returns [`CdpError::InvalidPath`] when the path fails the safe-path
635    /// check, or [`CdpError::MalformedStorageState`] when the file cannot
636    /// be read or parsed.
637    ///
638    /// # Security boundary
639    ///
640    /// The safe-path check via `canonicalize_safe_path` is
641    /// **best-effort** only — see that function's docs. The
642    /// canonicalize-then-open sequence has an inherent TOCTOU window
643    /// where a co-located attacker with write access to a parent
644    /// directory could swap the resolved file for a symlink between
645    /// the check and the read. Plumb's storage-state loader is
646    /// intended for files the invoking user controls (typically a
647    /// Playwright export checked into the project). It MUST NOT be
648    /// treated as a sandbox against hostile local users. The full
649    /// mitigation (`cap_std::Dir::open`) is out of scope for the wave
650    /// that introduced this loader.
651    pub fn load_from_path(path: &Path) -> Result<Self, CdpError> {
652        let canonical = canonicalize_safe_path(path)?;
653        let bytes =
654            std::fs::read_to_string(&canonical).map_err(|err| CdpError::MalformedStorageState {
655                path: canonical.clone(),
656                reason: err.to_string(),
657            })?;
658        // Re-stamp `MalformedStorageState` errors with the source path
659        // so callers see *which* file failed; cookie-validation errors
660        // pass through unchanged because they carry the offending input
661        // rather than a path.
662        Self::parse_str(&bytes).map_err(|err| match err {
663            CdpError::MalformedStorageState { reason, .. } => CdpError::MalformedStorageState {
664                path: canonical,
665                reason,
666            },
667            other => other,
668        })
669    }
670}
671
/// Public CLI-facing wrapper around `canonicalize_safe_path`.
///
/// `plumb-cli` validates `--auth-script` / `--storage-state` paths up
/// front (before driver dispatch) so the FakeDriver path also rejects
/// outside-CWD inputs — without this, the safe-path check would only
/// fire on the real Chromium code path and tests against
/// `plumb-fake://hello` would silently accept a malicious-looking
/// `--auth-script /etc/passwd`.
///
/// On success the returned path is the canonicalized form of `path`,
/// not the path as originally supplied.
///
/// # Errors
///
/// Returns [`CdpError::InvalidPath`] when `path` cannot be
/// canonicalized, when the current working directory cannot be read,
/// or when `path` canonicalizes to a location outside the current
/// working directory.
///
/// # Security boundary
///
/// Same caveats as `canonicalize_safe_path`: this is a best-effort
/// usability guard, **not** a sandbox. See that function's docs for
/// the full TOCTOU discussion.
pub fn validate_safe_path(path: &Path) -> Result<PathBuf, CdpError> {
    // Pure delegation: the private helper holds the actual policy.
    canonicalize_safe_path(path)
}
695
696/// Canonicalize `path` and reject symlinks pointing outside the current
697/// working directory.
698///
699/// `--auth-script` and `--storage-state` accept arbitrary file paths,
700/// so the caller-side check is the last guard before we read user
701/// content. The check refuses paths that:
702/// - cannot be canonicalized (file does not exist / no permission),
703/// - resolve to a different prefix than the current working directory.
704///
705/// # Security boundary
706///
707/// This is a **best-effort** guard against accidental path issues
708/// (typos, copy-pasted absolute paths, runs from the wrong CWD). It is
709/// **not** a security boundary against a co-located attacker who can
710/// race the file system — the canonicalize step and the subsequent
711/// `std::fs::read_to_string` are two separate `open(2)` syscalls, and
712/// an attacker with write access to a parent directory of `path` can
713/// swap the canonicalized target for a symlink between the check and
714/// the read (TOCTOU). A full mitigation would use `cap_std::Dir::open`
715/// to keep the canonicalization and the read inside a single
716/// directory handle; that change is out of scope for the wave that
717/// added this helper.
718///
719/// Future maintainers MUST NOT assume this function defends against a
720/// hostile local user. Treat it as a usability check, not a sandbox.
721fn canonicalize_safe_path(path: &Path) -> Result<PathBuf, CdpError> {
722    let canonical = path.canonicalize().map_err(|err| CdpError::InvalidPath {
723        path: path.to_path_buf(),
724        reason: format!("could not canonicalize: {err}"),
725    })?;
726    let cwd = std::env::current_dir().map_err(|err| CdpError::InvalidPath {
727        path: path.to_path_buf(),
728        reason: format!("could not read CWD: {err}"),
729    })?;
730    let cwd_canonical = cwd.canonicalize().unwrap_or(cwd);
731    if !canonical.starts_with(&cwd_canonical) {
732        return Err(CdpError::InvalidPath {
733            path: path.to_path_buf(),
734            reason: format!(
735                "path resolves to `{}`, which is outside the current working directory `{}`",
736                canonical.display(),
737                cwd_canonical.display()
738            ),
739        });
740    }
741    Ok(canonical)
742}
743
/// Async trait for browser drivers. Implementations are expected to be
/// cheap to construct and expensive per-call.
///
/// Both methods return `Send` futures, and implementors must be
/// `Send + Sync`.
pub trait BrowserDriver: Send + Sync {
    /// Snapshot a single target.
    ///
    /// # Errors
    ///
    /// Resolves to a [`CdpError`] when the driver cannot produce a
    /// snapshot for `target`.
    fn snapshot(
        &self,
        target: Target,
    ) -> impl std::future::Future<Output = Result<PlumbSnapshot, CdpError>> + Send;

    /// Snapshot a list of targets, reusing a single browser session
    /// for the whole batch. The default implementation calls
    /// [`snapshot`](BrowserDriver::snapshot) per target and is suitable
    /// for cheap drivers (e.g. [`FakeDriver`]). Real drivers MUST
    /// override this to launch the browser exactly once per batch.
    ///
    /// Snapshots are returned in the same order as `targets`.
    ///
    /// # Errors
    ///
    /// The default implementation processes targets one at a time and
    /// stops at the first failure, returning that error; snapshots
    /// already captured for earlier targets are dropped.
    fn snapshot_all(
        &self,
        targets: Vec<Target>,
    ) -> impl std::future::Future<Output = Result<Vec<PlumbSnapshot>, CdpError>> + Send {
        async move {
            let mut out = Vec::with_capacity(targets.len());
            // Sequential on purpose: each target is awaited before the
            // next one starts, which keeps output order equal to input
            // order.
            for target in targets {
                out.push(self.snapshot(target).await?);
            }
            Ok(out)
        }
    }
}
773
774/// Configuration for [`ChromiumDriver`].
775#[derive(Debug, Default, Clone, PartialEq, Eq)]
776pub struct ChromiumOptions {
777    /// Explicit Chrome or Chromium executable path. When unset, Plumb asks
778    /// `chromiumoxide` to detect stable Chrome/Chromium installations.
779    pub executable_path: Option<PathBuf>,
780    /// Override the Chromium profile directory. When unset, `chromiumoxide`
781    /// reuses a single temp directory across all launches — which is fine
782    /// for sequential CLI invocations but causes profile-singleton lock
783    /// contention when multiple drivers run concurrently (e.g. the e2e
784    /// test suite). Tests pass per-thread tempdirs here.
785    ///
786    /// Profile contents do not flow into [`PlumbSnapshot`] output, so
787    /// varying this path does not violate the determinism invariant.
788    pub user_data_dir: Option<PathBuf>,
789    /// Cookies to install before navigation (PRD §15 — `--cookie`).
790    /// Iterated in `(name, value)` order for deterministic CDP traffic.
791    pub cookies: Vec<Cookie>,
792    /// Extra HTTP headers to attach to every request (PRD §15 —
793    /// `--header`). Sorted by name on injection so CDP traffic is
794    /// stable across runs.
795    pub headers: Vec<(String, String)>,
796    /// Path to a JavaScript file evaluated on every new document via
797    /// `Page.addScriptToEvaluateOnNewDocument` before navigation
798    /// (PRD §15 — `--auth-script`).
799    pub auth_script: Option<PathBuf>,
800    /// Path to a Playwright `storage-state.json` file. Cookies in the
801    /// file are installed before navigation; localStorage entries are
802    /// preserved as a parsed [`StorageState`] for downstream evaluation
803    /// after navigation when the origin matches.
804    pub storage_state: Option<PathBuf>,
805    /// Opt-in: when no [`Self::executable_path`] is set and no system
806    /// Chromium is detected, download Chrome-for-Testing pinned at
807    /// [`MIN_SUPPORTED_CHROMIUM_MAJOR`] into a Plumb-managed cache
808    /// directory and verify its SHA-256 before launch. Defaults to
809    /// `false`. See [`fetcher`] for the security trade-offs.
810    pub auto_fetch_chromium: bool,
811    /// Override the auto-fetch cache directory. When `None`, Plumb
812    /// resolves the platform default via [`fetcher::resolve_cache_dir`].
813    /// Useful for tests that want a tempdir-scoped cache and for
814    /// callers that ship Chromium alongside their app.
815    pub auto_fetch_cache_dir: Option<PathBuf>,
816}
817
818/// Real Chromium-backed driver.
819#[derive(Debug, Default, Clone, PartialEq, Eq)]
820pub struct ChromiumDriver {
821    options: ChromiumOptions,
822}
823
824impl ChromiumDriver {
825    /// Build a driver with explicit options.
826    #[must_use]
827    pub fn new(options: ChromiumOptions) -> Self {
828        Self { options }
829    }
830
831    fn browser_config(
832        &self,
833        target: &Target,
834        resolved_executable: Option<&Path>,
835    ) -> Result<BrowserConfig, CdpError> {
836        // PRD §16: pinning launch args removes a class of nondeterminism
837        // (scrollbar overlay differences across DPRs, OS-level scaling).
838        let scale_factor_arg = format!("--force-device-scale-factor={}", target.device_pixel_ratio);
839        let builder = BrowserConfig::builder()
840            .chrome_detection(DetectionOptions {
841                msedge: false,
842                unstable: false,
843            })
844            .window_size(target.width, target.height)
845            .arg("--hide-scrollbars")
846            .arg(scale_factor_arg);
847
848        // Precedence:
849        //   1. caller-resolved path (auto-fetch produced one),
850        //   2. user-supplied `executable_path`,
851        //   3. macOS `.app`-bundle priority list (see `chrome_path::detect`),
852        //   4. chromiumoxide auto-detect (no `chrome_executable` call).
853        let builder = if let Some(path) = resolved_executable {
854            ensure_executable_path(path)?;
855            builder.chrome_executable(path)
856        } else if let Some(path) = &self.options.executable_path {
857            ensure_executable_path(path)?;
858            builder.chrome_executable(path)
859        } else if let Some(path) = chrome_path::detect() {
860            // No need to `ensure_executable_path` — `detect` only
861            // returns paths that already passed an `is_file` probe.
862            builder.chrome_executable(path)
863        } else {
864            builder
865        };
866
867        let builder = if let Some(profile) = &self.options.user_data_dir {
868            builder.user_data_dir(profile)
869        } else {
870            builder
871        };
872
873        builder.build().map_err(|_| chromium_not_found())
874    }
875}
876
877impl BrowserDriver for ChromiumDriver {
878    async fn snapshot(&self, target: Target) -> Result<PlumbSnapshot, CdpError> {
879        let mut snapshots = self.snapshot_all(vec![target]).await?;
880        snapshots.pop().ok_or_else(|| {
881            // Unreachable in practice: `snapshot_all` returns one snapshot per
882            // input target on the success path. Treat a violation of that
883            // contract as an internal driver fault rather than panicking.
884            CdpError::Driver(Box::new(io::Error::other(
885                "ChromiumDriver::snapshot_all returned no snapshot for a single target",
886            )))
887        })
888    }
889
890    async fn snapshot_all(&self, targets: Vec<Target>) -> Result<Vec<PlumbSnapshot>, CdpError> {
891        if targets.is_empty() {
892            return Ok(Vec::new());
893        }
894
895        // Use the first target's dimensions and DPR for the initial
896        // launch (the `--force-device-scale-factor` arg is fixed at
897        // launch time). Per-target viewport / DPR is then applied via
898        // CDP `Emulation.setDeviceMetricsOverride` inside
899        // `capture_target`, which overrides the launch-time scale
900        // factor for every page after the first.
901        let first = &targets[0];
902        let resolved_executable = resolve_auto_fetch(&self.options).await?;
903        let config = self.browser_config(first, resolved_executable.as_deref())?;
904        let mut session = ChromiumSession::launch(config).await?;
905
906        let result: Result<Vec<PlumbSnapshot>, CdpError> = async {
907            validate_browser_version(&session.browser).await?;
908            let mut snapshots = Vec::with_capacity(targets.len());
909            for target in &targets {
910                let snap = capture_target(&session.browser, target, &self.options).await?;
911                snapshots.push(snap);
912            }
913            Ok(snapshots)
914        }
915        .await;
916
917        if let Err(cleanup_err) = session.shutdown().await {
918            tracing::debug!(error = %cleanup_err, "failed to clean up Chromium session");
919            if result.is_ok() {
920                return Err(cleanup_err);
921            }
922        }
923
924        result
925    }
926}
927
928async fn capture_target(
929    browser: &Browser,
930    target: &Target,
931    options: &ChromiumOptions,
932) -> Result<PlumbSnapshot, CdpError> {
933    let page = browser
934        .new_page("about:blank")
935        .await
936        .map_err(driver_error)?;
937
938    capture_on_page(&page, target, options).await
939}
940
941/// Apply viewport / animation hooks, install cookies and headers,
942/// navigate, capture a DOM snapshot.
943///
944/// Shared between `ChromiumDriver::capture_target` and
945/// [`PersistentBrowser::snapshot`] so that the per-target work is
946/// expressed in exactly one place. The function is split into discrete
947/// stages — `apply_viewport` (DPR + dimensions), `pre_navigate`
948/// (cookies, headers, auth-script, storage-state, animation killer,
949/// scrollbar killer), `goto` + waits, then capture.
950async fn capture_on_page(
951    page: &Page,
952    target: &Target,
953    options: &ChromiumOptions,
954) -> Result<PlumbSnapshot, CdpError> {
955    apply_viewport(page, target).await?;
956    // `pre_navigate` returns the parsed `StorageState` (when one is
957    // configured) so the post-navigate localStorage step reuses the
958    // same parsed value. Loading the file twice would open a
959    // time-of-check / time-of-use race where the file changes between
960    // cookie installation and localStorage replay.
961    let storage_state = pre_navigate(page, target, options).await?;
962
963    page.goto(target.url.as_str()).await.map_err(driver_error)?;
964    page.wait_for_navigation().await.map_err(driver_error)?;
965
966    apply_post_navigate_waits(page, target).await?;
967    apply_storage_state_local_storage(page, target, storage_state.as_ref()).await?;
968
969    let params = CaptureSnapshotParams {
970        computed_styles: COMPUTED_STYLE_WHITELIST
971            .iter()
972            .map(|s| (*s).to_string())
973            .collect(),
974        include_paint_order: Some(true),
975        include_dom_rects: Some(true),
976        include_blended_background_colors: Some(true),
977        include_text_color_opacities: None,
978    };
979
980    let response = page.execute(params).await.map_err(driver_error)?;
981    flatten_snapshot(target, &response.result)
982}
983
984/// A persistent Chromium browser kept warm across multiple snapshots.
985///
986/// Each [`PersistentBrowser::snapshot`] call creates a fresh
987/// **incognito browser context** (`Target.createBrowserContext`),
988/// opens a page in it, captures the snapshot, and disposes the
989/// context — so cookies, localStorage, and any other origin-scoped
990/// state from call N never leak into call N+1. The underlying Chromium
991/// process stays alive until [`PersistentBrowser::shutdown`] is called
992/// or the value is dropped.
993///
994/// Cheap to clone — clones share the same underlying browser via
995/// [`Arc`]. Implements [`BrowserDriver`].
996#[derive(Clone, Debug)]
997pub struct PersistentBrowser {
998    inner: Arc<PersistentBrowserInner>,
999}
1000
1001#[derive(Debug)]
1002struct PersistentBrowserInner {
1003    browser: Browser,
1004    handler_task: Mutex<Option<JoinHandle<()>>>,
1005    options: ChromiumOptions,
1006}
1007
1008impl PersistentBrowser {
1009    /// Launch Chromium and validate its version.
1010    ///
1011    /// Per-call viewport and DPR are applied via
1012    /// `Emulation.setDeviceMetricsOverride` inside [`Self::snapshot`],
1013    /// so the launch-time defaults here are placeholders sized to a
1014    /// 1280×800 desktop window.
1015    ///
1016    /// # Errors
1017    ///
1018    /// Returns [`CdpError::ChromiumNotFound`] when no Chromium binary
1019    /// can be located, [`CdpError::UnsupportedChromium`] when the
1020    /// detected Chromium reports a major version outside the supported
1021    /// range, or [`CdpError::Driver`] for any other launch failure.
1022    pub async fn launch(options: ChromiumOptions) -> Result<Self, CdpError> {
1023        let resolved_executable = resolve_auto_fetch(&options).await?;
1024        let config = persistent_browser_config(&options, resolved_executable.as_deref())?;
1025        let (browser, handler) = Browser::launch(config).await.map_err(map_launch_error)?;
1026        let handler_task = poll_handler(handler);
1027
1028        // Validate the version before stashing the browser in `Arc` —
1029        // on failure, dropping the browser here causes
1030        // `Browser::drop` to reap the child synchronously.
1031        if let Err(err) = validate_browser_version(&browser).await {
1032            handler_task.abort();
1033            drop(browser);
1034            return Err(err);
1035        }
1036
1037        Ok(Self {
1038            inner: Arc::new(PersistentBrowserInner {
1039                browser,
1040                handler_task: Mutex::new(Some(handler_task)),
1041                options,
1042            }),
1043        })
1044    }
1045
1046    /// Snapshot a single target inside a fresh incognito browser context.
1047    ///
1048    /// # Errors
1049    ///
1050    /// Returns the same error variants as [`ChromiumDriver::snapshot`]:
1051    /// [`CdpError::Driver`] for CDP failures and
1052    /// [`CdpError::MalformedSnapshot`] when the response cannot be
1053    /// flattened.
1054    pub async fn snapshot(&self, target: Target) -> Result<PlumbSnapshot, CdpError> {
1055        let ctx_id = self
1056            .inner
1057            .browser
1058            .create_browser_context(CreateBrowserContextParams::default())
1059            .await
1060            .map_err(driver_error)?;
1061
1062        let result: Result<PlumbSnapshot, CdpError> = async {
1063            let create_params = CreateTargetParams {
1064                url: "about:blank".to_string(),
1065                left: None,
1066                top: None,
1067                width: None,
1068                height: None,
1069                window_state: None,
1070                browser_context_id: Some(ctx_id.clone()),
1071                enable_begin_frame_control: None,
1072                new_window: None,
1073                background: None,
1074                for_tab: None,
1075                hidden: None,
1076            };
1077            let page = self
1078                .inner
1079                .browser
1080                .new_page(create_params)
1081                .await
1082                .map_err(driver_error)?;
1083            capture_on_page(&page, &target, &self.inner.options).await
1084        }
1085        .await;
1086
1087        // Always dispose the incognito context, even on failure. Mirror
1088        // the swallow-and-log pattern from `ChromiumSession::shutdown`
1089        // so cleanup errors never mask the underlying snapshot result.
1090        if let Err(err) = self
1091            .inner
1092            .browser
1093            .dispose_browser_context(ctx_id)
1094            .await
1095            .map_err(driver_error)
1096        {
1097            tracing::debug!(error = %err, "failed to dispose incognito browser context");
1098        }
1099
1100        result
1101    }
1102
1103    /// Gracefully close the underlying browser and abort the handler
1104    /// task.
1105    ///
1106    /// Idempotent — safe to call more than once. The first call sends
1107    /// `Browser.close` over CDP and aborts the handler task; subsequent
1108    /// calls observe the absent handle and return `Ok(())`.
1109    ///
1110    /// # Errors
1111    ///
1112    /// Currently never returns an error: cleanup failures are logged
1113    /// at `debug` and swallowed so callers can use `shutdown` as a
1114    /// best-effort hook on MCP exit. The signature retains `Result`
1115    /// for forward-compatibility.
1116    pub async fn shutdown(&self) -> Result<(), CdpError> {
1117        let handler_task = match self.inner.handler_task.lock() {
1118            Ok(mut guard) => guard.take(),
1119            Err(poisoned) => poisoned.into_inner().take(),
1120        };
1121
1122        if handler_task.is_none() {
1123            // Already shut down — preserve idempotence.
1124            return Ok(());
1125        }
1126
1127        if let Err(err) = self
1128            .inner
1129            .browser
1130            .execute(BrowserCloseParams::default())
1131            .await
1132        {
1133            tracing::debug!(error = %err, "failed to send Browser.close on shutdown");
1134        }
1135
1136        if let Some(task) = handler_task {
1137            task.abort();
1138        }
1139
1140        Ok(())
1141    }
1142}
1143
1144impl Drop for PersistentBrowserInner {
1145    fn drop(&mut self) {
1146        // Best-effort sync abort of the handler task. Sending CDP
1147        // commands here would require a runtime; `Browser::drop`
1148        // already reaps the child synchronously, so we only stop the
1149        // event loop.
1150        let task = match self.handler_task.lock() {
1151            Ok(mut guard) => guard.take(),
1152            Err(poisoned) => poisoned.into_inner().take(),
1153        };
1154        if let Some(task) = task {
1155            task.abort();
1156        }
1157    }
1158}
1159
1160impl BrowserDriver for PersistentBrowser {
1161    async fn snapshot(&self, target: Target) -> Result<PlumbSnapshot, CdpError> {
1162        Self::snapshot(self, target).await
1163    }
1164}
1165
1166fn persistent_browser_config(
1167    options: &ChromiumOptions,
1168    resolved_executable: Option<&Path>,
1169) -> Result<BrowserConfig, CdpError> {
1170    // PRD §16: pinning launch args removes a class of nondeterminism
1171    // (scrollbar overlay differences across DPRs, OS-level scaling).
1172    // `PersistentBrowser` does not fix a launch-time DPR — every
1173    // snapshot calls `Emulation.setDeviceMetricsOverride` to drive
1174    // both viewport and DPR per-call.
1175    let builder = BrowserConfig::builder()
1176        .chrome_detection(DetectionOptions {
1177            msedge: false,
1178            unstable: false,
1179        })
1180        .window_size(1280, 800)
1181        .arg("--hide-scrollbars");
1182
1183    // Same precedence rule as `ChromiumDriver::browser_config`.
1184    let builder = if let Some(path) = resolved_executable {
1185        ensure_executable_path(path)?;
1186        builder.chrome_executable(path)
1187    } else if let Some(path) = &options.executable_path {
1188        ensure_executable_path(path)?;
1189        builder.chrome_executable(path)
1190    } else if let Some(path) = chrome_path::detect() {
1191        builder.chrome_executable(path)
1192    } else {
1193        builder
1194    };
1195
1196    let builder = if let Some(profile) = &options.user_data_dir {
1197        builder.user_data_dir(profile)
1198    } else {
1199        builder
1200    };
1201
1202    builder.build().map_err(|_| chromium_not_found())
1203}
1204
1205/// When auto-fetch is enabled and the user didn't pin an
1206/// `executable_path`, resolve the cache directory and ensure a fetched
1207/// Chromium binary lives there. Returns the executable path the
1208/// `BrowserConfig` should pin; `None` means "fall through to whatever
1209/// the user supplied or to chromiumoxide's auto-detect."
1210///
1211/// Pure precedence rule: an explicit `executable_path` always wins
1212/// over auto-fetch. The two are not allowed to collide — if both are
1213/// set, the user's path is used and the fetcher is skipped.
1214async fn resolve_auto_fetch(options: &ChromiumOptions) -> Result<Option<PathBuf>, CdpError> {
1215    if !options.auto_fetch_chromium || options.executable_path.is_some() {
1216        return Ok(None);
1217    }
1218    let cache_dir = if let Some(dir) = options.auto_fetch_cache_dir.clone() {
1219        dir
1220    } else {
1221        fetcher::resolve_cache_dir()?
1222    };
1223    let path = fetcher::ensure_chromium(&cache_dir).await?;
1224    Ok(Some(path))
1225}
1226
1227async fn apply_viewport(page: &Page, target: &Target) -> Result<(), CdpError> {
1228    // `pin_dpr` (PRD §15 — `--dpr`) wins over `device_pixel_ratio` so
1229    // that callers can stress determinism by pinning a hidpi factor
1230    // independent of the viewport's logical DPR.
1231    let params = SetDeviceMetricsOverrideParams {
1232        width: i64::from(target.width),
1233        height: i64::from(target.height),
1234        device_scale_factor: target.effective_dpr(),
1235        mobile: false,
1236        scale: None,
1237        screen_width: None,
1238        screen_height: None,
1239        position_x: None,
1240        position_y: None,
1241        dont_set_visible_size: None,
1242        screen_orientation: None,
1243        viewport: None,
1244    };
1245    page.execute(params).await.map_err(driver_error)?;
1246    Ok(())
1247}
1248
1249/// All work that must happen on a fresh page before navigation.
1250///
1251/// Runs in this fixed order so behavior matches what users expect:
1252/// 1. Animation/scrollbar CSS killers — PRD §16 determinism.
1253/// 2. Auth script — runs before any page script, so the page-side
1254///    bootstrap can set window globals before the SPA boots.
1255/// 3. Cookies and HTTP headers — set on the network layer before the
1256///    very first request leaves Chromium.
1257/// 4. Storage-state cookies — same network layer; localStorage entries
1258///    in the storage-state are deferred to [`apply_storage_state_local_storage`]
1259///    after the origin loads, since localStorage is origin-scoped.
1260///
1261/// When [`ChromiumOptions::storage_state`] is set, the file is loaded
1262/// and parsed exactly once here. The returned [`StorageState`] is
1263/// threaded back into [`apply_storage_state_local_storage`] so the
1264/// driver never re-reads the file (closing a TOCTOU window where the
1265/// content could change between cookie installation and localStorage
1266/// replay).
1267async fn pre_navigate(
1268    page: &Page,
1269    target: &Target,
1270    options: &ChromiumOptions,
1271) -> Result<Option<StorageState>, CdpError> {
1272    if target.disable_animations {
1273        inject_animation_killer(page).await?;
1274    }
1275    if target.hide_scrollbars {
1276        inject_scrollbar_killer(page).await?;
1277    }
1278    if let Some(script_path) = options.auth_script.as_deref() {
1279        inject_auth_script(page, script_path).await?;
1280    }
1281    if !options.headers.is_empty() {
1282        install_extra_headers(page, &options.headers).await?;
1283    }
1284    if !options.cookies.is_empty() {
1285        install_cookies(page, &options.cookies, target.url.as_str()).await?;
1286    }
1287    let storage_state = if let Some(state_path) = options.storage_state.as_deref() {
1288        let state = StorageState::load_from_path(state_path)?;
1289        install_storage_state_cookies(page, &state).await?;
1290        Some(state)
1291    } else {
1292        None
1293    };
1294    Ok(storage_state)
1295}
1296
1297/// Wait stages that must run *after* navigation. PRD §15 — `--wait-for`
1298/// and `--wait-ms`.
1299///
1300/// Selector wait fires first (so users can synchronize on a
1301/// known-rendered element); the additional `--wait-ms` then runs as a
1302/// belt-and-suspenders sleep for SPAs whose post-render work doesn't
1303/// finish in the same tick.
1304async fn apply_post_navigate_waits(page: &Page, target: &Target) -> Result<(), CdpError> {
1305    if let Some(selector) = target.wait_for_selector.as_deref() {
1306        wait_for_selector(page, selector).await?;
1307    }
1308    if let Some(ms) = target.wait_ms {
1309        tokio::time::sleep(std::time::Duration::from_millis(ms)).await;
1310    }
1311    Ok(())
1312}
1313
1314/// Install localStorage entries from an already-parsed Playwright
1315/// storage-state.
1316///
1317/// Runs *after* navigation because `localStorage` is origin-scoped and
1318/// the only way to write to it from the driver is to evaluate a script
1319/// in the page context. Entries whose `origin` does not match the
1320/// navigated URL's origin are skipped (same isolation Playwright applies).
1321///
1322/// The caller provides the parsed [`StorageState`] (loaded once in
1323/// [`pre_navigate`]) so the file is never read twice — closing the
1324/// TOCTOU window between cookie installation and localStorage replay.
1325async fn apply_storage_state_local_storage(
1326    page: &Page,
1327    target: &Target,
1328    state: Option<&StorageState>,
1329) -> Result<(), CdpError> {
1330    let Some(state) = state else {
1331        return Ok(());
1332    };
1333    let target_origin = origin_of(target.url.as_str()).unwrap_or_default();
1334    for origin_entry in &state.origins {
1335        if origin_entry.origin != target_origin {
1336            continue;
1337        }
1338        for entry in &origin_entry.local_storage {
1339            // Build a JSON.stringify-style argument so the values are
1340            // safe regardless of contained quotes.
1341            let key = serde_json::to_string(&entry.name).map_err(|err| {
1342                CdpError::MalformedStorageState {
1343                    path: PathBuf::new(),
1344                    reason: format!("could not serialize key: {err}"),
1345                }
1346            })?;
1347            let value = serde_json::to_string(&entry.value).map_err(|err| {
1348                CdpError::MalformedStorageState {
1349                    path: PathBuf::new(),
1350                    reason: format!("could not serialize value: {err}"),
1351                }
1352            })?;
1353            let script = format!("window.localStorage.setItem({key}, {value});");
1354            page.evaluate(script.as_str()).await.map_err(driver_error)?;
1355        }
1356    }
1357    Ok(())
1358}
1359
1360fn origin_of(input: &str) -> Option<String> {
1361    // WHATWG-compliant origin: `Url::origin().ascii_serialization()`
1362    // handles default-port elision (`:443` for `https`, `:80` for
1363    // `http`), scheme case-folding, IDNA host normalization, and
1364    // strips userinfo / path / query / fragment. Matches Playwright's
1365    // stored `origin` shape so storage-state origin compares are not
1366    // tripped up by `https://example.com:443/foo` vs
1367    // `https://example.com`.
1368    let parsed = url::Url::parse(input).ok()?;
1369    let origin = parsed.origin();
1370    if origin.is_tuple() {
1371        Some(origin.ascii_serialization())
1372    } else {
1373        // Opaque origins (e.g. `data:`, `file:`) cannot match a
1374        // Playwright-recorded site origin — bail out.
1375        None
1376    }
1377}
1378
1379async fn inject_animation_killer(page: &Page) -> Result<(), CdpError> {
1380    // PRD §16 determinism mitigation: install a CSS-injection script that
1381    // runs before any page script, so transitions/animations don't race
1382    // with `captureSnapshot` and produce different bounds across runs.
1383    let source = "(() => { \
1384        const style = document.createElement('style'); \
1385        style.textContent = '*, *::before, *::after { \
1386            animation-duration: 0s !important; \
1387            animation-delay: 0s !important; \
1388            transition-duration: 0s !important; \
1389            transition-delay: 0s !important; \
1390            caret-color: transparent !important; \
1391        }'; \
1392        (document.head || document.documentElement).appendChild(style); \
1393    })();";
1394    add_script_to_evaluate_on_new_document(page, source).await
1395}
1396
1397async fn inject_scrollbar_killer(page: &Page) -> Result<(), CdpError> {
1398    // PRD §16 determinism mitigation: scrollbar overlay differs across
1399    // platforms / DPRs. The `--hide-scrollbars` Chromium launch arg is a
1400    // first line of defense; this CSS injection covers the cases where
1401    // the launch arg alone is not honored (Linux non-overlay scrollbars,
1402    // CSS-painted scrollbars in some apps).
1403    let source = "(() => { \
1404        const style = document.createElement('style'); \
1405        style.textContent = 'html { overflow: hidden !important; } \
1406            ::-webkit-scrollbar { display: none !important; }'; \
1407        (document.head || document.documentElement).appendChild(style); \
1408    })();";
1409    add_script_to_evaluate_on_new_document(page, source).await
1410}
1411
1412/// Read `path` (validated as a `.js` file under the CWD) and register
1413/// it as `Page.addScriptToEvaluateOnNewDocument` so it runs before any
1414/// page script.
1415///
1416/// # Security boundary
1417///
1418/// The safe-path check via `canonicalize_safe_path` is best-effort
1419/// only — see that function's docs. Treat the resulting file content
1420/// as user-trusted: the CLI hands us a path supplied either by the
1421/// invoking user or by an `auth-script` already in the project, never
1422/// by a remote source. The TOCTOU window between canonicalization and
1423/// `std::fs::read_to_string` is acknowledged but not yet closed; the
1424/// full fix requires `cap_std`.
1425async fn inject_auth_script(page: &Page, path: &Path) -> Result<(), CdpError> {
1426    let canonical = canonicalize_safe_path(path)?;
1427    if canonical.extension().and_then(|s| s.to_str()) != Some("js") {
1428        return Err(CdpError::InvalidPath {
1429            path: path.to_path_buf(),
1430            reason: "auth script must have a `.js` extension".to_owned(),
1431        });
1432    }
1433    let source = std::fs::read_to_string(&canonical).map_err(|err| CdpError::InvalidPath {
1434        path: canonical.clone(),
1435        reason: format!("could not read: {err}"),
1436    })?;
1437    add_script_to_evaluate_on_new_document(page, &source).await
1438}
1439
1440async fn add_script_to_evaluate_on_new_document(page: &Page, source: &str) -> Result<(), CdpError> {
1441    let params = AddScriptToEvaluateOnNewDocumentParams {
1442        source: source.to_owned(),
1443        world_name: None,
1444        include_command_line_api: None,
1445        run_immediately: Some(true),
1446    };
1447    page.execute(params).await.map_err(driver_error)?;
1448    Ok(())
1449}
1450
1451async fn install_extra_headers(page: &Page, headers: &[(String, String)]) -> Result<(), CdpError> {
1452    // Sort by name for deterministic CDP traffic. Plumb's invariant is
1453    // byte-identical *output*, but stable network-layer requests make
1454    // diffing tcpdumps across runs viable too.
1455    let mut entries: Vec<(String, String)> = headers.to_vec();
1456    entries.sort_by(|a, b| a.0.cmp(&b.0));
1457    let mut object = serde_json::Map::with_capacity(entries.len());
1458    for (name, value) in entries {
1459        // Library-boundary re-validation: `headers: Vec<(String, String)>`
1460        // is `pub` on `ChromiumOptions`, so a downstream consumer can
1461        // construct entries without going through `parse_header_kv`.
1462        // Apply the same checks here to keep header-injection guards
1463        // intact regardless of how the entries were built.
1464        validate_header_name(&name)?;
1465        validate_no_ctl(&value, "value", "header")?;
1466        object.insert(name, serde_json::Value::String(value));
1467    }
1468    let params = SetExtraHttpHeadersParams::new(Headers::new(serde_json::Value::Object(object)));
1469    page.execute(params).await.map_err(driver_error)?;
1470    Ok(())
1471}
1472
1473async fn install_cookies(
1474    page: &Page,
1475    cookies: &[Cookie],
1476    default_url: &str,
1477) -> Result<(), CdpError> {
1478    // Sort by `(name, value)` so the network-layer call is stable across
1479    // runs even when the caller supplied cookies in a different order.
1480    let mut sorted: Vec<Cookie> = cookies.to_vec();
1481    sorted.sort_by(|a, b| {
1482        (a.name.as_str(), a.value.as_str()).cmp(&(b.name.as_str(), b.value.as_str()))
1483    });
1484    // Library-boundary re-validation: `Cookie` fields are all `pub`, so
1485    // a downstream consumer can build a `Cookie` without going through
1486    // `Cookie::parse_kv`. Apply the same name/value checks here so the
1487    // injection guards are not bypassable. `domain` and `path`, when
1488    // present, also pass through the control-byte check.
1489    for cookie in &sorted {
1490        validate_cookie_name(&cookie.name)?;
1491        validate_cookie_value(&cookie.value)?;
1492        if let Some(domain) = cookie.domain.as_deref() {
1493            validate_no_ctl(domain, "domain", "cookie")?;
1494        }
1495        if let Some(path) = cookie.path.as_deref() {
1496            validate_no_ctl(path, "path", "cookie")?;
1497        }
1498    }
1499    let url_for_cookies = if default_url.starts_with("http") {
1500        Some(default_url)
1501    } else {
1502        None
1503    };
1504    let params = SetCookiesParams::new(
1505        sorted
1506            .into_iter()
1507            .map(|c| c.into_cdp_param(url_for_cookies))
1508            .collect(),
1509    );
1510    page.execute(params).await.map_err(driver_error)?;
1511    Ok(())
1512}
1513
1514async fn install_storage_state_cookies(page: &Page, state: &StorageState) -> Result<(), CdpError> {
1515    if state.cookies.is_empty() {
1516        return Ok(());
1517    }
1518    let mut params: Vec<CookieParam> = Vec::with_capacity(state.cookies.len());
1519    for cookie in &state.cookies {
1520        let mut p = CookieParam::new(cookie.name.clone(), cookie.value.clone());
1521        p.domain = Some(cookie.domain.clone());
1522        p.path = Some(cookie.path.clone());
1523        p.secure = Some(cookie.secure);
1524        p.http_only = Some(cookie.http_only);
1525        params.push(p);
1526    }
1527    page.execute(SetCookiesParams::new(params))
1528        .await
1529        .map_err(driver_error)?;
1530    Ok(())
1531}
1532
1533async fn wait_for_selector(page: &Page, selector: &str) -> Result<(), CdpError> {
1534    // Poll `find_element` with a 50ms backoff up to 10 seconds total
1535    // (PRD §15 default). The selector is the users contract for "the
1536    // page is rendered enough for me" — burning the full 10 seconds is
1537    // intentional when the selector never matches; we surface that as a
1538    // driver error so CI fails loudly rather than capturing a half-baked
1539    // snapshot.
1540    //
1541    // Wall-clock-free implementation: an outer `tokio::time::timeout`
1542    // bounds the whole loop. Tokios timer infrastructure does its own
1543    // monotonic time tracking internally and is allowed in `plumb-cdp`
1544    // because it doesnt leak into the snapshot (PRD §9 isolates the
1545    // "no wall-clock" rule to the rule engine and observable output).
1546    let attempt = async {
1547        loop {
1548            match page.find_element(selector.to_owned()).await {
1549                Ok(_) => return Ok::<(), CdpError>(()),
1550                Err(_) => {
1551                    tokio::time::sleep(std::time::Duration::from_millis(50)).await;
1552                }
1553            }
1554        }
1555    };
1556    match tokio::time::timeout(std::time::Duration::from_secs(10), attempt).await {
1557        Ok(result) => result,
1558        Err(_) => Err(CdpError::Driver(Box::new(io::Error::other(format!(
1559            "wait_for_selector `{selector}` exhausted 10s budget"
1560        ))))),
1561    }
1562}
1563
1564/// Deterministic fake driver. Recognizes `plumb-fake://hello` and returns
1565/// [`PlumbSnapshot::canned`]. Used by the walking-skeleton CLI and by
1566/// downstream tests.
1567///
1568/// Viewport-aware end-to-end: the returned snapshot's viewport name,
1569/// width, and height match the target, and any per-node rect that
1570/// covered the canned viewport is rescaled to the target dimensions
1571/// so that hand-testing multi-viewport behavior produces the expected
1572/// rects rather than the canned 1280x800 ones.
1573#[derive(Debug, Default, Clone, Copy)]
1574pub struct FakeDriver;
1575
1576impl BrowserDriver for FakeDriver {
1577    #[allow(clippy::unused_async)]
1578    async fn snapshot(&self, target: Target) -> Result<PlumbSnapshot, CdpError> {
1579        if target.url == "plumb-fake://hello" {
1580            let mut snap = PlumbSnapshot::canned();
1581            // Capture the canned viewport bounds before overwriting so
1582            // we can rewrite any node rect that covered the full
1583            // canned viewport to the target's dimensions.
1584            let canned_w = snap.viewport_width;
1585            let canned_h = snap.viewport_height;
1586            snap.viewport = target.viewport.clone();
1587            snap.viewport_width = target.width;
1588            snap.viewport_height = target.height;
1589            for node in &mut snap.nodes {
1590                if let Some(rect) = node.rect.as_mut()
1591                    && rect.x == 0
1592                    && rect.y == 0
1593                    && rect.width == canned_w
1594                    && rect.height == canned_h
1595                {
1596                    rect.width = target.width;
1597                    rect.height = target.height;
1598                }
1599            }
1600            Ok(snap)
1601        } else {
1602            Err(CdpError::UnknownFakeUrl(target.url))
1603        }
1604    }
1605}
1606
/// Report whether `url` uses the `plumb-fake://` scheme served by the
/// fake driver.
#[must_use]
pub fn is_fake_url(url: &str) -> bool {
    const FAKE_SCHEME: &str = "plumb-fake://";
    url.strip_prefix(FAKE_SCHEME).is_some()
}
1612
1613fn ensure_executable_path(path: &Path) -> Result<(), CdpError> {
1614    if path.is_file() {
1615        Ok(())
1616    } else {
1617        Err(chromium_not_found())
1618    }
1619}
1620
1621fn chromium_not_found() -> CdpError {
1622    CdpError::ChromiumNotFound {
1623        install_hint: chromium_install_hint(),
1624    }
1625}
1626
1627fn chromium_install_hint() -> String {
1628    let platform_hint = if cfg!(target_os = "macos") {
1629        "macOS: install Google Chrome or run `brew install --cask chromium`."
1630    } else if cfg!(target_os = "windows") {
1631        "Windows: install Google Chrome or Chromium and pass the `.exe` path if it is not auto-detected."
1632    } else {
1633        "Linux: install `google-chrome-stable`, `chromium`, or `chromium-browser` with your package manager."
1634    };
1635
1636    // The `--executable-path` mention here is for the not-found case:
1637    // pointing at a binary auto-detect missed. It does NOT bypass the
1638    // version check — the supplied binary still has to fall in the
1639    // supported range.
1640    format!(
1641        "Install Chrome/Chromium between major {MIN_SUPPORTED_CHROMIUM_MAJOR} and {MAX_SUPPORTED_CHROMIUM_MAJOR} (inclusive), or pass `--executable-path <path>` to a Chromium binary in that range that auto-detect missed. {platform_hint}"
1642    )
1643}
1644
1645struct ChromiumSession {
1646    browser: Browser,
1647    handler_task: JoinHandle<()>,
1648}
1649
1650impl ChromiumSession {
1651    async fn launch(config: BrowserConfig) -> Result<Self, CdpError> {
1652        let (browser, handler) = Browser::launch(config).await.map_err(map_launch_error)?;
1653        let handler_task = poll_handler(handler);
1654        Ok(Self {
1655            browser,
1656            handler_task,
1657        })
1658    }
1659
1660    async fn shutdown(&mut self) -> Result<(), CdpError> {
1661        let close_result = self.browser.close().await.map_err(driver_error);
1662        if let Err(close_err) = close_result {
1663            if let Err(kill_err) = kill_browser(&mut self.browser).await {
1664                tracing::debug!(error = %kill_err, "failed to kill Chromium after close error");
1665            }
1666            self.abort_handler().await;
1667            return Err(close_err);
1668        }
1669
1670        if let Err(wait_err) = self.browser.wait().await {
1671            let cleanup_err = io_error(wait_err);
1672            if let Err(kill_err) = kill_browser(&mut self.browser).await {
1673                tracing::debug!(error = %kill_err, "failed to kill Chromium after wait error");
1674            }
1675            self.abort_handler().await;
1676            return Err(cleanup_err);
1677        }
1678
1679        self.abort_handler().await;
1680        Ok(())
1681    }
1682
1683    async fn abort_handler(&mut self) {
1684        self.handler_task.abort();
1685        if let Err(join_err) = (&mut self.handler_task).await
1686            && !join_err.is_cancelled()
1687        {
1688            tracing::debug!(error = %join_err, "Chromium handler task failed");
1689        }
1690    }
1691}
1692
1693fn poll_handler(mut handler: Handler) -> JoinHandle<()> {
1694    tokio::spawn(async move {
1695        while let Some(result) = handler.next().await {
1696            if let Err(err) = result {
1697                tracing::debug!(error = %err, "Chromium handler error");
1698            }
1699        }
1700    })
1701}
1702
1703async fn kill_browser(browser: &mut Browser) -> Result<(), CdpError> {
1704    if let Some(result) = browser.kill().await {
1705        result.map_err(io_error)?;
1706    }
1707    Ok(())
1708}
1709
1710async fn validate_browser_version(browser: &Browser) -> Result<(), CdpError> {
1711    let version = browser.version().await.map_err(driver_error)?;
1712    validate_chromium_product_major(&version.product)
1713}
1714
1715fn validate_chromium_product_major(product: &str) -> Result<(), CdpError> {
1716    let found = chromium_major_from_product(product).ok_or_else(|| {
1717        CdpError::Driver(Box::new(io::Error::new(
1718            io::ErrorKind::InvalidData,
1719            format!("could not parse Chromium product version `{product}`"),
1720        )))
1721    })?;
1722
1723    // PRD §16: Plumb accepts a contiguous range of Chromium majors,
1724    // re-validated whenever the upper bound moves.
1725    if (MIN_SUPPORTED_CHROMIUM_MAJOR..=MAX_SUPPORTED_CHROMIUM_MAJOR).contains(&found) {
1726        Ok(())
1727    } else {
1728        Err(CdpError::UnsupportedChromium {
1729            min_supported: MIN_SUPPORTED_CHROMIUM_MAJOR,
1730            max_supported: MAX_SUPPORTED_CHROMIUM_MAJOR,
1731            found,
1732        })
1733    }
1734}
1735
/// Extract the major version from a `<name>/<version>` product string.
///
/// The version is everything after the first `/`; the major is the part
/// of it before the first `.`. Returns `None` when there is no `/` or
/// the major does not parse as a `u32`.
fn chromium_major_from_product(product: &str) -> Option<u32> {
    product
        .split_once('/')
        .and_then(|(_name, version)| version.split('.').next())
        .and_then(|major| major.parse().ok())
}
1741
1742fn map_launch_error(err: chromiumoxide::error::CdpError) -> CdpError {
1743    match err {
1744        chromiumoxide::error::CdpError::Io(io_err) => {
1745            if io_err.kind() == io::ErrorKind::NotFound {
1746                chromium_not_found()
1747            } else {
1748                io_error(io_err)
1749            }
1750        }
1751        chromiumoxide::error::CdpError::LaunchIo(io_err, stderr) => {
1752            if io_err.kind() == io::ErrorKind::NotFound {
1753                chromium_not_found()
1754            } else {
1755                CdpError::Driver(Box::new(chromiumoxide::error::CdpError::LaunchIo(
1756                    io_err, stderr,
1757                )))
1758            }
1759        }
1760        other => driver_error(other),
1761    }
1762}
1763
1764fn driver_error(err: chromiumoxide::error::CdpError) -> CdpError {
1765    CdpError::Driver(Box::new(err))
1766}
1767
1768fn io_error(err: io::Error) -> CdpError {
1769    CdpError::Driver(Box::new(err))
1770}
1771
1772fn malformed(reason: impl Into<String>) -> CdpError {
1773    CdpError::MalformedSnapshot {
1774        reason: reason.into(),
1775    }
1776}
1777
/// The DOM `nodeType` value that marks an element node. Only nodes of
/// this type survive into the flattened snapshot; text, comment, and
/// doctype nodes are skipped.
const ELEMENT_NODE_TYPE: i64 = 1;
1781
1782/// Flatten the CDP `DOMSnapshot.captureSnapshot` response into a
1783/// deterministic [`PlumbSnapshot`].
1784///
1785/// The flattening is a pure function of `(target, response)`. It walks
1786/// `documents[0]` in source order, keeps element nodes, and resolves
1787/// every string index through the shared `strings` table. Children
1788/// lists are sorted by `dom_order` and the final node vector is sorted
1789/// by `dom_order` before return — these two sorts keep the snapshot
1790/// byte-identical across runs against the same page.
1791fn flatten_snapshot(
1792    target: &Target,
1793    response: &CaptureSnapshotReturns,
1794) -> Result<PlumbSnapshot, CdpError> {
1795    let strings = response.strings.as_slice();
1796    let document = response
1797        .documents
1798        .first()
1799        .ok_or_else(|| malformed("documents array is empty"))?;
1800
1801    let nodes_view = NodesView::from_document(document)?;
1802    let layout_view = LayoutView::from_document(document)?;
1803    let node_to_dom_order = build_dom_order_map(&nodes_view);
1804
1805    let FlattenedNodes {
1806        mut nodes,
1807        tags,
1808        parents,
1809    } = build_nodes(&nodes_view, &node_to_dom_order, strings)?;
1810
1811    apply_layout(&mut nodes, &layout_view, &node_to_dom_order, strings)?;
1812    finalize_nodes(&mut nodes, &tags, &parents);
1813    nodes.sort_by_key(|n| n.dom_order);
1814
1815    let text_boxes = extract_text_boxes(document, &layout_view, &nodes_view, &node_to_dom_order);
1816
1817    Ok(PlumbSnapshot {
1818        url: target.url.clone(),
1819        viewport: target.viewport.clone(),
1820        viewport_width: target.width,
1821        viewport_height: target.height,
1822        nodes,
1823        text_boxes,
1824    })
1825}
1826
1827/// Result of the first flatten pass — element nodes with bookkeeping
1828/// indexes for the layout/selector passes.
1829struct FlattenedNodes {
1830    nodes: Vec<SnapshotNode>,
1831    tags: IndexMap<u64, String>,
1832    parents: IndexMap<u64, Option<u64>>,
1833}
1834
1835/// Map every CDP node index → kept element's `dom_order`. Non-element
1836/// nodes get `None`. Element nodes get a 0-based, gap-free order.
1837fn build_dom_order_map(nodes_view: &NodesView<'_>) -> Vec<Option<u64>> {
1838    let mut map: Vec<Option<u64>> = vec![None; nodes_view.len()];
1839    let mut next_order: u64 = 0;
1840    for (idx, slot) in map.iter_mut().enumerate() {
1841        if nodes_view.is_element(idx) {
1842            *slot = Some(next_order);
1843            next_order += 1;
1844        }
1845    }
1846    map
1847}
1848
1849/// Walk up the CDP parent chain from `node_index` until reaching a node
1850/// that maps to an element `dom_order`. Returns that `dom_order`, or
1851/// `None` when no ancestor is a kept element.
1852///
1853/// CDP attributes inline text layout boxes to `#text` nodes (nodeType 3),
1854/// which are not elements and so carry no `dom_order`. Re-attributing a
1855/// box to the nearest ancestor element keeps `text_boxes_for` non-empty
1856/// for the painting element (`<p>`, `<span>`, …). When `node_index`
1857/// already maps to an element, its own `dom_order` is returned
1858/// immediately — preserving the prior behavior for element-owned boxes.
1859fn nearest_element_dom_order(
1860    nodes_view: &NodesView<'_>,
1861    node_to_dom_order: &[Option<u64>],
1862    node_index: usize,
1863) -> Option<u64> {
1864    let mut cursor = Some(node_index);
1865    // Bound the walk by the node count: a tree of N nodes has no path
1866    // longer than N, so this terminates even on a malformed cyclic
1867    // `parentIndex` chain.
1868    for _ in 0..=node_to_dom_order.len() {
1869        let idx = cursor?;
1870        if let Some(order) = node_to_dom_order.get(idx).copied().flatten() {
1871            return Some(order);
1872        }
1873        cursor = nodes_view
1874            .parent_index(idx)
1875            .and_then(|parent| usize::try_from(parent).ok());
1876    }
1877    None
1878}
1879
1880fn build_nodes(
1881    nodes_view: &NodesView<'_>,
1882    node_to_dom_order: &[Option<u64>],
1883    strings: &[String],
1884) -> Result<FlattenedNodes, CdpError> {
1885    let mut nodes: Vec<SnapshotNode> = Vec::new();
1886    let mut tags: IndexMap<u64, String> = IndexMap::new();
1887    let mut parents: IndexMap<u64, Option<u64>> = IndexMap::new();
1888
1889    for (idx, dom_order) in node_to_dom_order.iter().enumerate() {
1890        let Some(dom_order) = dom_order else { continue };
1891        let tag = lookup_string(strings, nodes_view.node_name(idx)?)?.to_lowercase();
1892        let attrs = nodes_view.attributes_for(idx, strings)?;
1893        let parent_dom_order =
1894            resolve_parent_dom_order(nodes_view.parent_index(idx), idx, node_to_dom_order)?;
1895
1896        tags.insert(*dom_order, tag.clone());
1897        parents.insert(*dom_order, parent_dom_order);
1898
1899        nodes.push(SnapshotNode {
1900            dom_order: *dom_order,
1901            selector: String::new(),
1902            tag,
1903            attrs,
1904            computed_styles: IndexMap::new(),
1905            rect: None,
1906            parent: parent_dom_order,
1907            children: Vec::new(),
1908        });
1909    }
1910
1911    Ok(FlattenedNodes {
1912        nodes,
1913        tags,
1914        parents,
1915    })
1916}
1917
1918fn resolve_parent_dom_order(
1919    parent_index: Option<i64>,
1920    idx: usize,
1921    node_to_dom_order: &[Option<u64>],
1922) -> Result<Option<u64>, CdpError> {
1923    let Some(parent_idx) = parent_index else {
1924        return Ok(None);
1925    };
1926    let parent_idx_usize = usize::try_from(parent_idx).map_err(|_| {
1927        malformed(format!(
1928            "negative parent index `{parent_idx}` for node {idx}"
1929        ))
1930    })?;
1931    if parent_idx_usize >= node_to_dom_order.len() {
1932        return Err(malformed(format!(
1933            "parent index `{parent_idx}` out of range for node {idx}"
1934        )));
1935    }
1936    Ok(node_to_dom_order[parent_idx_usize])
1937}
1938
1939fn apply_layout(
1940    nodes: &mut [SnapshotNode],
1941    layout_view: &LayoutView<'_>,
1942    node_to_dom_order: &[Option<u64>],
1943    strings: &[String],
1944) -> Result<(), CdpError> {
1945    for layout_idx in 0..layout_view.len() {
1946        let cdp_node_idx = layout_view.node_index(layout_idx)?;
1947        let cdp_node_idx_usize = usize::try_from(cdp_node_idx).map_err(|_| {
1948            malformed(format!(
1949                "negative layout node index `{cdp_node_idx}` at layout slot {layout_idx}"
1950            ))
1951        })?;
1952        if cdp_node_idx_usize >= node_to_dom_order.len() {
1953            return Err(malformed(format!(
1954                "layout node index `{cdp_node_idx}` out of range at layout slot {layout_idx}"
1955            )));
1956        }
1957        let Some(dom_order) = node_to_dom_order[cdp_node_idx_usize] else {
1958            // Layout entry refers to a non-element node — skip.
1959            continue;
1960        };
1961        let Ok(dom_order_usize) = usize::try_from(dom_order) else {
1962            continue;
1963        };
1964        if dom_order_usize >= nodes.len() {
1965            continue;
1966        }
1967        if let Some(rect) = layout_view.rect_at(layout_idx)? {
1968            nodes[dom_order_usize].rect = Some(rect);
1969        }
1970        if let Some(styles) = layout_view.styles_at(layout_idx, strings)? {
1971            nodes[dom_order_usize].computed_styles = styles;
1972        }
1973    }
1974    Ok(())
1975}
1976
1977fn finalize_nodes(
1978    nodes: &mut [SnapshotNode],
1979    tags: &IndexMap<u64, String>,
1980    parents: &IndexMap<u64, Option<u64>>,
1981) {
1982    let mut children_index: IndexMap<u64, Vec<u64>> = IndexMap::new();
1983    for node in nodes.iter() {
1984        if let Some(parent) = node.parent {
1985            children_index
1986                .entry(parent)
1987                .or_default()
1988                .push(node.dom_order);
1989        }
1990    }
1991    for kids in children_index.values_mut() {
1992        kids.sort_unstable();
1993    }
1994    for node in nodes {
1995        if let Some(kids) = children_index.swap_remove(&node.dom_order) {
1996            node.children = kids;
1997        }
1998        node.selector = build_selector(node.dom_order, tags, parents);
1999    }
2000}
2001
2002/// Extract text boxes from `document.text_boxes`, mapping layout indices
2003/// back to `dom_order` via the layout view and node-to-dom-order map.
2004///
2005/// CDP attributes each inline text box to the `#text` layout node that
2006/// owns it. `#text` nodes (nodeType 3) are not elements, so they carry no
2007/// `dom_order` of their own. Rather than dropping every real text run,
2008/// each box is re-attributed to the `dom_order` of its nearest ancestor
2009/// element via [`nearest_element_dom_order`]. A box is only skipped when
2010/// its layout index is out of range or no ancestor element has a
2011/// `dom_order`. Returns sorted by `(dom_order, start)`.
2012fn extract_text_boxes(
2013    document: &DocumentSnapshot,
2014    layout_view: &LayoutView<'_>,
2015    nodes_view: &NodesView<'_>,
2016    node_to_dom_order: &[Option<u64>],
2017) -> Vec<TextBox> {
2018    let tb = &document.text_boxes;
2019    let count = tb.layout_index.len();
2020
2021    // Parallel arrays must agree on length; if not, return empty rather
2022    // than panic — the snapshot is still usable without text boxes.
2023    if tb.bounds.len() != count || tb.start.len() != count || tb.length.len() != count {
2024        return Vec::new();
2025    }
2026
2027    let mut result: Vec<TextBox> = Vec::with_capacity(count);
2028    for i in 0..count {
2029        let layout_idx = tb.layout_index[i];
2030        let Ok(layout_idx_usize) = usize::try_from(layout_idx) else {
2031            continue;
2032        };
2033        if layout_idx_usize >= layout_view.len() {
2034            continue;
2035        }
2036        // layout_view.node_index maps layout slot → CDP node index.
2037        let Ok(cdp_node_idx) = layout_view.node_index(layout_idx_usize) else {
2038            continue;
2039        };
2040        let Ok(cdp_node_idx_usize) = usize::try_from(cdp_node_idx) else {
2041            continue;
2042        };
2043        if cdp_node_idx_usize >= node_to_dom_order.len() {
2044            continue;
2045        }
2046        // The layout node owning this box is usually a `#text` node with
2047        // no `dom_order`. Re-attribute to the nearest ancestor element so
2048        // the painting element (`<p>`, `<span>`, …) carries its text run;
2049        // only drop the box when no ancestor element has a `dom_order`.
2050        let Some(dom_order) =
2051            nearest_element_dom_order(nodes_view, node_to_dom_order, cdp_node_idx_usize)
2052        else {
2053            continue;
2054        };
2055
2056        let bounds_inner = tb.bounds[i].inner();
2057        if bounds_inner.len() != 4 {
2058            continue;
2059        }
2060        let bounds = rect_from_bounds(bounds_inner);
2061
2062        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
2063        let start = tb.start[i].max(0) as u32;
2064        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
2065        let length = tb.length[i].max(0) as u32;
2066
2067        result.push(TextBox {
2068            dom_order,
2069            bounds,
2070            start,
2071            length,
2072        });
2073    }
2074
2075    // Sort by (dom_order, start) for determinism.
2076    result.sort_by_key(|tb| (tb.dom_order, tb.start));
2077    result
2078}
2079
2080fn lookup_string(strings: &[String], idx: i64) -> Result<&str, CdpError> {
2081    let idx_usize =
2082        usize::try_from(idx).map_err(|_| malformed(format!("negative string index `{idx}`")))?;
2083    strings
2084        .get(idx_usize)
2085        .map(String::as_str)
2086        .ok_or_else(|| malformed(format!("string index `{idx}` out of range")))
2087}
2088
2089/// Like [`lookup_string`] but treats negative indices as `None` instead of
2090/// an error.
2091///
2092/// Chrome uses `-1` as a sentinel in optional DOMSnapshot string slots
2093/// (e.g. attribute values, computed-style values) to signal "no value."
2094/// Required slots (node names, attribute names) must still go through
2095/// [`lookup_string`] so that a negative index there remains a hard error.
2096fn lookup_optional_string(strings: &[String], idx: i64) -> Result<Option<&str>, CdpError> {
2097    if idx < 0 {
2098        return Ok(None);
2099    }
2100    lookup_string(strings, idx).map(Some)
2101}
2102
2103fn build_selector(
2104    dom_order: u64,
2105    tags: &IndexMap<u64, String>,
2106    parents: &IndexMap<u64, Option<u64>>,
2107) -> String {
2108    let mut chain: Vec<&str> = Vec::new();
2109    let mut cursor = Some(dom_order);
2110    while let Some(current) = cursor {
2111        if let Some(tag) = tags.get(&current) {
2112            chain.push(tag.as_str());
2113        }
2114        cursor = parents.get(&current).copied().flatten();
2115    }
2116    chain.reverse();
2117    chain.join(" > ")
2118}
2119
2120/// Borrowed view over a `NodeTreeSnapshot` that resolves the parallel
2121/// arrays (`parent_index`, `node_type`, `node_name`, `attributes`)
2122/// without copying.
2123struct NodesView<'a> {
2124    node_count: usize,
2125    parent_index: &'a [i64],
2126    node_type: &'a [i64],
2127    node_name: &'a [chromiumoxide::cdp::browser_protocol::dom_snapshot::StringIndex],
2128    attributes: Option<&'a [chromiumoxide::cdp::browser_protocol::dom_snapshot::ArrayOfStrings]>,
2129}
2130
2131impl<'a> NodesView<'a> {
2132    fn from_document(document: &'a DocumentSnapshot) -> Result<Self, CdpError> {
2133        let node_name = document
2134            .nodes
2135            .node_name
2136            .as_deref()
2137            .ok_or_else(|| malformed("nodes.nodeName missing"))?;
2138        let parent_index = document
2139            .nodes
2140            .parent_index
2141            .as_deref()
2142            .ok_or_else(|| malformed("nodes.parentIndex missing"))?;
2143        let node_type = document
2144            .nodes
2145            .node_type
2146            .as_deref()
2147            .ok_or_else(|| malformed("nodes.nodeType missing"))?;
2148
2149        let node_count = node_name.len();
2150        if parent_index.len() != node_count || node_type.len() != node_count {
2151            return Err(malformed(format!(
2152                "parallel node arrays disagree on length: \
2153                 nodeName={}, parentIndex={}, nodeType={}",
2154                node_name.len(),
2155                parent_index.len(),
2156                node_type.len()
2157            )));
2158        }
2159
2160        let attributes = document.nodes.attributes.as_deref();
2161        if let Some(attrs) = attributes
2162            && attrs.len() != node_count
2163        {
2164            return Err(malformed(format!(
2165                "nodes.attributes length {} disagrees with nodeName length {node_count}",
2166                attrs.len()
2167            )));
2168        }
2169
2170        Ok(Self {
2171            node_count,
2172            parent_index,
2173            node_type,
2174            node_name,
2175            attributes,
2176        })
2177    }
2178
2179    fn len(&self) -> usize {
2180        self.node_count
2181    }
2182
2183    fn is_element(&self, idx: usize) -> bool {
2184        self.node_type
2185            .get(idx)
2186            .copied()
2187            .is_some_and(|t| t == ELEMENT_NODE_TYPE)
2188    }
2189
2190    fn node_name(&self, idx: usize) -> Result<i64, CdpError> {
2191        self.node_name
2192            .get(idx)
2193            .map(|s| *s.inner())
2194            .ok_or_else(|| malformed(format!("nodeName missing for node {idx}")))
2195    }
2196
2197    fn parent_index(&self, idx: usize) -> Option<i64> {
2198        match self.parent_index.get(idx).copied() {
2199            Some(p) if p >= 0 => Some(p),
2200            _ => None,
2201        }
2202    }
2203
2204    fn attributes_for(
2205        &self,
2206        idx: usize,
2207        strings: &[String],
2208    ) -> Result<IndexMap<String, String>, CdpError> {
2209        let Some(attrs) = self.attributes else {
2210            return Ok(IndexMap::new());
2211        };
2212        let Some(entry) = attrs.get(idx) else {
2213            return Ok(IndexMap::new());
2214        };
2215        let pairs = entry.inner();
2216        if pairs.len() % 2 != 0 {
2217            return Err(malformed(format!(
2218                "attributes for node {idx} has odd length {}",
2219                pairs.len()
2220            )));
2221        }
2222        let mut out = IndexMap::with_capacity(pairs.len() / 2);
2223        for chunk in pairs.chunks_exact(2) {
2224            let name = lookup_string(strings, *chunk[0].inner())?.to_string();
2225            let value = lookup_optional_string(strings, *chunk[1].inner())?
2226                .unwrap_or("")
2227                .to_string();
2228            out.insert(name, value);
2229        }
2230        Ok(out)
2231    }
2232}
2233
2234/// Borrowed view over a `LayoutTreeSnapshot` with bounds checks against
2235/// the parallel `node_index`/`bounds`/`styles` arrays.
2236struct LayoutView<'a> {
2237    node_index: &'a [i64],
2238    bounds: &'a [chromiumoxide::cdp::browser_protocol::dom_snapshot::Rectangle],
2239    styles: &'a [chromiumoxide::cdp::browser_protocol::dom_snapshot::ArrayOfStrings],
2240}
2241
2242impl<'a> LayoutView<'a> {
2243    fn from_document(document: &'a DocumentSnapshot) -> Result<Self, CdpError> {
2244        let node_index = document.layout.node_index.as_slice();
2245        let bounds = document.layout.bounds.as_slice();
2246        let styles = document.layout.styles.as_slice();
2247        if node_index.len() != bounds.len() {
2248            return Err(malformed(format!(
2249                "layout.nodeIndex length {} disagrees with layout.bounds length {}",
2250                node_index.len(),
2251                bounds.len()
2252            )));
2253        }
2254        if !styles.is_empty() && styles.len() != node_index.len() {
2255            return Err(malformed(format!(
2256                "layout.styles length {} disagrees with layout.nodeIndex length {}",
2257                styles.len(),
2258                node_index.len()
2259            )));
2260        }
2261        Ok(Self {
2262            node_index,
2263            bounds,
2264            styles,
2265        })
2266    }
2267
2268    fn len(&self) -> usize {
2269        self.node_index.len()
2270    }
2271
2272    fn node_index(&self, idx: usize) -> Result<i64, CdpError> {
2273        self.node_index
2274            .get(idx)
2275            .copied()
2276            .ok_or_else(|| malformed(format!("layout.nodeIndex missing slot {idx}")))
2277    }
2278
2279    fn rect_at(&self, idx: usize) -> Result<Option<Rect>, CdpError> {
2280        let Some(rectangle) = self.bounds.get(idx) else {
2281            return Ok(None);
2282        };
2283        let inner = rectangle.inner();
2284        if inner.is_empty() {
2285            return Ok(None);
2286        }
2287        if inner.len() != 4 {
2288            return Err(malformed(format!(
2289                "layout.bounds slot {idx} has length {} (expected 4)",
2290                inner.len()
2291            )));
2292        }
2293        Ok(Some(rect_from_bounds(inner)))
2294    }
2295
2296    fn styles_at(
2297        &self,
2298        idx: usize,
2299        strings: &[String],
2300    ) -> Result<Option<IndexMap<String, String>>, CdpError> {
2301        let Some(entry) = self.styles.get(idx) else {
2302            return Ok(None);
2303        };
2304        let style_indices = entry.inner();
2305        if style_indices.is_empty() {
2306            return Ok(Some(IndexMap::new()));
2307        }
2308        if style_indices.len() != COMPUTED_STYLE_WHITELIST.len() {
2309            return Err(malformed(format!(
2310                "layout.styles[{idx}] length {} disagrees with whitelist length {}",
2311                style_indices.len(),
2312                COMPUTED_STYLE_WHITELIST.len()
2313            )));
2314        }
2315        let mut out = IndexMap::with_capacity(style_indices.len());
2316        for (slot, prop) in style_indices.iter().zip(COMPUTED_STYLE_WHITELIST.iter()) {
2317            let raw = *slot.inner();
2318            let Some(value) = lookup_optional_string(strings, raw)? else {
2319                // CDP uses `-1` to indicate "no value" for this property on
2320                // this node — skip rather than insert empty strings.
2321                continue;
2322            };
2323            if value.is_empty() {
2324                continue;
2325            }
2326            out.insert((*prop).to_string(), value.to_string());
2327        }
2328        Ok(Some(out))
2329    }
2330}
2331
2332fn rect_from_bounds(inner: &[f64]) -> Rect {
2333    // CDP returns CSS pixel floats. Round to the nearest integer for a
2334    // stable representation; clamp width/height at zero to satisfy the
2335    // `u32` shape on collapsed boxes (Chromium occasionally emits tiny
2336    // negative floats around -0.0).
2337    #[allow(
2338        clippy::cast_possible_truncation,
2339        clippy::cast_sign_loss,
2340        clippy::cast_precision_loss
2341    )]
2342    // SAFETY (cast lints): values are bounded by viewport dimensions
2343    // (i32 fits viewport widths/heights up to ~2.1B px) and are clamped
2344    // non-negative before unsigned cast.
2345    let x = inner[0].round() as i32;
2346    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
2347    let y = inner[1].round() as i32;
2348    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
2349    let width = inner[2].round().max(0.0) as u32;
2350    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
2351    let height = inner[3].round().max(0.0) as u32;
2352    Rect {
2353        x,
2354        y,
2355        width,
2356        height,
2357    }
2358}
2359
#[cfg(test)]
mod tests {
    //! Unit tests for the pure helpers in this crate: whitelist pinning,
    //! Chromium version gating, string-table lookups, cookie/header
    //! validation, storage-state parsing, origin extraction, and snapshot
    //! flattening. None of them launch a browser.

    use super::{
        COMPUTED_STYLE_WHITELIST, CdpError, Cookie, MAX_SUPPORTED_CHROMIUM_MAJOR,
        MIN_SUPPORTED_CHROMIUM_MAJOR, StorageState, parse_header_kv,
    };

    // ---- Computed-style whitelist -------------------------------------

    #[test]
    fn style_whitelist_has_36_properties() {
        assert_eq!(
            COMPUTED_STYLE_WHITELIST.len(),
            36,
            "PRD §10.3 pins exactly 36 computed-style properties"
        );
    }

    #[test]
    fn style_whitelist_pins_canonical_order() {
        // Locks the exact order from PRD §10.3. If the list grows or the
        // order changes, the rule engine's interpretation of the parallel
        // style indices coming back from Chromium silently breaks.
        let expected: [&str; 36] = [
            "font-size",
            "font-family",
            "font-weight",
            "line-height",
            "color",
            "background-color",
            "border-top-color",
            "border-right-color",
            "border-bottom-color",
            "border-left-color",
            "border-top-width",
            "border-right-width",
            "border-bottom-width",
            "border-left-width",
            "border-top-left-radius",
            "border-top-right-radius",
            "border-bottom-right-radius",
            "border-bottom-left-radius",
            "margin-top",
            "margin-right",
            "margin-bottom",
            "margin-left",
            "padding-top",
            "padding-right",
            "padding-bottom",
            "padding-left",
            "gap",
            "row-gap",
            "column-gap",
            "display",
            "position",
            "box-shadow",
            "opacity",
            "z-index",
            "width",
            "height",
        ];
        assert_eq!(COMPUTED_STYLE_WHITELIST, &expected);
    }

    // ---- Chromium version gating --------------------------------------

    #[test]
    fn parses_product_major_versions() {
        assert_eq!(
            super::chromium_major_from_product("Chrome/131.0.6778.204"),
            Some(131)
        );
        assert_eq!(
            super::chromium_major_from_product("HeadlessChrome/131.0.6778.204"),
            Some(131)
        );
        assert_eq!(
            super::chromium_major_from_product("Chromium/131.0.6778.204"),
            Some(131)
        );
        assert_eq!(super::chromium_major_from_product("Chrome"), None);
        assert_eq!(
            super::chromium_major_from_product("Chrome/not-a-version"),
            None
        );
    }

    #[test]
    fn detects_unsupported_chromium_major() {
        // Below the minimum is rejected.
        let below = MIN_SUPPORTED_CHROMIUM_MAJOR - 1;
        let below_product = format!("Chrome/{below}.0.0.0");
        let below_result = super::validate_chromium_product_major(&below_product);
        assert!(matches!(
            below_result,
            Err(CdpError::UnsupportedChromium {
                min_supported: MIN_SUPPORTED_CHROMIUM_MAJOR,
                max_supported: MAX_SUPPORTED_CHROMIUM_MAJOR,
                found,
            }) if found == below
        ));

        // Above the maximum is rejected.
        let above = MAX_SUPPORTED_CHROMIUM_MAJOR + 1;
        let above_product = format!("Chrome/{above}.0.0.0");
        let above_result = super::validate_chromium_product_major(&above_product);
        assert!(matches!(
            above_result,
            Err(CdpError::UnsupportedChromium {
                min_supported: MIN_SUPPORTED_CHROMIUM_MAJOR,
                max_supported: MAX_SUPPORTED_CHROMIUM_MAJOR,
                found,
            }) if found == above
        ));
    }

    #[test]
    fn accepts_supported_chromium_majors() {
        // Min, max, and an in-between value all pass.
        let lower_bound = format!("HeadlessChrome/{MIN_SUPPORTED_CHROMIUM_MAJOR}.0.0.0");
        assert!(super::validate_chromium_product_major(&lower_bound).is_ok());

        let upper_bound = format!("HeadlessChrome/{MAX_SUPPORTED_CHROMIUM_MAJOR}.0.0.0");
        assert!(super::validate_chromium_product_major(&upper_bound).is_ok());

        // Derive the in-between major from the bounds instead of pinning
        // a literal, so moving either bound cannot push the probe out of
        // range and fail this test for the wrong reason.
        let midpoint = MIN_SUPPORTED_CHROMIUM_MAJOR
            + (MAX_SUPPORTED_CHROMIUM_MAJOR - MIN_SUPPORTED_CHROMIUM_MAJOR) / 2;
        let in_range = format!("HeadlessChrome/{midpoint}.0.0.0");
        assert!(super::validate_chromium_product_major(&in_range).is_ok());
    }

    // ---- String-table lookups -----------------------------------------

    #[test]
    fn lookup_string_rejects_negative_index() {
        let strings = vec!["hello".to_string()];
        let err = super::lookup_string(&strings, -1).unwrap_err();
        assert!(
            matches!(err, CdpError::MalformedSnapshot { ref reason } if reason.contains("negative string index")),
            "expected MalformedSnapshot for negative index, got {err:?}"
        );
    }

    #[test]
    fn lookup_string_rejects_out_of_range() {
        let strings = vec!["hello".to_string()];
        let err = super::lookup_string(&strings, 5).unwrap_err();
        assert!(
            matches!(err, CdpError::MalformedSnapshot { ref reason } if reason.contains("out of range")),
            "expected MalformedSnapshot for OOB index, got {err:?}"
        );
    }

    #[test]
    fn lookup_string_resolves_valid_index() {
        let strings = vec!["hello".to_string(), "world".to_string()];
        assert_eq!(super::lookup_string(&strings, 0).unwrap(), "hello");
        assert_eq!(super::lookup_string(&strings, 1).unwrap(), "world");
    }

    #[test]
    fn lookup_optional_string_returns_none_for_sentinel() {
        let strings = vec!["hello".to_string()];
        assert_eq!(super::lookup_optional_string(&strings, -1).unwrap(), None);
        // Other negative values also map to None.
        assert_eq!(super::lookup_optional_string(&strings, -42).unwrap(), None);
    }

    #[test]
    fn lookup_optional_string_resolves_valid_index() {
        let strings = vec!["hello".to_string(), "world".to_string()];
        assert_eq!(
            super::lookup_optional_string(&strings, 0).unwrap(),
            Some("hello")
        );
        assert_eq!(
            super::lookup_optional_string(&strings, 1).unwrap(),
            Some("world")
        );
    }

    #[test]
    fn lookup_optional_string_rejects_out_of_range() {
        let strings = vec!["hello".to_string()];
        let err = super::lookup_optional_string(&strings, 5).unwrap_err();
        assert!(
            matches!(err, CdpError::MalformedSnapshot { ref reason } if reason.contains("out of range")),
            "expected MalformedSnapshot for OOB index, got {err:?}"
        );
    }

    // ---- Cookie / header parsing and validation -----------------------

    #[test]
    fn cookie_parse_kv_accepts_simple_pair() {
        let c = Cookie::parse_kv("session=abc123").unwrap();
        assert_eq!(c.name, "session");
        assert_eq!(c.value, "abc123");
        assert!(c.url.is_none());
    }

    #[test]
    fn cookie_parse_kv_rejects_missing_separator() {
        let err = Cookie::parse_kv("nosep").unwrap_err();
        assert!(matches!(err, CdpError::InvalidCookie { .. }));
    }

    #[test]
    fn cookie_parse_kv_rejects_empty_name() {
        let err = Cookie::parse_kv("=value").unwrap_err();
        assert!(matches!(err, CdpError::InvalidCookie { .. }));
    }

    #[test]
    fn cookie_parse_kv_rejects_crlf_in_value() {
        let err = Cookie::parse_kv("name=hello\r\nSet-Cookie: pwn=1").unwrap_err();
        match err {
            CdpError::InvalidCookie { field, reason, .. } => {
                assert_eq!(field, "value");
                assert!(reason.contains("control characters"));
            }
            other => panic!("expected InvalidCookie, got {other:?}"),
        }
    }

    #[test]
    fn header_parse_kv_accepts_pair() {
        let (n, v) = parse_header_kv("X-Trace-Id: 12345").unwrap();
        assert_eq!(n, "X-Trace-Id");
        assert_eq!(v, "12345");
    }

    #[test]
    fn header_parse_kv_rejects_missing_colon() {
        let err = parse_header_kv("nope").unwrap_err();
        assert!(matches!(err, CdpError::InvalidHeader { .. }));
    }

    #[test]
    fn header_parse_kv_rejects_lf_in_value() {
        let err = parse_header_kv("X-Pwn: hi\nInjected: 1").unwrap_err();
        assert!(matches!(err, CdpError::InvalidHeader { .. }));
    }

    #[test]
    fn header_parse_kv_rejects_space_in_name() {
        let err = parse_header_kv("X Header: 1").unwrap_err();
        assert!(matches!(err, CdpError::InvalidHeader { .. }));
    }

    #[test]
    fn validate_header_name_rejects_colon() {
        // Library-boundary check: a downstream consumer might construct
        // `headers: vec![("Foo:Bar".into(), "1".into())]` directly.
        // `parse_header_kv` would split that, but
        // `install_extra_headers` calls the validator straight on the
        // tuple — so the validator must catch `:` itself.
        let err = super::validate_header_name("Foo:Bar").unwrap_err();
        assert!(matches!(err, CdpError::InvalidHeader { field: "name", .. }));
    }

    #[test]
    fn validate_header_name_rejects_whitespace() {
        let err = super::validate_header_name("X Header").unwrap_err();
        assert!(matches!(err, CdpError::InvalidHeader { .. }));
        let err = super::validate_header_name("X\tHeader").unwrap_err();
        assert!(matches!(err, CdpError::InvalidHeader { .. }));
    }

    #[test]
    fn validate_header_name_rejects_control_bytes() {
        // The validator is expected to reject every C0 control byte and
        // DEL. Spot-check a handful of C0 controls (CR, LF, NUL, BEL
        // 0x07, ESC 0x1b) plus DEL (0x7f).
        for &c in b"\r\n\0\x07\x1b\x7f" {
            let name = format!("X-Hi{}Foo", c as char);
            let err = super::validate_header_name(&name).unwrap_err();
            assert!(
                matches!(err, CdpError::InvalidHeader { .. }),
                "expected InvalidHeader for byte {c:#x}, got {err:?}"
            );
        }
    }

    #[test]
    fn validate_cookie_name_rejects_equals_and_whitespace() {
        // Library-boundary: the parser splits on the *first* `=`, so a
        // parsed name never contains one, but a directly constructed
        // `Cookie { name: "foo=bar", .. }` bypasses the parser entirely.
        // The standalone validator must therefore reject `=` itself.
        let err = super::validate_cookie_name("foo=bar").unwrap_err();
        assert!(matches!(err, CdpError::InvalidCookie { field: "name", .. }));
        let err = super::validate_cookie_name("foo bar").unwrap_err();
        assert!(matches!(err, CdpError::InvalidCookie { .. }));
    }

    #[test]
    fn validate_cookie_value_rejects_full_c0_range() {
        // Tightened beyond CR/LF/NUL — every C0 byte and DEL is now
        // rejected. Tab is in the C0 range so it's also rejected.
        for c in 0u8..0x20 {
            let value = format!("v{}x", c as char);
            let err = super::validate_cookie_value(&value).unwrap_err();
            assert!(
                matches!(err, CdpError::InvalidCookie { .. }),
                "expected InvalidCookie for byte {c:#x}, got {err:?}"
            );
        }
        let err = super::validate_cookie_value("v\x7fx").unwrap_err();
        assert!(matches!(err, CdpError::InvalidCookie { .. }));
    }

    // ---- Storage-state parsing ----------------------------------------

    #[test]
    fn storage_state_parses_minimal_payload() {
        let json = r#"{
            "cookies": [
                {"name":"a","value":"1","domain":".example.com","path":"/","expires":-1,"httpOnly":false,"secure":false,"sameSite":"Lax"}
            ],
            "origins": [
                {"origin":"https://example.com","localStorage":[{"name":"k","value":"v"}]}
            ]
        }"#;
        let state = StorageState::parse_str(json).unwrap();
        assert_eq!(state.cookies.len(), 1);
        assert_eq!(state.cookies[0].name, "a");
        assert_eq!(state.origins.len(), 1);
        assert_eq!(state.origins[0].origin, "https://example.com");
        assert_eq!(state.origins[0].local_storage[0].name, "k");
    }

    #[test]
    fn storage_state_parses_empty_payload() {
        let state = StorageState::parse_str(r#"{"cookies":[],"origins":[]}"#).unwrap();
        assert!(state.cookies.is_empty());
        assert!(state.origins.is_empty());
    }

    #[test]
    fn storage_state_rejects_unknown_fields() {
        let json = r#"{"cookies":[],"origins":[],"unexpected":42}"#;
        let err = StorageState::parse_str(json).unwrap_err();
        assert!(matches!(err, CdpError::MalformedStorageState { .. }));
    }

    #[test]
    fn storage_state_parse_str_rejects_crlf_in_cookie_domain() {
        // `parse_str` is the canonical validation entry point —
        // `load_from_path` delegates to it. Drive it directly so the
        // test doesn't need disk I/O or a CWD swap.
        let json = "{\"cookies\":[{\"name\":\"a\",\"value\":\"1\",\
            \"domain\":\"evil\\r\\nSet-Cookie: x=y\",\"path\":\"/\",\
            \"expires\":-1,\"httpOnly\":false,\"secure\":false,\"sameSite\":\"Lax\"}],\
            \"origins\":[]}";
        let err = StorageState::parse_str(json).unwrap_err();
        match err {
            CdpError::InvalidCookie { field, reason, .. } => {
                assert_eq!(field, "domain");
                assert!(reason.contains("control characters"));
            }
            other => panic!("expected InvalidCookie domain rejection, got {other:?}"),
        }
    }

    #[test]
    fn storage_state_parse_str_rejects_crlf_in_cookie_path() {
        let json = "{\"cookies\":[{\"name\":\"a\",\"value\":\"1\",\
            \"domain\":\"example.com\",\"path\":\"/foo\\nbar\",\
            \"expires\":-1,\"httpOnly\":false,\"secure\":false,\"sameSite\":\"Lax\"}],\
            \"origins\":[]}";
        let err = StorageState::parse_str(json).unwrap_err();
        match err {
            CdpError::InvalidCookie { field, reason, .. } => {
                assert_eq!(field, "path");
                assert!(reason.contains("control characters"));
            }
            other => panic!("expected InvalidCookie path rejection, got {other:?}"),
        }
    }

    #[test]
    fn storage_state_parse_str_rejects_full_c0_range_in_cookie_value() {
        // M1 + M3: the parser rejects every C0 byte (and DEL) in
        // cookie value, not only CR/LF/NUL. ESC (0x1b) is the probe here.
        let json = "{\"cookies\":[{\"name\":\"a\",\"value\":\"v\\u001bx\",\
            \"domain\":\"example.com\",\"path\":\"/\",\
            \"expires\":-1,\"httpOnly\":false,\"secure\":false,\"sameSite\":\"Lax\"}],\
            \"origins\":[]}";
        let err = StorageState::parse_str(json).unwrap_err();
        assert!(matches!(
            err,
            CdpError::InvalidCookie { field: "value", .. }
        ));
    }

    // ---- Target defaults ----------------------------------------------

    #[test]
    fn target_default_sets_capture_knobs() {
        let t = super::Target::default();
        assert!(t.disable_animations);
        assert!(t.hide_scrollbars);
        assert!(t.wait_for_selector.is_none());
        assert!(t.wait_ms.is_none());
        assert!(t.pin_dpr.is_none());
    }

    #[test]
    fn target_effective_dpr_prefers_pin_over_default() {
        let mut t = super::Target {
            device_pixel_ratio: 1.0,
            ..super::Target::default()
        };
        assert!((t.effective_dpr() - 1.0).abs() < f64::EPSILON);
        t.pin_dpr = Some(3.0);
        assert!((t.effective_dpr() - 3.0).abs() < f64::EPSILON);
    }

    // ---- Origin extraction --------------------------------------------

    #[test]
    fn origin_of_handles_https_url() {
        assert_eq!(
            super::origin_of("https://example.com/path?q=1").as_deref(),
            Some("https://example.com")
        );
        assert_eq!(
            super::origin_of("http://example.com:8080/").as_deref(),
            Some("http://example.com:8080")
        );
        assert_eq!(super::origin_of("notaurl").as_deref(), None);
    }

    #[test]
    fn origin_of_strips_default_ports() {
        // WHATWG origin: default ports are elided.
        assert_eq!(
            super::origin_of("https://example.com:443/").as_deref(),
            Some("https://example.com")
        );
        assert_eq!(
            super::origin_of("http://example.com:80/").as_deref(),
            Some("http://example.com")
        );
    }

    #[test]
    fn origin_of_normalizes_scheme_and_host_case() {
        assert_eq!(
            super::origin_of("HTTPS://Example.COM/path").as_deref(),
            Some("https://example.com")
        );
    }

    #[test]
    fn origin_of_strips_userinfo_query_fragment() {
        assert_eq!(
            super::origin_of("https://user:pw@example.com/p?q=1#frag").as_deref(),
            Some("https://example.com")
        );
    }

    #[test]
    fn origin_of_returns_none_for_opaque_origins() {
        // `data:` URLs have opaque origins and cannot match a
        // Playwright-recorded site origin. Only `data:` is exercised
        // here; `file:` is not covered by this test.
        assert_eq!(super::origin_of("data:text/plain,hello").as_deref(), None);
    }

    // ---- Snapshot flattening ------------------------------------------

    /// A synthetic `DOMSnapshot.captureSnapshot` response matching the
    /// CDP wire format. The DOM is:
    ///
    /// ```text
    /// html > body > p > #text("Hello")
    ///             > div > span
    /// ```
    ///
    /// CDP owns the inline text box on the `#text` node (node index 3),
    /// not on the `<p>`. The container `<div>` has only an element child,
    /// so no text box references it.
    fn capture_returns_with_text_box() -> super::CaptureSnapshotReturns {
        let value = serde_json::json!({
            "documents": [{
                "documentURL": 0, "title": 0, "baseURL": 0,
                "contentLanguage": 0, "encodingName": 0, "publicId": 0,
                "systemId": 0, "frameId": 0,
                "nodes": {
                    // index:        0   1   2   3   4   5
                    //             html body p  #txt div span
                    "parentIndex": [-1,  0,  1,  2,  1,  4],
                    "nodeType":    [ 1,  1,  1,  3,  1,  1],
                    "nodeName":    [ 1,  2,  3,  4,  5,  6]
                },
                "layout": {
                    // layout slot:  0(p) 1(#text) 2(div) 3(span)
                    "nodeIndex": [2, 3, 4, 5],
                    "styles": [],
                    "bounds": [
                        [10.0, 10.0, 100.0, 20.0],
                        [10.0, 12.0,  40.0, 16.0],
                        [10.0, 40.0, 200.0, 50.0],
                        [10.0, 40.0,   0.0,  0.0]
                    ],
                    "text": [],
                    "stackingContexts": { "index": [] }
                },
                "textBoxes": {
                    // Owned by layout slot 1 — the `#text` node.
                    "layoutIndex": [1],
                    "bounds": [[10.0, 12.0, 40.0, 16.0]],
                    "start": [0],
                    "length": [5]
                }
            }],
            "strings": ["", "HTML", "BODY", "P", "#text", "DIV", "SPAN"]
        });
        serde_json::from_value(value).expect("synthetic CDP response must deserialize")
    }

    #[test]
    fn text_box_reattributed_to_nearest_ancestor_element() {
        let target = super::Target {
            url: "https://example.com/".to_string(),
            ..super::Target::default()
        };
        let snap = super::flatten_snapshot(&target, &capture_returns_with_text_box())
            .expect("flatten must succeed for the synthetic response");

        // `<p>` is the third element in source order → dom_order 2.
        let p = snap
            .nodes
            .iter()
            .find(|n| n.tag == "p")
            .expect("`<p>` element must survive flattening");
        let div = snap
            .nodes
            .iter()
            .find(|n| n.tag == "div")
            .expect("`<div>` element must survive flattening");

        // Exactly one text box, attributed to the `<p>` (the nearest
        // ancestor element of the `#text` layout node), not dropped.
        assert_eq!(snap.text_boxes.len(), 1, "the single text run survives");
        let tb = &snap.text_boxes[0];
        assert_eq!(
            tb.dom_order, p.dom_order,
            "text box must attribute to the `<p>`, not the `#text` node"
        );
        assert_eq!(tb.length, 5, "\"Hello\" is 5 UTF-16 units");
        assert_eq!(tb.start, 0);

        // The container `<div>` has only an element child, so no text box
        // references it.
        assert!(
            snap.text_boxes.iter().all(|b| b.dom_order != div.dom_order),
            "container `<div>` with only element children must carry no text box"
        );
    }

    #[test]
    fn text_box_reattribution_is_byte_deterministic() {
        // Flatten the same synthetic response twice and compare the
        // serialized output, which must not vary between runs.
        let target = super::Target::default();
        let a =
            super::flatten_snapshot(&target, &capture_returns_with_text_box()).expect("flatten a");
        let b =
            super::flatten_snapshot(&target, &capture_returns_with_text_box()).expect("flatten b");
        assert_eq!(
            serde_json::to_string(&a).expect("serialize a"),
            serde_json::to_string(&b).expect("serialize b"),
            "two flattens of identical input must match byte-for-byte"
        );
    }
}