Skip to main content

stygian_browser/
page.rs

1//! Page and browsing context management for isolated, parallel scraping
2//!
3//! Each `BrowserContext` (future) is an incognito-style isolation boundary (separate
4//! cookies, localStorage, cache).  Each context can contain many [`PageHandle`]s
5//! (tabs).  Both types clean up their CDP resources automatically on drop.
6//!
7//! ## Resource blocking
8//!
9//! Pass a [`ResourceFilter`] to [`PageHandle::set_resource_filter`] to intercept
10//! and block specific request types (images, fonts, CSS) before page load —
11//! significantly reducing page load times for text-only scraping.
12//!
13//! ## Wait strategies
14//!
15//! [`PageHandle`] exposes three wait strategies via [`WaitUntil`]:
16//! - `DomContentLoaded` — fires when the HTML is parsed
17//! - `NetworkIdle` — fires when there are ≤2 in-flight requests for 500 ms
18//! - `Selector(css)` — fires when a CSS selector matches an element
19//!
20//! # Example
21//!
22//! ```no_run
23//! use stygian_browser::{BrowserPool, BrowserConfig};
24//! use stygian_browser::page::{ResourceFilter, WaitUntil};
25//! use std::time::Duration;
26//!
27//! # async fn run() -> stygian_browser::error::Result<()> {
28//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
29//! let handle = pool.acquire().await?;
30//!
31//! let mut page = handle.browser().expect("valid browser").new_page().await?;
32//! page.set_resource_filter(ResourceFilter::block_media()).await?;
33//! page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
34//! let title = page.title().await?;
35//! println!("title: {title}");
36//! handle.release().await;
37//! # Ok(())
38//! # }
39//! ```
40
41use std::collections::HashMap;
42use std::sync::{
43    Arc,
44    atomic::{AtomicU16, Ordering},
45};
46use std::time::Duration;
47
48use chromiumoxide::Page;
49use tokio::time::timeout;
50use tracing::{debug, warn};
51
52use crate::error::{BrowserError, Result};
53
54// ─── ResourceType ─────────────────────────────────────────────────────────────
55
/// Resource categories that the request interceptor can block.
///
/// Each variant maps to one CDP `resourceType` string; see
/// [`ResourceType::as_cdp_str`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ResourceType {
    /// `<img>`, `<picture>`, background images
    Image,
    /// Web fonts loaded via CSS `@font-face`
    Font,
    /// External CSS stylesheets
    Stylesheet,
    /// Media files (audio/video)
    Media,
}

impl ResourceType {
    /// Returns the CDP `resourceType` string for this variant.
    ///
    /// This is the value compared against the resource type carried by
    /// `Fetch.requestPaused` events when a resource filter decides whether a
    /// paused request should be failed or continued.
    #[must_use]
    pub const fn as_cdp_str(&self) -> &'static str {
        match self {
            Self::Image => "Image",
            Self::Font => "Font",
            Self::Stylesheet => "Stylesheet",
            Self::Media => "Media",
        }
    }
}
80
81// ─── ResourceFilter ───────────────────────────────────────────────────────────
82
83/// Set of resource types to block from loading.
84///
85/// # Example
86///
87/// ```
88/// use stygian_browser::page::ResourceFilter;
89/// let filter = ResourceFilter::block_media();
90/// assert!(filter.should_block("Image"));
91/// ```
92#[derive(Debug, Clone, Default)]
93pub struct ResourceFilter {
94    blocked: Vec<ResourceType>,
95}
96
97impl ResourceFilter {
98    /// Block all media resources (images, fonts, CSS, audio/video).
99    pub fn block_media() -> Self {
100        Self {
101            blocked: vec![
102                ResourceType::Image,
103                ResourceType::Font,
104                ResourceType::Stylesheet,
105                ResourceType::Media,
106            ],
107        }
108    }
109
110    /// Block only images and fonts (keep styles for layout-sensitive work).
111    pub fn block_images_and_fonts() -> Self {
112        Self {
113            blocked: vec![ResourceType::Image, ResourceType::Font],
114        }
115    }
116
117    /// Add a resource type to the block list.
118    #[must_use]
119    pub fn block(mut self, resource: ResourceType) -> Self {
120        if !self.blocked.contains(&resource) {
121            self.blocked.push(resource);
122        }
123        self
124    }
125
126    /// Returns `true` if the given CDP resource type string should be blocked.
127    pub fn should_block(&self, cdp_type: &str) -> bool {
128        self.blocked
129            .iter()
130            .any(|r| r.as_cdp_str().eq_ignore_ascii_case(cdp_type))
131    }
132
133    /// Returns `true` if no resource types are blocked.
134    pub const fn is_empty(&self) -> bool {
135        self.blocked.is_empty()
136    }
137}
138
139// ─── WaitUntil ────────────────────────────────────────────────────────────────
140
/// Condition to wait for after a navigation.
///
/// Values compare by variant, and for [`WaitUntil::Selector`] by the selector
/// string as well.
///
/// # Example
///
/// ```
/// use stygian_browser::page::WaitUntil;
/// let w = WaitUntil::Selector("#main".to_string());
/// assert!(matches!(w, WaitUntil::Selector(_)));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WaitUntil {
    /// Wait for the `Page.domContentEventFired` CDP event — fires when the HTML
    /// document has been fully parsed and the DOM is ready, before subresources
    /// such as images and stylesheets finish loading.
    DomContentLoaded,
    /// Wait for the `Page.loadEventFired` CDP event **and** then wait until no
    /// more than 2 network requests are in-flight for at least 500 ms
    /// (equivalent to Playwright's `networkidle2`).
    NetworkIdle,
    /// Wait until `document.querySelector(selector)` returns a non-null element.
    Selector(String),
}
163
164// ─── NodeHandle ───────────────────────────────────────────────────────────────
165
166/// A handle to a live DOM node backed by a CDP `RemoteObjectId`.
167///
168/// Obtained via [`PageHandle::query_selector_all`].  Each method issues one or
169/// more CDP `Runtime.callFunctionOn` calls against the held V8 remote object
170/// reference — no HTML serialisation occurs.
171///
172/// A handle becomes **stale** after page navigation or if the underlying DOM
173/// node is removed.  Stale calls return [`BrowserError::StaleNode`] so callers
174/// can distinguish them from other CDP failures.
175///
176/// # Example
177///
178/// ```no_run
179/// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
180/// use std::time::Duration;
181///
182/// # async fn run() -> stygian_browser::error::Result<()> {
183/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
184/// let handle = pool.acquire().await?;
185/// let mut page = handle.browser().expect("valid browser").new_page().await?;
186/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
187///
188/// for node in page.query_selector_all("a[href]").await? {
189///     let href = node.attr("href").await?;
190///     let text = node.text_content().await?;
191///     println!("{text}: {href:?}");
192/// }
193/// # Ok(())
194/// # }
195/// ```
196pub struct NodeHandle {
197    element: chromiumoxide::element::Element,
198    /// Original CSS selector — preserved for stale-node error messages only.
199    /// Shared via `Arc<str>` so all handles from a single query reuse the
200    /// same allocation rather than cloning a `String` per node.
201    selector: Arc<str>,
202    cdp_timeout: Duration,
203}
204
205impl NodeHandle {
206    /// Return a single attribute value, or `None` if the attribute is absent.
207    ///
208    /// Issues one `Runtime.callFunctionOn` CDP call (`el.getAttribute(name)`).
209    ///
210    /// # Errors
211    ///
212    /// Returns [`BrowserError::StaleNode`] when the remote object has been
213    /// invalidated, or [`BrowserError::Timeout`] / [`BrowserError::CdpError`]
214    /// on transport-level failures.
215    pub async fn attr(&self, name: &str) -> Result<Option<String>> {
216        timeout(self.cdp_timeout, self.element.attribute(name))
217            .await
218            .map_err(|_| BrowserError::Timeout {
219                operation: "NodeHandle::attr".to_string(),
220                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
221            })?
222            .map_err(|e| self.cdp_err_or_stale(&e, "attr"))
223    }
224
225    /// Return all attributes as a `HashMap<name, value>` in a **single**
226    /// CDP round-trip.
227    ///
228    /// Uses `DOM.getAttributes` (via the chromiumoxide `attributes()` API)
229    /// which returns a flat `[name, value, name, value, …]` list from the node
230    /// description — no per-attribute calls are needed.
231    ///
232    /// # Errors
233    ///
234    /// Returns [`BrowserError::StaleNode`] when the remote object has been
235    /// invalidated.
236    pub async fn attr_map(&self) -> Result<HashMap<String, String>> {
237        let flat = timeout(self.cdp_timeout, self.element.attributes())
238            .await
239            .map_err(|_| BrowserError::Timeout {
240                operation: "NodeHandle::attr_map".to_string(),
241                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
242            })?
243            .map_err(|e| self.cdp_err_or_stale(&e, "attr_map"))?;
244
245        let mut map = HashMap::with_capacity(flat.len() / 2);
246        for pair in flat.chunks_exact(2) {
247            if let [name, value] = pair {
248                map.insert(name.clone(), value.clone());
249            }
250        }
251        Ok(map)
252    }
253
254    /// Return the element's `textContent` (all text inside, no markup).
255    ///
256    /// Reads the DOM `textContent` property via a single JS eval — this is the
257    /// raw text concatenation of all descendant text nodes, independent of
258    /// layout or visibility (unlike `innerText`).
259    ///
260    /// Returns an empty string when the property is absent or null.
261    ///
262    /// # Errors
263    ///
264    /// Returns [`BrowserError::StaleNode`] when the remote object has been
265    /// invalidated.
266    pub async fn text_content(&self) -> Result<String> {
267        let returns = timeout(
268            self.cdp_timeout,
269            self.element
270                .call_js_fn(r"function() { return this.textContent ?? ''; }", true),
271        )
272        .await
273        .map_err(|_| BrowserError::Timeout {
274            operation: "NodeHandle::text_content".to_string(),
275            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
276        })?
277        .map_err(|e| self.cdp_err_or_stale(&e, "text_content"))?;
278
279        Ok(returns
280            .result
281            .value
282            .as_ref()
283            .and_then(|v| v.as_str())
284            .unwrap_or("")
285            .to_string())
286    }
287
288    /// Return the element's `innerHTML`.
289    ///
290    /// Returns an empty string when the property is absent or null.
291    ///
292    /// # Errors
293    ///
294    /// Returns [`BrowserError::StaleNode`] when the remote object has been
295    /// invalidated.
296    pub async fn inner_html(&self) -> Result<String> {
297        timeout(self.cdp_timeout, self.element.inner_html())
298            .await
299            .map_err(|_| BrowserError::Timeout {
300                operation: "NodeHandle::inner_html".to_string(),
301                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
302            })?
303            .map_err(|e| self.cdp_err_or_stale(&e, "inner_html"))
304            .map(Option::unwrap_or_default)
305    }
306
307    /// Return the element's `outerHTML`.
308    ///
309    /// Returns an empty string when the property is absent or null.
310    ///
311    /// # Errors
312    ///
313    /// Returns [`BrowserError::StaleNode`] when the remote object has been
314    /// invalidated.
315    pub async fn outer_html(&self) -> Result<String> {
316        timeout(self.cdp_timeout, self.element.outer_html())
317            .await
318            .map_err(|_| BrowserError::Timeout {
319                operation: "NodeHandle::outer_html".to_string(),
320                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
321            })?
322            .map_err(|e| self.cdp_err_or_stale(&e, "outer_html"))
323            .map(Option::unwrap_or_default)
324    }
325
326    /// Return the ancestor tag-name chain, root-last.
327    ///
328    /// Executes a single `Runtime.callFunctionOn` JavaScript function that
329    /// walks `parentElement` and collects tag names — no repeated CDP calls.
330    ///
331    /// ```text
332    /// // for <span> inside <p> inside <article> inside <body> inside <html>
333    /// ["p", "article", "body", "html"]
334    /// ```
335    ///
336    /// # Errors
337    ///
338    /// Returns [`BrowserError::StaleNode`] when the remote object has been
339    /// invalidated, or [`BrowserError::ScriptExecutionFailed`] when CDP
340    /// returns no value or the value is not a string array.
341    pub async fn ancestors(&self) -> Result<Vec<String>> {
342        let returns = timeout(
343            self.cdp_timeout,
344            self.element.call_js_fn(
345                r"function() {
346                    const a = [];
347                    let n = this.parentElement;
348                    while (n) { a.push(n.tagName.toLowerCase()); n = n.parentElement; }
349                    return a;
350                }",
351                true,
352            ),
353        )
354        .await
355        .map_err(|_| BrowserError::Timeout {
356            operation: "NodeHandle::ancestors".to_string(),
357            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
358        })?
359        .map_err(|e| self.cdp_err_or_stale(&e, "ancestors"))?;
360
361        // With returnByValue=true and an array return, CDP delivers the value
362        // as a JSON array directly — no JSON.stringify/re-parse needed.
363        // A missing or wrong-type value indicates an unexpected CDP failure.
364        let arr = returns
365            .result
366            .value
367            .as_ref()
368            .and_then(|v| v.as_array())
369            .ok_or_else(|| BrowserError::ScriptExecutionFailed {
370                script: "NodeHandle::ancestors".to_string(),
371                reason: "CDP returned no value or a non-array value for ancestors()".to_string(),
372            })?;
373
374        arr.iter()
375            .map(|v| {
376                v.as_str().map(ToString::to_string).ok_or_else(|| {
377                    BrowserError::ScriptExecutionFailed {
378                        script: "NodeHandle::ancestors".to_string(),
379                        reason: format!("ancestor entry is not a string: {v}"),
380                    }
381                })
382            })
383            .collect()
384    }
385
386    /// Return child elements matching `selector` as new [`NodeHandle`]s.
387    ///
388    /// Issues a single `Runtime.callFunctionOn` + `DOM.querySelectorAll`
389    /// call scoped to this element — not to the entire document.
390    ///
391    /// Returns an empty `Vec` when no children match (consistent with the JS
392    /// `querySelectorAll` contract).
393    ///
394    /// # Errors
395    ///
396    /// Returns [`BrowserError::StaleNode`] when the remote object has been
397    /// invalidated, or [`BrowserError::CdpError`] on transport failure.
398    pub async fn children_matching(&self, selector: &str) -> Result<Vec<Self>> {
399        let elements = timeout(self.cdp_timeout, self.element.find_elements(selector))
400            .await
401            .map_err(|_| BrowserError::Timeout {
402                operation: "NodeHandle::children_matching".to_string(),
403                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
404            })?
405            .map_err(|e| self.cdp_err_or_stale(&e, "children_matching"))?;
406
407        let selector_arc: Arc<str> = Arc::from(selector);
408        Ok(elements
409            .into_iter()
410            .map(|el| Self {
411                element: el,
412                selector: selector_arc.clone(),
413                cdp_timeout: self.cdp_timeout,
414            })
415            .collect())
416    }
417
418    /// Map a chromiumoxide `CdpError` to either [`BrowserError::StaleNode`]
419    /// (when the remote object reference has been invalidated) or
420    /// [`BrowserError::CdpError`] for all other failures.
421    fn cdp_err_or_stale(
422        &self,
423        err: &chromiumoxide::error::CdpError,
424        operation: &str,
425    ) -> BrowserError {
426        let msg = err.to_string();
427        if msg.contains("Cannot find object with id")
428            || msg.contains("context with specified id")
429            || msg.contains("Cannot find context")
430        {
431            BrowserError::StaleNode {
432                selector: self.selector.to_string(),
433            }
434        } else {
435            BrowserError::CdpError {
436                operation: operation.to_string(),
437                message: msg,
438            }
439        }
440    }
441}
442
443// ─── PageHandle ───────────────────────────────────────────────────────────────
444
445/// A handle to an open browser tab.
446///
447/// On drop the underlying page is closed automatically.
448///
449/// # Example
450///
451/// ```no_run
452/// use stygian_browser::{BrowserPool, BrowserConfig};
453/// use stygian_browser::page::WaitUntil;
454/// use std::time::Duration;
455///
456/// # async fn run() -> stygian_browser::error::Result<()> {
457/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
458/// let handle = pool.acquire().await?;
459/// let mut page = handle.browser().expect("valid browser").new_page().await?;
460/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
461/// let html = page.content().await?;
462/// drop(page); // closes the tab
463/// handle.release().await;
464/// # Ok(())
465/// # }
466/// ```
467pub struct PageHandle {
468    page: Page,
469    cdp_timeout: Duration,
470    /// HTTP status code of the most recent main-frame navigation, or `0` if not
471    /// yet captured.  Written atomically by the listener spawned in `navigate()`.
472    last_status_code: Arc<AtomicU16>,
473    /// Background task processing `Fetch.requestPaused` events. Aborted and
474    /// replaced each time `set_resource_filter` is called.
475    resource_filter_task: Option<tokio::task::JoinHandle<()>>,
476}
477
478impl PageHandle {
479    /// Wrap a raw chromiumoxide [`Page`] in a handle.
480    pub(crate) fn new(page: Page, cdp_timeout: Duration) -> Self {
481        Self {
482            page,
483            cdp_timeout,
484            last_status_code: Arc::new(AtomicU16::new(0)),
485            resource_filter_task: None,
486        }
487    }
488
489    /// Navigate to `url` and wait for `condition` within `nav_timeout`.
490    ///
491    /// # Errors
492    ///
493    /// Returns [`BrowserError::NavigationFailed`] if the navigation times out or
494    /// the CDP call fails.
495    pub async fn navigate(
496        &mut self,
497        url: &str,
498        condition: WaitUntil,
499        nav_timeout: Duration,
500    ) -> Result<()> {
501        self.setup_status_capture().await;
502        timeout(
503            nav_timeout,
504            self.navigate_inner(url, condition, nav_timeout),
505        )
506        .await
507        .map_err(|_| BrowserError::NavigationFailed {
508            url: url.to_string(),
509            reason: format!("navigation timed out after {nav_timeout:?}"),
510        })?
511    }
512
513    /// Reset the last status code and wire up the `Network.responseReceived`
514    /// listener before any navigation starts.  Errors are logged and swallowed
515    /// so that a missing network domain never blocks navigation.
516    async fn setup_status_capture(&self) {
517        use chromiumoxide::cdp::browser_protocol::network::{
518            EventResponseReceived, ResourceType as NetworkResourceType,
519        };
520        use futures::StreamExt;
521
522        // Reset so a stale code is not returned if the new navigation fails
523        // before the response headers arrive.
524        self.last_status_code.store(0, Ordering::Release);
525
526        // Subscribe *before* goto() — the listener runs in a detached task and
527        // stores the first Document-type response status atomically.
528        let page_for_listener = self.page.clone();
529        let status_capture = Arc::clone(&self.last_status_code);
530        match page_for_listener
531            .event_listener::<EventResponseReceived>()
532            .await
533        {
534            Ok(mut stream) => {
535                tokio::spawn(async move {
536                    while let Some(event) = stream.next().await {
537                        if event.r#type == NetworkResourceType::Document {
538                            let code = u16::try_from(event.response.status).unwrap_or(0);
539                            if code > 0 {
540                                status_capture.store(code, Ordering::Release);
541                            }
542                            break;
543                        }
544                    }
545                });
546            }
547            Err(e) => warn!("status-code capture unavailable: {e}"),
548        }
549    }
550
551    /// Subscribe to the appropriate CDP events, fire `goto`, then await
552    /// `condition`.  All subscriptions precede `goto` to eliminate the race
553    /// described in issue #7.
554    async fn navigate_inner(
555        &self,
556        url: &str,
557        condition: WaitUntil,
558        nav_timeout: Duration,
559    ) -> Result<()> {
560        use chromiumoxide::cdp::browser_protocol::page::{
561            EventDomContentEventFired, EventLoadEventFired,
562        };
563        use futures::StreamExt;
564
565        let url_owned = url.to_string();
566
567        let mut dom_events = match &condition {
568            WaitUntil::DomContentLoaded => Some(
569                self.page
570                    .event_listener::<EventDomContentEventFired>()
571                    .await
572                    .map_err(|e| BrowserError::NavigationFailed {
573                        url: url_owned.clone(),
574                        reason: e.to_string(),
575                    })?,
576            ),
577            _ => None,
578        };
579
580        let mut load_events = match &condition {
581            WaitUntil::NetworkIdle => Some(
582                self.page
583                    .event_listener::<EventLoadEventFired>()
584                    .await
585                    .map_err(|e| BrowserError::NavigationFailed {
586                        url: url_owned.clone(),
587                        reason: e.to_string(),
588                    })?,
589            ),
590            _ => None,
591        };
592
593        let inflight = if matches!(condition, WaitUntil::NetworkIdle) {
594            Some(self.subscribe_inflight_counter().await)
595        } else {
596            None
597        };
598
599        self.page
600            .goto(url)
601            .await
602            .map_err(|e| BrowserError::NavigationFailed {
603                url: url_owned.clone(),
604                reason: e.to_string(),
605            })?;
606
607        match &condition {
608            WaitUntil::DomContentLoaded => {
609                if let Some(ref mut events) = dom_events {
610                    let _ = events.next().await;
611                }
612            }
613            WaitUntil::NetworkIdle => {
614                if let Some(ref mut events) = load_events {
615                    let _ = events.next().await;
616                }
617                if let Some(ref counter) = inflight {
618                    Self::wait_network_idle(counter).await;
619                }
620            }
621            WaitUntil::Selector(css) => {
622                self.wait_for_selector(css, nav_timeout).await?;
623            }
624        }
625        Ok(())
626    }
627
628    /// Spawn three detached tasks that maintain a signed in-flight request
629    /// counter via `Network.requestWillBeSent` (+1) and
630    /// `Network.loadingFinished`/`Network.loadingFailed` (−1 each).
631    /// Returns the shared counter so the caller can poll it.
632    async fn subscribe_inflight_counter(&self) -> Arc<std::sync::atomic::AtomicI32> {
633        use std::sync::atomic::AtomicI32;
634
635        use chromiumoxide::cdp::browser_protocol::network::{
636            EventLoadingFailed, EventLoadingFinished, EventRequestWillBeSent,
637        };
638        use futures::StreamExt;
639
640        let counter: Arc<AtomicI32> = Arc::new(AtomicI32::new(0));
641        let pairs: [(Arc<AtomicI32>, i32); 3] = [
642            (Arc::clone(&counter), 1),
643            (Arc::clone(&counter), -1),
644            (Arc::clone(&counter), -1),
645        ];
646        let [p1, p2, p3] = [self.page.clone(), self.page.clone(), self.page.clone()];
647
648        macro_rules! spawn_tracker {
649            ($page:expr, $event:ty, $c:expr, $delta:expr) => {
650                match $page.event_listener::<$event>().await {
651                    Ok(mut s) => {
652                        let c = $c;
653                        let d = $delta;
654                        tokio::spawn(async move {
655                            while s.next().await.is_some() {
656                                c.fetch_add(d, Ordering::Relaxed);
657                            }
658                        });
659                    }
660                    Err(e) => warn!("network-idle tracker unavailable: {e}"),
661                }
662            };
663        }
664
665        let [(c1, d1), (c2, d2), (c3, d3)] = pairs;
666        spawn_tracker!(p1, EventRequestWillBeSent, c1, d1);
667        spawn_tracker!(p2, EventLoadingFinished, c2, d2);
668        spawn_tracker!(p3, EventLoadingFailed, c3, d3);
669
670        counter
671    }
672
673    /// Poll `counter` until ≤ 2 in-flight requests persist for 500 ms
674    /// (equivalent to Playwright's `networkidle2`).
675    async fn wait_network_idle(counter: &Arc<std::sync::atomic::AtomicI32>) {
676        const IDLE_THRESHOLD: i32 = 2;
677        const SETTLE: Duration = Duration::from_millis(500);
678        loop {
679            if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
680                tokio::time::sleep(SETTLE).await;
681                if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
682                    break;
683                }
684            } else {
685                tokio::time::sleep(Duration::from_millis(50)).await;
686            }
687        }
688    }
689
690    /// Wait until `document.querySelector(selector)` is non-null (`timeout`).
691    ///
692    /// # Errors
693    ///
694    /// Returns [`BrowserError::NavigationFailed`] if the selector is not found
695    /// within the given timeout.
696    pub async fn wait_for_selector(&self, selector: &str, wait_timeout: Duration) -> Result<()> {
697        let selector_owned = selector.to_string();
698        let poll = async {
699            loop {
700                if self.page.find_element(selector_owned.clone()).await.is_ok() {
701                    return Ok(());
702                }
703                tokio::time::sleep(Duration::from_millis(100)).await;
704            }
705        };
706
707        timeout(wait_timeout, poll)
708            .await
709            .map_err(|_| BrowserError::NavigationFailed {
710                url: String::new(),
711                reason: format!("selector '{selector_owned}' not found within {wait_timeout:?}"),
712            })?
713    }
714
715    /// Set a resource filter to block specific network request types.
716    ///
717    /// Enables `Fetch` interception and spawns a background task that continues
718    /// allowed requests and fails blocked ones with `BlockedByClient`. Any
719    /// previously set filter task is cancelled first.
720    ///
721    /// # Errors
722    ///
723    /// Returns a [`BrowserError::CdpError`] if the CDP call fails.
724    pub async fn set_resource_filter(&mut self, filter: ResourceFilter) -> Result<()> {
725        use chromiumoxide::cdp::browser_protocol::fetch::{
726            ContinueRequestParams, EnableParams, EventRequestPaused, FailRequestParams,
727            RequestPattern,
728        };
729        use chromiumoxide::cdp::browser_protocol::network::ErrorReason;
730        use futures::StreamExt as _;
731
732        if filter.is_empty() {
733            return Ok(());
734        }
735
736        // Cancel any previously running filter task.
737        if let Some(task) = self.resource_filter_task.take() {
738            task.abort();
739        }
740
741        let pattern = RequestPattern::builder().url_pattern("*").build();
742        let params = EnableParams::builder()
743            .patterns(vec![pattern])
744            .handle_auth_requests(false)
745            .build();
746
747        timeout(self.cdp_timeout, self.page.execute::<EnableParams>(params))
748            .await
749            .map_err(|_| BrowserError::Timeout {
750                operation: "Fetch.enable".to_string(),
751                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
752            })?
753            .map_err(|e| BrowserError::CdpError {
754                operation: "Fetch.enable".to_string(),
755                message: e.to_string(),
756            })?;
757
758        // Subscribe to requestPaused events and dispatch each one so navigation
759        // is never blocked. Without this handler Chrome holds every intercepted
760        // request indefinitely and the page hangs.
761        let mut events = self
762            .page
763            .event_listener::<EventRequestPaused>()
764            .await
765            .map_err(|e| BrowserError::CdpError {
766                operation: "Fetch.requestPaused subscribe".to_string(),
767                message: e.to_string(),
768            })?;
769
770        let page = self.page.clone();
771        debug!("Resource filter active: {:?}", filter);
772        let task = tokio::spawn(async move {
773            while let Some(event) = events.next().await {
774                let request_id = event.request_id.clone();
775                if filter.should_block(event.resource_type.as_ref()) {
776                    let params = FailRequestParams::new(request_id, ErrorReason::BlockedByClient);
777                    let _ = page.execute(params).await;
778                } else {
779                    let _ = page.execute(ContinueRequestParams::new(request_id)).await;
780                }
781            }
782        });
783
784        self.resource_filter_task = Some(task);
785        Ok(())
786    }
787
788    /// Return the current page URL (post-navigation, post-redirect).
789    ///
790    /// Delegates to the CDP `Target.getTargetInfo` binding already used
791    /// internally by [`save_cookies`](Self::save_cookies); no extra network
792    /// request is made.  Returns an empty string if the URL is not yet set
793    /// (e.g. on a blank tab before the first navigation).
794    ///
795    /// # Errors
796    ///
797    /// Returns [`BrowserError::CdpError`] if the underlying CDP call fails, or
798    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
799    ///
800    /// # Example
801    ///
802    /// ```no_run
803    /// use stygian_browser::{BrowserPool, BrowserConfig};
804    /// use stygian_browser::page::WaitUntil;
805    /// use std::time::Duration;
806    ///
807    /// # async fn run() -> stygian_browser::error::Result<()> {
808    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
809    /// let handle = pool.acquire().await?;
810    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
811    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
812    /// let url = page.url().await?;
813    /// println!("Final URL after redirects: {url}");
814    /// # Ok(())
815    /// # }
816    /// ```
817    pub async fn url(&self) -> Result<String> {
818        timeout(self.cdp_timeout, self.page.url())
819            .await
820            .map_err(|_| BrowserError::Timeout {
821                operation: "page.url".to_string(),
822                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
823            })?
824            .map_err(|e| BrowserError::CdpError {
825                operation: "page.url".to_string(),
826                message: e.to_string(),
827            })
828            .map(Option::unwrap_or_default)
829    }
830
831    /// Return the HTTP status code of the most recent main-frame navigation.
832    ///
833    /// The status is captured from the `Network.responseReceived` CDP event
834    /// wired up inside [`navigate`](Self::navigate), so it reflects the
835    /// *final* response after any server-side redirects.
836    ///
837    /// Returns `None` if the status was not captured — for example on `file://`
838    /// navigations, when [`navigate`](Self::navigate) has not yet been called,
839    /// or if the network event subscription failed.
840    ///
841    /// # Errors
842    ///
843    /// This method is infallible; the `Result` wrapper is kept for API
844    /// consistency with other `PageHandle` methods.
845    ///
846    /// # Example
847    ///
848    /// ```no_run
849    /// use stygian_browser::{BrowserPool, BrowserConfig};
850    /// use stygian_browser::page::WaitUntil;
851    /// use std::time::Duration;
852    ///
853    /// # async fn run() -> stygian_browser::error::Result<()> {
854    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
855    /// let handle = pool.acquire().await?;
856    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
857    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
858    /// if let Some(code) = page.status_code()? {
859    ///     println!("HTTP {code}");
860    /// }
861    /// # Ok(())
862    /// # }
863    /// ```
864    pub fn status_code(&self) -> Result<Option<u16>> {
865        let code = self.last_status_code.load(Ordering::Acquire);
866        Ok(if code == 0 { None } else { Some(code) })
867    }
868
869    /// Return the page's `<title>` text.
870    ///
871    /// # Errors
872    ///
873    /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
874    pub async fn title(&self) -> Result<String> {
875        timeout(self.cdp_timeout, self.page.get_title())
876            .await
877            .map_err(|_| BrowserError::Timeout {
878                operation: "get_title".to_string(),
879                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
880            })?
881            .map_err(|e| BrowserError::ScriptExecutionFailed {
882                script: "document.title".to_string(),
883                reason: e.to_string(),
884            })
885            .map(Option::unwrap_or_default)
886    }
887
888    /// Return the page's full outer HTML.
889    ///
890    /// # Errors
891    ///
892    /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
893    pub async fn content(&self) -> Result<String> {
894        timeout(self.cdp_timeout, self.page.content())
895            .await
896            .map_err(|_| BrowserError::Timeout {
897                operation: "page.content".to_string(),
898                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
899            })?
900            .map_err(|e| BrowserError::ScriptExecutionFailed {
901                script: "document.documentElement.outerHTML".to_string(),
902                reason: e.to_string(),
903            })
904    }
905
906    /// Query the live DOM for all elements matching `selector` and return
907    /// lightweight [`NodeHandle`]s backed by CDP `RemoteObjectId`s.
908    ///
909    /// No HTML serialisation occurs — the browser's in-memory DOM is queried
910    /// directly over the CDP connection, eliminating the `page.content()` +
911    /// `scraper::Html::parse_document` round-trip.
912    ///
913    /// Returns an empty `Vec` when no elements match (consistent with the JS
914    /// `querySelectorAll` contract — not an error).
915    ///
916    /// # Errors
917    ///
918    /// Returns [`BrowserError::CdpError`] if the CDP find call fails, or
919    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
920    ///
921    /// # Example
922    ///
923    /// ```no_run
924    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
925    /// use std::time::Duration;
926    ///
927    /// # async fn run() -> stygian_browser::error::Result<()> {
928    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
929    /// let handle = pool.acquire().await?;
930    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
931    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
932    ///
933    /// let nodes = page.query_selector_all("[data-ux]").await?;
934    /// for node in &nodes {
935    ///     let ux_type = node.attr("data-ux").await?;
936    ///     let text    = node.text_content().await?;
937    ///     println!("{ux_type:?}: {text}");
938    /// }
939    /// # Ok(())
940    /// # }
941    /// ```
942    pub async fn query_selector_all(&self, selector: &str) -> Result<Vec<NodeHandle>> {
943        let elements = timeout(self.cdp_timeout, self.page.find_elements(selector))
944            .await
945            .map_err(|_| BrowserError::Timeout {
946                operation: "PageHandle::query_selector_all".to_string(),
947                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
948            })?
949            .map_err(|e| BrowserError::CdpError {
950                operation: "PageHandle::query_selector_all".to_string(),
951                message: e.to_string(),
952            })?;
953
954        let selector_arc: Arc<str> = Arc::from(selector);
955        Ok(elements
956            .into_iter()
957            .map(|el| NodeHandle {
958                element: el,
959                selector: selector_arc.clone(),
960                cdp_timeout: self.cdp_timeout,
961            })
962            .collect())
963    }
964
965    /// Evaluate arbitrary JavaScript and return the result as `T`.
966    ///
967    /// # Errors
968    ///
969    /// Returns [`BrowserError::ScriptExecutionFailed`] on eval failure or
970    /// deserialization error.
971    pub async fn eval<T: serde::de::DeserializeOwned>(&self, script: &str) -> Result<T> {
972        let script_owned = script.to_string();
973        timeout(self.cdp_timeout, self.page.evaluate(script))
974            .await
975            .map_err(|_| BrowserError::Timeout {
976                operation: "page.evaluate".to_string(),
977                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
978            })?
979            .map_err(|e| BrowserError::ScriptExecutionFailed {
980                script: script_owned.clone(),
981                reason: e.to_string(),
982            })?
983            .into_value::<T>()
984            .map_err(|e| BrowserError::ScriptExecutionFailed {
985                script: script_owned,
986                reason: e.to_string(),
987            })
988    }
989
990    /// Save all cookies for the current page's origin.
991    ///
992    /// # Errors
993    ///
994    /// Returns [`BrowserError::CdpError`] if the CDP call fails.
995    pub async fn save_cookies(
996        &self,
997    ) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::Cookie>> {
998        use chromiumoxide::cdp::browser_protocol::network::GetCookiesParams;
999
1000        let url = self
1001            .page
1002            .url()
1003            .await
1004            .map_err(|e| BrowserError::CdpError {
1005                operation: "page.url".to_string(),
1006                message: e.to_string(),
1007            })?
1008            .unwrap_or_default();
1009
1010        timeout(
1011            self.cdp_timeout,
1012            self.page
1013                .execute(GetCookiesParams::builder().urls(vec![url]).build()),
1014        )
1015        .await
1016        .map_err(|_| BrowserError::Timeout {
1017            operation: "Network.getCookies".to_string(),
1018            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1019        })?
1020        .map_err(|e| BrowserError::CdpError {
1021            operation: "Network.getCookies".to_string(),
1022            message: e.to_string(),
1023        })
1024        .map(|r| r.cookies.clone())
1025    }
1026
1027    /// Inject cookies into the current page.
1028    ///
1029    /// Seeds session tokens or other state without needing a full
1030    /// [`SessionSnapshot`][crate::session::SessionSnapshot] and without
1031    /// requiring a direct `chromiumoxide` dependency in calling code.
1032    ///
1033    /// Individual cookie failures are logged as warnings and do not abort the
1034    /// remaining cookies.
1035    ///
1036    /// # Errors
1037    ///
1038    /// Returns [`BrowserError::Timeout`] if a single `Network.setCookie` CDP
1039    /// call exceeds `cdp_timeout`.
1040    ///
1041    /// # Example
1042    ///
1043    /// ```no_run
1044    /// use stygian_browser::{BrowserPool, BrowserConfig};
1045    /// use stygian_browser::session::SessionCookie;
1046    /// use std::time::Duration;
1047    ///
1048    /// # async fn run() -> stygian_browser::error::Result<()> {
1049    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1050    /// let handle = pool.acquire().await?;
1051    /// let page = handle.browser().expect("valid browser").new_page().await?;
1052    /// let cookies = vec![SessionCookie {
1053    ///     name: "session".to_string(),
1054    ///     value: "abc123".to_string(),
1055    ///     domain: ".example.com".to_string(),
1056    ///     path: "/".to_string(),
1057    ///     expires: -1.0,
1058    ///     http_only: true,
1059    ///     secure: true,
1060    ///     same_site: "Lax".to_string(),
1061    /// }];
1062    /// page.inject_cookies(&cookies).await?;
1063    /// # Ok(())
1064    /// # }
1065    /// ```
1066    pub async fn inject_cookies(&self, cookies: &[crate::session::SessionCookie]) -> Result<()> {
1067        use chromiumoxide::cdp::browser_protocol::network::SetCookieParams;
1068
1069        for cookie in cookies {
1070            let params = match SetCookieParams::builder()
1071                .name(cookie.name.clone())
1072                .value(cookie.value.clone())
1073                .domain(cookie.domain.clone())
1074                .path(cookie.path.clone())
1075                .http_only(cookie.http_only)
1076                .secure(cookie.secure)
1077                .build()
1078            {
1079                Ok(p) => p,
1080                Err(e) => {
1081                    warn!(cookie = %cookie.name, error = %e, "Failed to build cookie params");
1082                    continue;
1083                }
1084            };
1085
1086            match timeout(self.cdp_timeout, self.page.execute(params)).await {
1087                Err(_) => {
1088                    warn!(
1089                        cookie = %cookie.name,
1090                        timeout_ms = self.cdp_timeout.as_millis(),
1091                        "Timed out injecting cookie"
1092                    );
1093                }
1094                Ok(Err(e)) => {
1095                    warn!(cookie = %cookie.name, error = %e, "Failed to inject cookie");
1096                }
1097                Ok(Ok(_)) => {}
1098            }
1099        }
1100
1101        debug!(count = cookies.len(), "Cookies injected");
1102        Ok(())
1103    }
1104
1105    /// Capture a screenshot of the current page as PNG bytes.
1106    ///
1107    /// The screenshot is full-page by default (viewport clipped to the rendered
1108    /// layout area).  Save the returned bytes to a `.png` file or process
1109    /// them in-memory.
1110    ///
1111    /// # Errors
1112    ///
1113    /// Returns [`BrowserError::CdpError`] if the CDP `Page.captureScreenshot`
1114    /// command fails, or [`BrowserError::Timeout`] if it exceeds
1115    /// `cdp_timeout`.
1116    ///
1117    /// # Example
1118    ///
1119    /// ```no_run
1120    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1121    /// use std::{time::Duration, fs};
1122    ///
1123    /// # async fn run() -> stygian_browser::error::Result<()> {
1124    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1125    /// let handle = pool.acquire().await?;
1126    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1127    /// page.navigate("https://example.com", WaitUntil::Selector("body".to_string()), Duration::from_secs(30)).await?;
1128    /// let png = page.screenshot().await?;
1129    /// fs::write("screenshot.png", &png).unwrap();
1130    /// # Ok(())
1131    /// # }
1132    /// ```
1133    pub async fn screenshot(&self) -> Result<Vec<u8>> {
1134        use chromiumoxide::page::ScreenshotParams;
1135
1136        let params = ScreenshotParams::builder().full_page(true).build();
1137
1138        timeout(self.cdp_timeout, self.page.screenshot(params))
1139            .await
1140            .map_err(|_| BrowserError::Timeout {
1141                operation: "Page.captureScreenshot".to_string(),
1142                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1143            })?
1144            .map_err(|e| BrowserError::CdpError {
1145                operation: "Page.captureScreenshot".to_string(),
1146                message: e.to_string(),
1147            })
1148    }
1149
1150    /// Borrow the underlying chromiumoxide [`Page`].
1151    pub const fn inner(&self) -> &Page {
1152        &self.page
1153    }
1154
1155    /// Close this page (tab).
1156    ///
1157    /// Called automatically on drop; explicit call avoids suppressing the error.
1158    pub async fn close(self) -> Result<()> {
1159        timeout(Duration::from_secs(5), self.page.clone().close())
1160            .await
1161            .map_err(|_| BrowserError::Timeout {
1162                operation: "page.close".to_string(),
1163                duration_ms: 5000,
1164            })?
1165            .map_err(|e| BrowserError::CdpError {
1166                operation: "page.close".to_string(),
1167                message: e.to_string(),
1168            })
1169    }
1170}
1171
1172// ─── Stealth diagnostics ──────────────────────────────────────────────────────
1173
#[cfg(feature = "stealth")]
impl PageHandle {
    /// Evaluate every built-in stealth detection check on the current page and
    /// collect the outcomes into one report.
    ///
    /// For each entry of [`crate::diagnostic::all_checks`], the check's
    /// JavaScript is run through [`eval`](Self::eval) and its string output is
    /// handed to the check's own parser.  The results are aggregated into a
    /// [`crate::diagnostic::DiagnosticReport`].
    ///
    /// A check whose script cannot be evaluated (timeout, JS exception,
    /// deserialization error) is recorded as a **failed** check with a
    /// `script error: …` detail string; it does not abort the remaining checks.
    ///
    /// # Errors
    ///
    /// As currently written this method always returns `Ok`: every evaluation
    /// error is captured in the report instead of being propagated.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # async fn run() -> stygian_browser::error::Result<()> {
    /// use stygian_browser::{BrowserPool, BrowserConfig};
    /// use stygian_browser::page::WaitUntil;
    /// use std::time::Duration;
    ///
    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
    /// let handle = pool.acquire().await?;
    /// let browser = handle.browser().expect("valid browser");
    /// let mut page = browser.new_page().await?;
    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(10)).await?;
    ///
    /// let report = page.verify_stealth().await?;
    /// println!("Stealth: {}/{} checks passed", report.passed_count, report.checks.len());
    /// for failure in report.failures() {
    ///     eprintln!("  FAIL  {}: {}", failure.description, failure.details);
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub async fn verify_stealth(&self) -> Result<crate::diagnostic::DiagnosticReport> {
        use crate::diagnostic::{CheckResult, DiagnosticReport, all_checks};

        let mut outcomes: Vec<CheckResult> = Vec::new();

        for check in all_checks() {
            let evaluated = self.eval::<String>(check.script).await;

            let outcome = match evaluated {
                Ok(raw_json) => check.parse_output(&raw_json),
                Err(e) => {
                    warn!(
                        check = ?check.id,
                        error = %e,
                        "stealth check script failed during evaluation"
                    );
                    // An unevaluable check counts as a failure, not an abort.
                    CheckResult {
                        id: check.id,
                        description: check.description.to_string(),
                        passed: false,
                        details: format!("script error: {e}"),
                    }
                }
            };

            debug!(
                check = ?outcome.id,
                passed = outcome.passed,
                details = %outcome.details,
                "stealth check result"
            );
            outcomes.push(outcome);
        }

        Ok(DiagnosticReport::new(outcomes))
    }
}
1246
1247impl Drop for PageHandle {
1248    fn drop(&mut self) {
1249        warn!("PageHandle dropped without explicit close(); spawning cleanup task");
1250        // chromiumoxide Page does not implement close on Drop, so we spawn
1251        // a fire-and-forget task. The page ref is already owned; we need to
1252        // swap it out. We clone the Page handle (it's Arc-backed internally).
1253        let page = self.page.clone();
1254        tokio::spawn(async move {
1255            let _ = page.close().await;
1256        });
1257    }
1258}
1259
1260// ─── Tests ────────────────────────────────────────────────────────────────────
1261
#[cfg(test)]
mod tests {
    use super::*;

    /// `block_media` rejects image, font, stylesheet and media requests while
    /// letting scripts and XHR through.
    #[test]
    fn resource_filter_block_media_blocks_image() {
        let media = ResourceFilter::block_media();
        for blocked in ["Image", "Font", "Stylesheet", "Media"] {
            assert!(media.should_block(blocked));
        }
        for allowed in ["Script", "XHR"] {
            assert!(!media.should_block(allowed));
        }
    }

    /// Resource-type matching ignores ASCII case.
    #[test]
    fn resource_filter_case_insensitive() {
        let images_and_fonts = ResourceFilter::block_images_and_fonts();
        for spelling in ["image", "IMAGE"] {
            assert!(images_and_fonts.should_block(spelling));
        }
        assert!(!images_and_fonts.should_block("Stylesheet"));
    }

    /// Chained `block` calls accumulate exactly the requested types.
    #[test]
    fn resource_filter_builder_chain() {
        let with_image = ResourceFilter::default().block(ResourceType::Image);
        let filter = with_image.block(ResourceType::Font);
        for blocked in ["Image", "Font"] {
            assert!(filter.should_block(blocked));
        }
        assert!(!filter.should_block("Stylesheet"));
    }

    /// Blocking the same type twice stores it only once.
    #[test]
    fn resource_filter_dedup_block() {
        let once = ResourceFilter::default().block(ResourceType::Image);
        let twice = once.block(ResourceType::Image);
        assert_eq!(twice.blocked.len(), 1);
    }

    /// A default filter blocks nothing; the media preset blocks something.
    #[test]
    fn resource_filter_is_empty_when_default() {
        let untouched = ResourceFilter::default();
        let preset = ResourceFilter::block_media();
        assert!(untouched.is_empty());
        assert!(!preset.is_empty());
    }

    /// `WaitUntil::Selector` keeps the selector string it was built with.
    #[test]
    fn wait_until_selector_stores_string() {
        let wait = WaitUntil::Selector("#foo".to_string());
        let holds_selector = matches!(&wait, WaitUntil::Selector(stored) if stored == "#foo");
        assert!(holds_selector);
    }

    /// Each `ResourceType` maps to its CDP resource-type name.
    #[test]
    fn resource_type_cdp_str() {
        let expectations = [
            (ResourceType::Image, "Image"),
            (ResourceType::Font, "Font"),
            (ResourceType::Stylesheet, "Stylesheet"),
            (ResourceType::Media, "Media"),
        ];
        for (kind, cdp_name) in expectations {
            assert_eq!(kind.as_cdp_str(), cdp_name);
        }
    }

    /// `PageHandle` must be `Send + Sync` for use across thread boundaries.
    #[test]
    fn page_handle_is_send_sync() {
        fn assert_send_and_sync<T: Send + Sync>() {}
        assert_send_and_sync::<PageHandle>();
    }

    /// The status-code sentinel (0 = "not yet captured") and the conversion to
    /// `Option<u16>` are pure-logic invariants testable without a live browser.
    #[test]
    fn status_code_sentinel_zero_maps_to_none() {
        use std::sync::atomic::{AtomicU16, Ordering};
        let atom = AtomicU16::new(0);
        let observed = match atom.load(Ordering::Acquire) {
            0 => None,
            code => Some(code),
        };
        assert_eq!(observed, None::<u16>);
    }

    /// Any non-zero stored value is reported back unchanged as `Some`.
    #[test]
    fn status_code_non_zero_maps_to_some() {
        use std::sync::atomic::{AtomicU16, Ordering};
        for expected in [200u16, 301, 404, 503] {
            let atom = AtomicU16::new(expected);
            let observed = match atom.load(Ordering::Acquire) {
                0 => None,
                code => Some(code),
            };
            assert_eq!(observed, Some(expected));
        }
    }

    // ── NodeHandle pure-logic tests ───────────────────────────────────────────

    /// `attr_map` relies on `chunks_exact(2)` — verify the pairing logic is
    /// correct without a live browser by exercising it directly.
    #[test]
    fn attr_map_chunking_pairs_correctly() {
        let flat = ["id", "main", "data-ux", "Section", "class", "container"].map(String::from);
        let map: std::collections::HashMap<String, String> = flat
            .chunks_exact(2)
            .filter_map(|pair| match pair {
                [name, value] => Some((name.clone(), value.clone())),
                _ => None,
            })
            .collect();

        let expected = [("id", "main"), ("data-ux", "Section"), ("class", "container")];
        for (name, value) in expected {
            assert_eq!(map.get(name).map(String::as_str), Some(value));
        }
        assert_eq!(map.len(), 3);
    }

    /// Odd-length flat attribute lists (malformed CDP response) are handled
    /// gracefully — the trailing element is silently ignored.
    #[test]
    fn attr_map_chunking_ignores_odd_trailing() {
        let flat = ["orphan".to_string()]; // a name with no value
        let map: std::collections::HashMap<String, String> = flat
            .chunks_exact(2)
            .filter_map(|pair| match pair {
                [name, value] => Some((name.clone(), value.clone())),
                _ => None,
            })
            .collect();
        assert!(map.is_empty());
    }

    /// Empty flat list → empty map.
    #[test]
    fn attr_map_chunking_empty_input() {
        let flat: Vec<String> = Vec::new();
        let mut map: std::collections::HashMap<String, String> =
            std::collections::HashMap::new();
        for pair in flat.chunks_exact(2) {
            if let [name, value] = pair {
                map.insert(name.clone(), value.clone());
            }
        }
        assert!(map.is_empty());
    }

    /// `ancestors` JSON parsing: valid input round-trips correctly.
    #[test]
    fn ancestors_json_parse_round_trip() -> std::result::Result<(), serde_json::Error> {
        let parsed = serde_json::from_str::<Vec<String>>(r#"["p","article","body","html"]"#)?;
        assert_eq!(parsed, ["p", "article", "body", "html"]);
        Ok(())
    }

    /// `ancestors` JSON parsing: empty array (no parent) is fine.
    #[test]
    fn ancestors_json_parse_empty() -> std::result::Result<(), serde_json::Error> {
        let parsed = serde_json::from_str::<Vec<String>>("[]")?;
        assert!(parsed.is_empty());
        Ok(())
    }
}