Skip to main content

stygian_browser/
page.rs

1//! Page and browsing context management for isolated, parallel scraping
2//!
3//! Each `BrowserContext` (future) is an incognito-style isolation boundary (separate
4//! cookies, localStorage, cache).  Each context can contain many [`PageHandle`]s
5//! (tabs).  Both types clean up their CDP resources automatically on drop.
6//!
7//! ## Resource blocking
8//!
9//! Pass a [`ResourceFilter`] to [`PageHandle::set_resource_filter`] to intercept
10//! and block specific request types (images, fonts, CSS) before page load —
11//! significantly reducing page load times for text-only scraping.
12//!
13//! ## Wait strategies
14//!
15//! [`PageHandle`] exposes three wait strategies via [`WaitUntil`]:
16//! - `DomContentLoaded` — fires when the HTML is parsed
17//! - `NetworkIdle` — fires when there are ≤2 in-flight requests for 500 ms
18//! - `Selector(css)` — fires when a CSS selector matches an element
19//!
20//! # Example
21//!
22//! ```no_run
23//! use stygian_browser::{BrowserPool, BrowserConfig};
24//! use stygian_browser::page::{ResourceFilter, WaitUntil};
25//! use std::time::Duration;
26//!
27//! # async fn run() -> stygian_browser::error::Result<()> {
28//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
29//! let handle = pool.acquire().await?;
30//!
31//! let mut page = handle.browser().expect("valid browser").new_page().await?;
32//! page.set_resource_filter(ResourceFilter::block_media()).await?;
33//! page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
34//! let title = page.title().await?;
35//! println!("title: {title}");
36//! handle.release().await;
37//! # Ok(())
38//! # }
39//! ```
40
41use std::collections::HashMap;
42use std::sync::{
43    Arc,
44    atomic::{AtomicU16, Ordering},
45};
46use std::time::Duration;
47
48use chromiumoxide::Page;
49use tokio::time::timeout;
50use tracing::{debug, warn};
51
52use crate::error::{BrowserError, Result};
53
54// ─── ResourceType ─────────────────────────────────────────────────────────────
55
/// Categories of sub-resource that a [`ResourceFilter`] is able to block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResourceType {
    /// Images: `<img>`, `<picture>`, CSS background images.
    Image,
    /// Web fonts pulled in through CSS `@font-face`.
    Font,
    /// External CSS stylesheets.
    Stylesheet,
    /// Audio and video media files.
    Media,
}

impl ResourceType {
    /// Returns the CDP spelling of this resource type.
    ///
    /// In this module the string is compared (ASCII case-insensitively) with
    /// the resource type reported by `Fetch.requestPaused` events.
    pub const fn as_cdp_str(&self) -> &'static str {
        match *self {
            Self::Media => "Media",
            Self::Stylesheet => "Stylesheet",
            Self::Font => "Font",
            Self::Image => "Image",
        }
    }
}
80
81// ─── ResourceFilter ───────────────────────────────────────────────────────────
82
83/// Set of resource types to block from loading.
84///
85/// # Example
86///
87/// ```
88/// use stygian_browser::page::ResourceFilter;
89/// let filter = ResourceFilter::block_media();
90/// assert!(filter.should_block("Image"));
91/// ```
92#[derive(Debug, Clone, Default)]
93pub struct ResourceFilter {
94    blocked: Vec<ResourceType>,
95}
96
97impl ResourceFilter {
98    /// Block all media resources (images, fonts, CSS, audio/video).
99    pub fn block_media() -> Self {
100        Self {
101            blocked: vec![
102                ResourceType::Image,
103                ResourceType::Font,
104                ResourceType::Stylesheet,
105                ResourceType::Media,
106            ],
107        }
108    }
109
110    /// Block only images and fonts (keep styles for layout-sensitive work).
111    pub fn block_images_and_fonts() -> Self {
112        Self {
113            blocked: vec![ResourceType::Image, ResourceType::Font],
114        }
115    }
116
117    /// Add a resource type to the block list.
118    #[must_use]
119    pub fn block(mut self, resource: ResourceType) -> Self {
120        if !self.blocked.contains(&resource) {
121            self.blocked.push(resource);
122        }
123        self
124    }
125
126    /// Returns `true` if the given CDP resource type string should be blocked.
127    pub fn should_block(&self, cdp_type: &str) -> bool {
128        self.blocked
129            .iter()
130            .any(|r| r.as_cdp_str().eq_ignore_ascii_case(cdp_type))
131    }
132
133    /// Returns `true` if no resource types are blocked.
134    pub const fn is_empty(&self) -> bool {
135        self.blocked.is_empty()
136    }
137}
138
139// ─── WaitUntil ────────────────────────────────────────────────────────────────
140
/// Condition to wait for after a navigation.
///
/// Conditions can be compared with `==`, which is handy when a caller needs
/// to branch on (or assert) the strategy it was handed.
///
/// # Example
///
/// ```
/// use stygian_browser::page::WaitUntil;
/// let w = WaitUntil::Selector("#main".to_string());
/// assert!(matches!(w, WaitUntil::Selector(_)));
/// assert_eq!(w, WaitUntil::Selector("#main".to_string()));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WaitUntil {
    /// Wait for the `Page.domContentEventFired` CDP event — fires when the HTML
    /// document has been fully parsed and the DOM is ready, before subresources
    /// such as images and stylesheets finish loading.
    DomContentLoaded,
    /// Wait for the `Page.loadEventFired` CDP event **and** then wait until no
    /// more than 2 network requests are in-flight for at least 500 ms
    /// (the heuristic Puppeteer calls `networkidle2`).
    NetworkIdle,
    /// Wait until `document.querySelector(selector)` returns a non-null element.
    Selector(String),
}
163
164// ─── NodeHandle ───────────────────────────────────────────────────────────────
165
166/// A handle to a live DOM node backed by a CDP `RemoteObjectId`.
167///
168/// Obtained via [`PageHandle::query_selector_all`].  Each method issues one or
169/// more CDP `Runtime.callFunctionOn` calls against the held V8 remote object
170/// reference — no HTML serialisation occurs.
171///
172/// A handle becomes **stale** after page navigation or if the underlying DOM
173/// node is removed.  Stale calls return [`BrowserError::StaleNode`] so callers
174/// can distinguish them from other CDP failures.
175///
176/// # Example
177///
178/// ```no_run
179/// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
180/// use std::time::Duration;
181///
182/// # async fn run() -> stygian_browser::error::Result<()> {
183/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
184/// let handle = pool.acquire().await?;
185/// let mut page = handle.browser().expect("valid browser").new_page().await?;
186/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
187///
188/// for node in page.query_selector_all("a[href]").await? {
189///     let href = node.attr("href").await?;
190///     let text = node.text_content().await?;
191///     println!("{text}: {href:?}");
192/// }
193/// # Ok(())
194/// # }
195/// ```
196pub struct NodeHandle {
197    element: chromiumoxide::element::Element,
198    /// Original CSS selector — preserved for stale-node error messages only.
199    selector: String,
200    cdp_timeout: Duration,
201}
202
203impl NodeHandle {
204    /// Return a single attribute value, or `None` if the attribute is absent.
205    ///
206    /// Issues one `Runtime.callFunctionOn` CDP call (`el.getAttribute(name)`).
207    ///
208    /// # Errors
209    ///
210    /// Returns [`BrowserError::StaleNode`] when the remote object has been
211    /// invalidated, or [`BrowserError::Timeout`] / [`BrowserError::CdpError`]
212    /// on transport-level failures.
213    pub async fn attr(&self, name: &str) -> Result<Option<String>> {
214        timeout(self.cdp_timeout, self.element.attribute(name))
215            .await
216            .map_err(|_| BrowserError::Timeout {
217                operation: "NodeHandle::attr".to_string(),
218                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
219            })?
220            .map_err(|e| self.cdp_err_or_stale(&e, "attr"))
221    }
222
223    /// Return all attributes as a `HashMap<name, value>` in a **single**
224    /// CDP round-trip.
225    ///
226    /// Uses `DOM.getAttributes` (via the chromiumoxide `attributes()` API)
227    /// which returns a flat `[name, value, name, value, …]` list from the node
228    /// description — no per-attribute calls are needed.
229    ///
230    /// # Errors
231    ///
232    /// Returns [`BrowserError::StaleNode`] when the remote object has been
233    /// invalidated.
234    pub async fn attr_map(&self) -> Result<HashMap<String, String>> {
235        let flat = timeout(self.cdp_timeout, self.element.attributes())
236            .await
237            .map_err(|_| BrowserError::Timeout {
238                operation: "NodeHandle::attr_map".to_string(),
239                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
240            })?
241            .map_err(|e| self.cdp_err_or_stale(&e, "attr_map"))?;
242
243        let mut map = HashMap::with_capacity(flat.len() / 2);
244        for pair in flat.chunks_exact(2) {
245            if let [name, value] = pair {
246                map.insert(name.clone(), value.clone());
247            }
248        }
249        Ok(map)
250    }
251
252    /// Return the element's `textContent` (all text inside, no markup).
253    ///
254    /// Returns an empty string when the property is absent or null.
255    ///
256    /// # Errors
257    ///
258    /// Returns [`BrowserError::StaleNode`] when the remote object has been
259    /// invalidated.
260    pub async fn text_content(&self) -> Result<String> {
261        timeout(self.cdp_timeout, self.element.inner_text())
262            .await
263            .map_err(|_| BrowserError::Timeout {
264                operation: "NodeHandle::text_content".to_string(),
265                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
266            })?
267            .map_err(|e| self.cdp_err_or_stale(&e, "text_content"))
268            .map(Option::unwrap_or_default)
269    }
270
271    /// Return the element's `innerHTML`.
272    ///
273    /// Returns an empty string when the property is absent or null.
274    ///
275    /// # Errors
276    ///
277    /// Returns [`BrowserError::StaleNode`] when the remote object has been
278    /// invalidated.
279    pub async fn inner_html(&self) -> Result<String> {
280        timeout(self.cdp_timeout, self.element.inner_html())
281            .await
282            .map_err(|_| BrowserError::Timeout {
283                operation: "NodeHandle::inner_html".to_string(),
284                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
285            })?
286            .map_err(|e| self.cdp_err_or_stale(&e, "inner_html"))
287            .map(Option::unwrap_or_default)
288    }
289
290    /// Return the element's `outerHTML`.
291    ///
292    /// Returns an empty string when the property is absent or null.
293    ///
294    /// # Errors
295    ///
296    /// Returns [`BrowserError::StaleNode`] when the remote object has been
297    /// invalidated.
298    pub async fn outer_html(&self) -> Result<String> {
299        timeout(self.cdp_timeout, self.element.outer_html())
300            .await
301            .map_err(|_| BrowserError::Timeout {
302                operation: "NodeHandle::outer_html".to_string(),
303                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
304            })?
305            .map_err(|e| self.cdp_err_or_stale(&e, "outer_html"))
306            .map(Option::unwrap_or_default)
307    }
308
309    /// Return the ancestor tag-name chain, root-last.
310    ///
311    /// Executes a single `Runtime.callFunctionOn` JavaScript function that
312    /// walks `parentElement` and collects tag names — no repeated CDP calls.
313    ///
314    /// ```text
315    /// // for <span> inside <p> inside <article> inside <body> inside <html>
316    /// ["p", "article", "body", "html"]
317    /// ```
318    ///
319    /// # Errors
320    ///
321    /// Returns [`BrowserError::StaleNode`] when the remote object has been
322    /// invalidated, or [`BrowserError::ScriptExecutionFailed`] when the
323    /// JSON returned by the script cannot be parsed.
324    pub async fn ancestors(&self) -> Result<Vec<String>> {
325        let returns = timeout(
326            self.cdp_timeout,
327            self.element.call_js_fn(
328                r"function() {
329                    const a = [];
330                    let n = this.parentElement;
331                    while (n) { a.push(n.tagName.toLowerCase()); n = n.parentElement; }
332                    return JSON.stringify(a);
333                }",
334                false,
335            ),
336        )
337        .await
338        .map_err(|_| BrowserError::Timeout {
339            operation: "NodeHandle::ancestors".to_string(),
340            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
341        })?
342        .map_err(|e| self.cdp_err_or_stale(&e, "ancestors"))?;
343
344        let json_str = returns
345            .result
346            .value
347            .as_ref()
348            .and_then(|v| v.as_str())
349            .unwrap_or("[]");
350
351        serde_json::from_str::<Vec<String>>(json_str).map_err(|e| {
352            BrowserError::ScriptExecutionFailed {
353                script: "NodeHandle::ancestors".to_string(),
354                reason: e.to_string(),
355            }
356        })
357    }
358
359    /// Return child elements matching `selector` as new [`NodeHandle`]s.
360    ///
361    /// Issues a single `Runtime.callFunctionOn` + `DOM.querySelectorAll`
362    /// call scoped to this element — not to the entire document.
363    ///
364    /// Returns an empty `Vec` when no children match (consistent with the JS
365    /// `querySelectorAll` contract).
366    ///
367    /// # Errors
368    ///
369    /// Returns [`BrowserError::StaleNode`] when the remote object has been
370    /// invalidated, or [`BrowserError::CdpError`] on transport failure.
371    pub async fn children_matching(&self, selector: &str) -> Result<Vec<Self>> {
372        let elements = timeout(self.cdp_timeout, self.element.find_elements(selector))
373            .await
374            .map_err(|_| BrowserError::Timeout {
375                operation: "NodeHandle::children_matching".to_string(),
376                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
377            })?
378            .map_err(|e| self.cdp_err_or_stale(&e, "children_matching"))?;
379
380        Ok(elements
381            .into_iter()
382            .map(|el| Self {
383                element: el,
384                selector: selector.to_string(),
385                cdp_timeout: self.cdp_timeout,
386            })
387            .collect())
388    }
389
390    /// Map a chromiumoxide `CdpError` to either [`BrowserError::StaleNode`]
391    /// (when the remote object reference has been invalidated) or
392    /// [`BrowserError::CdpError`] for all other failures.
393    fn cdp_err_or_stale(
394        &self,
395        err: &chromiumoxide::error::CdpError,
396        operation: &str,
397    ) -> BrowserError {
398        let msg = err.to_string();
399        if msg.contains("Cannot find object with id")
400            || msg.contains("context with specified id")
401            || msg.contains("Cannot find context")
402        {
403            BrowserError::StaleNode {
404                selector: self.selector.clone(),
405            }
406        } else {
407            BrowserError::CdpError {
408                operation: operation.to_string(),
409                message: msg,
410            }
411        }
412    }
413}
414
415// ─── PageHandle ───────────────────────────────────────────────────────────────
416
417/// A handle to an open browser tab.
418///
419/// On drop the underlying page is closed automatically.
420///
421/// # Example
422///
423/// ```no_run
424/// use stygian_browser::{BrowserPool, BrowserConfig};
425/// use stygian_browser::page::WaitUntil;
426/// use std::time::Duration;
427///
428/// # async fn run() -> stygian_browser::error::Result<()> {
429/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
430/// let handle = pool.acquire().await?;
431/// let mut page = handle.browser().expect("valid browser").new_page().await?;
432/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
433/// let html = page.content().await?;
434/// drop(page); // closes the tab
435/// handle.release().await;
436/// # Ok(())
437/// # }
438/// ```
439pub struct PageHandle {
440    page: Page,
441    cdp_timeout: Duration,
442    /// HTTP status code of the most recent main-frame navigation, or `0` if not
443    /// yet captured.  Written atomically by the listener spawned in `navigate()`.
444    last_status_code: Arc<AtomicU16>,
445    /// Background task processing `Fetch.requestPaused` events. Aborted and
446    /// replaced each time `set_resource_filter` is called.
447    resource_filter_task: Option<tokio::task::JoinHandle<()>>,
448}
449
450impl PageHandle {
451    /// Wrap a raw chromiumoxide [`Page`] in a handle.
452    pub(crate) fn new(page: Page, cdp_timeout: Duration) -> Self {
453        Self {
454            page,
455            cdp_timeout,
456            last_status_code: Arc::new(AtomicU16::new(0)),
457            resource_filter_task: None,
458        }
459    }
460
461    /// Navigate to `url` and wait for `condition` within `nav_timeout`.
462    ///
463    /// # Errors
464    ///
465    /// Returns [`BrowserError::NavigationFailed`] if the navigation times out or
466    /// the CDP call fails.
467    pub async fn navigate(
468        &mut self,
469        url: &str,
470        condition: WaitUntil,
471        nav_timeout: Duration,
472    ) -> Result<()> {
473        self.setup_status_capture().await;
474        timeout(
475            nav_timeout,
476            self.navigate_inner(url, condition, nav_timeout),
477        )
478        .await
479        .map_err(|_| BrowserError::NavigationFailed {
480            url: url.to_string(),
481            reason: format!("navigation timed out after {nav_timeout:?}"),
482        })?
483    }
484
485    /// Reset the last status code and wire up the `Network.responseReceived`
486    /// listener before any navigation starts.  Errors are logged and swallowed
487    /// so that a missing network domain never blocks navigation.
488    async fn setup_status_capture(&self) {
489        use chromiumoxide::cdp::browser_protocol::network::{
490            EventResponseReceived, ResourceType as NetworkResourceType,
491        };
492        use futures::StreamExt;
493
494        // Reset so a stale code is not returned if the new navigation fails
495        // before the response headers arrive.
496        self.last_status_code.store(0, Ordering::Release);
497
498        // Subscribe *before* goto() — the listener runs in a detached task and
499        // stores the first Document-type response status atomically.
500        let page_for_listener = self.page.clone();
501        let status_capture = Arc::clone(&self.last_status_code);
502        match page_for_listener
503            .event_listener::<EventResponseReceived>()
504            .await
505        {
506            Ok(mut stream) => {
507                tokio::spawn(async move {
508                    while let Some(event) = stream.next().await {
509                        if event.r#type == NetworkResourceType::Document {
510                            let code = u16::try_from(event.response.status).unwrap_or(0);
511                            if code > 0 {
512                                status_capture.store(code, Ordering::Release);
513                            }
514                            break;
515                        }
516                    }
517                });
518            }
519            Err(e) => warn!("status-code capture unavailable: {e}"),
520        }
521    }
522
523    /// Subscribe to the appropriate CDP events, fire `goto`, then await
524    /// `condition`.  All subscriptions precede `goto` to eliminate the race
525    /// described in issue #7.
526    async fn navigate_inner(
527        &self,
528        url: &str,
529        condition: WaitUntil,
530        nav_timeout: Duration,
531    ) -> Result<()> {
532        use chromiumoxide::cdp::browser_protocol::page::{
533            EventDomContentEventFired, EventLoadEventFired,
534        };
535        use futures::StreamExt;
536
537        let url_owned = url.to_string();
538
539        let mut dom_events = match &condition {
540            WaitUntil::DomContentLoaded => Some(
541                self.page
542                    .event_listener::<EventDomContentEventFired>()
543                    .await
544                    .map_err(|e| BrowserError::NavigationFailed {
545                        url: url_owned.clone(),
546                        reason: e.to_string(),
547                    })?,
548            ),
549            _ => None,
550        };
551
552        let mut load_events = match &condition {
553            WaitUntil::NetworkIdle => Some(
554                self.page
555                    .event_listener::<EventLoadEventFired>()
556                    .await
557                    .map_err(|e| BrowserError::NavigationFailed {
558                        url: url_owned.clone(),
559                        reason: e.to_string(),
560                    })?,
561            ),
562            _ => None,
563        };
564
565        let inflight = if matches!(condition, WaitUntil::NetworkIdle) {
566            Some(self.subscribe_inflight_counter().await)
567        } else {
568            None
569        };
570
571        self.page
572            .goto(url)
573            .await
574            .map_err(|e| BrowserError::NavigationFailed {
575                url: url_owned.clone(),
576                reason: e.to_string(),
577            })?;
578
579        match &condition {
580            WaitUntil::DomContentLoaded => {
581                if let Some(ref mut events) = dom_events {
582                    let _ = events.next().await;
583                }
584            }
585            WaitUntil::NetworkIdle => {
586                if let Some(ref mut events) = load_events {
587                    let _ = events.next().await;
588                }
589                if let Some(ref counter) = inflight {
590                    Self::wait_network_idle(counter).await;
591                }
592            }
593            WaitUntil::Selector(css) => {
594                self.wait_for_selector(css, nav_timeout).await?;
595            }
596        }
597        Ok(())
598    }
599
600    /// Spawn three detached tasks that maintain a signed in-flight request
601    /// counter via `Network.requestWillBeSent` (+1) and
602    /// `Network.loadingFinished`/`Network.loadingFailed` (−1 each).
603    /// Returns the shared counter so the caller can poll it.
604    async fn subscribe_inflight_counter(&self) -> Arc<std::sync::atomic::AtomicI32> {
605        use std::sync::atomic::AtomicI32;
606
607        use chromiumoxide::cdp::browser_protocol::network::{
608            EventLoadingFailed, EventLoadingFinished, EventRequestWillBeSent,
609        };
610        use futures::StreamExt;
611
612        let counter: Arc<AtomicI32> = Arc::new(AtomicI32::new(0));
613        let pairs: [(Arc<AtomicI32>, i32); 3] = [
614            (Arc::clone(&counter), 1),
615            (Arc::clone(&counter), -1),
616            (Arc::clone(&counter), -1),
617        ];
618        let [p1, p2, p3] = [self.page.clone(), self.page.clone(), self.page.clone()];
619
620        macro_rules! spawn_tracker {
621            ($page:expr, $event:ty, $c:expr, $delta:expr) => {
622                match $page.event_listener::<$event>().await {
623                    Ok(mut s) => {
624                        let c = $c;
625                        let d = $delta;
626                        tokio::spawn(async move {
627                            while s.next().await.is_some() {
628                                c.fetch_add(d, Ordering::Relaxed);
629                            }
630                        });
631                    }
632                    Err(e) => warn!("network-idle tracker unavailable: {e}"),
633                }
634            };
635        }
636
637        let [(c1, d1), (c2, d2), (c3, d3)] = pairs;
638        spawn_tracker!(p1, EventRequestWillBeSent, c1, d1);
639        spawn_tracker!(p2, EventLoadingFinished, c2, d2);
640        spawn_tracker!(p3, EventLoadingFailed, c3, d3);
641
642        counter
643    }
644
645    /// Poll `counter` until ≤ 2 in-flight requests persist for 500 ms
646    /// (equivalent to Playwright's `networkidle2`).
647    async fn wait_network_idle(counter: &Arc<std::sync::atomic::AtomicI32>) {
648        const IDLE_THRESHOLD: i32 = 2;
649        const SETTLE: Duration = Duration::from_millis(500);
650        loop {
651            if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
652                tokio::time::sleep(SETTLE).await;
653                if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
654                    break;
655                }
656            } else {
657                tokio::time::sleep(Duration::from_millis(50)).await;
658            }
659        }
660    }
661
662    /// Wait until `document.querySelector(selector)` is non-null (`timeout`).
663    ///
664    /// # Errors
665    ///
666    /// Returns [`BrowserError::NavigationFailed`] if the selector is not found
667    /// within the given timeout.
668    pub async fn wait_for_selector(&self, selector: &str, wait_timeout: Duration) -> Result<()> {
669        let selector_owned = selector.to_string();
670        let poll = async {
671            loop {
672                if self.page.find_element(selector_owned.clone()).await.is_ok() {
673                    return Ok(());
674                }
675                tokio::time::sleep(Duration::from_millis(100)).await;
676            }
677        };
678
679        timeout(wait_timeout, poll)
680            .await
681            .map_err(|_| BrowserError::NavigationFailed {
682                url: String::new(),
683                reason: format!("selector '{selector_owned}' not found within {wait_timeout:?}"),
684            })?
685    }
686
687    /// Set a resource filter to block specific network request types.
688    ///
689    /// Enables `Fetch` interception and spawns a background task that continues
690    /// allowed requests and fails blocked ones with `BlockedByClient`. Any
691    /// previously set filter task is cancelled first.
692    ///
693    /// # Errors
694    ///
695    /// Returns a [`BrowserError::CdpError`] if the CDP call fails.
696    pub async fn set_resource_filter(&mut self, filter: ResourceFilter) -> Result<()> {
697        use chromiumoxide::cdp::browser_protocol::fetch::{
698            ContinueRequestParams, EnableParams, EventRequestPaused, FailRequestParams,
699            RequestPattern,
700        };
701        use chromiumoxide::cdp::browser_protocol::network::ErrorReason;
702        use futures::StreamExt as _;
703
704        if filter.is_empty() {
705            return Ok(());
706        }
707
708        // Cancel any previously running filter task.
709        if let Some(task) = self.resource_filter_task.take() {
710            task.abort();
711        }
712
713        let pattern = RequestPattern::builder().url_pattern("*").build();
714        let params = EnableParams::builder()
715            .patterns(vec![pattern])
716            .handle_auth_requests(false)
717            .build();
718
719        timeout(self.cdp_timeout, self.page.execute::<EnableParams>(params))
720            .await
721            .map_err(|_| BrowserError::Timeout {
722                operation: "Fetch.enable".to_string(),
723                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
724            })?
725            .map_err(|e| BrowserError::CdpError {
726                operation: "Fetch.enable".to_string(),
727                message: e.to_string(),
728            })?;
729
730        // Subscribe to requestPaused events and dispatch each one so navigation
731        // is never blocked. Without this handler Chrome holds every intercepted
732        // request indefinitely and the page hangs.
733        let mut events = self
734            .page
735            .event_listener::<EventRequestPaused>()
736            .await
737            .map_err(|e| BrowserError::CdpError {
738                operation: "Fetch.requestPaused subscribe".to_string(),
739                message: e.to_string(),
740            })?;
741
742        let page = self.page.clone();
743        debug!("Resource filter active: {:?}", filter);
744        let task = tokio::spawn(async move {
745            while let Some(event) = events.next().await {
746                let request_id = event.request_id.clone();
747                if filter.should_block(event.resource_type.as_ref()) {
748                    let params = FailRequestParams::new(request_id, ErrorReason::BlockedByClient);
749                    let _ = page.execute(params).await;
750                } else {
751                    let _ = page.execute(ContinueRequestParams::new(request_id)).await;
752                }
753            }
754        });
755
756        self.resource_filter_task = Some(task);
757        Ok(())
758    }
759
760    /// Return the current page URL (post-navigation, post-redirect).
761    ///
762    /// Delegates to the CDP `Target.getTargetInfo` binding already used
763    /// internally by [`save_cookies`](Self::save_cookies); no extra network
764    /// request is made.  Returns an empty string if the URL is not yet set
765    /// (e.g. on a blank tab before the first navigation).
766    ///
767    /// # Errors
768    ///
769    /// Returns [`BrowserError::CdpError`] if the underlying CDP call fails, or
770    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
771    ///
772    /// # Example
773    ///
774    /// ```no_run
775    /// use stygian_browser::{BrowserPool, BrowserConfig};
776    /// use stygian_browser::page::WaitUntil;
777    /// use std::time::Duration;
778    ///
779    /// # async fn run() -> stygian_browser::error::Result<()> {
780    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
781    /// let handle = pool.acquire().await?;
782    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
783    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
784    /// let url = page.url().await?;
785    /// println!("Final URL after redirects: {url}");
786    /// # Ok(())
787    /// # }
788    /// ```
789    pub async fn url(&self) -> Result<String> {
790        timeout(self.cdp_timeout, self.page.url())
791            .await
792            .map_err(|_| BrowserError::Timeout {
793                operation: "page.url".to_string(),
794                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
795            })?
796            .map_err(|e| BrowserError::CdpError {
797                operation: "page.url".to_string(),
798                message: e.to_string(),
799            })
800            .map(Option::unwrap_or_default)
801    }
802
803    /// Return the HTTP status code of the most recent main-frame navigation.
804    ///
805    /// The status is captured from the `Network.responseReceived` CDP event
806    /// wired up inside [`navigate`](Self::navigate), so it reflects the
807    /// *final* response after any server-side redirects.
808    ///
809    /// Returns `None` if the status was not captured — for example on `file://`
810    /// navigations, when [`navigate`](Self::navigate) has not yet been called,
811    /// or if the network event subscription failed.
812    ///
813    /// # Errors
814    ///
815    /// This method is infallible; the `Result` wrapper is kept for API
816    /// consistency with other `PageHandle` methods.
817    ///
818    /// # Example
819    ///
820    /// ```no_run
821    /// use stygian_browser::{BrowserPool, BrowserConfig};
822    /// use stygian_browser::page::WaitUntil;
823    /// use std::time::Duration;
824    ///
825    /// # async fn run() -> stygian_browser::error::Result<()> {
826    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
827    /// let handle = pool.acquire().await?;
828    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
829    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
830    /// if let Some(code) = page.status_code()? {
831    ///     println!("HTTP {code}");
832    /// }
833    /// # Ok(())
834    /// # }
835    /// ```
836    pub fn status_code(&self) -> Result<Option<u16>> {
837        let code = self.last_status_code.load(Ordering::Acquire);
838        Ok(if code == 0 { None } else { Some(code) })
839    }
840
841    /// Return the page's `<title>` text.
842    ///
843    /// # Errors
844    ///
845    /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
846    pub async fn title(&self) -> Result<String> {
847        timeout(self.cdp_timeout, self.page.get_title())
848            .await
849            .map_err(|_| BrowserError::Timeout {
850                operation: "get_title".to_string(),
851                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
852            })?
853            .map_err(|e| BrowserError::ScriptExecutionFailed {
854                script: "document.title".to_string(),
855                reason: e.to_string(),
856            })
857            .map(Option::unwrap_or_default)
858    }
859
860    /// Return the page's full outer HTML.
861    ///
862    /// # Errors
863    ///
864    /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
865    pub async fn content(&self) -> Result<String> {
866        timeout(self.cdp_timeout, self.page.content())
867            .await
868            .map_err(|_| BrowserError::Timeout {
869                operation: "page.content".to_string(),
870                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
871            })?
872            .map_err(|e| BrowserError::ScriptExecutionFailed {
873                script: "document.documentElement.outerHTML".to_string(),
874                reason: e.to_string(),
875            })
876    }
877
878    /// Query the live DOM for all elements matching `selector` and return
879    /// lightweight [`NodeHandle`]s backed by CDP `RemoteObjectId`s.
880    ///
881    /// No HTML serialisation occurs — the browser's in-memory DOM is queried
882    /// directly over the CDP connection, eliminating the `page.content()` +
883    /// `scraper::Html::parse_document` round-trip.
884    ///
885    /// Returns an empty `Vec` when no elements match (consistent with the JS
886    /// `querySelectorAll` contract — not an error).
887    ///
888    /// # Errors
889    ///
890    /// Returns [`BrowserError::CdpError`] if the CDP find call fails, or
891    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
892    ///
893    /// # Example
894    ///
895    /// ```no_run
896    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
897    /// use std::time::Duration;
898    ///
899    /// # async fn run() -> stygian_browser::error::Result<()> {
900    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
901    /// let handle = pool.acquire().await?;
902    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
903    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
904    ///
905    /// let nodes = page.query_selector_all("[data-ux]").await?;
906    /// for node in &nodes {
907    ///     let ux_type = node.attr("data-ux").await?;
908    ///     let text    = node.text_content().await?;
909    ///     println!("{ux_type:?}: {text}");
910    /// }
911    /// # Ok(())
912    /// # }
913    /// ```
914    pub async fn query_selector_all(&self, selector: &str) -> Result<Vec<NodeHandle>> {
915        let elements = timeout(self.cdp_timeout, self.page.find_elements(selector))
916            .await
917            .map_err(|_| BrowserError::Timeout {
918                operation: "PageHandle::query_selector_all".to_string(),
919                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
920            })?
921            .map_err(|e| BrowserError::CdpError {
922                operation: "PageHandle::query_selector_all".to_string(),
923                message: e.to_string(),
924            })?;
925
926        Ok(elements
927            .into_iter()
928            .map(|el| NodeHandle {
929                element: el,
930                selector: selector.to_string(),
931                cdp_timeout: self.cdp_timeout,
932            })
933            .collect())
934    }
935
936    /// Evaluate arbitrary JavaScript and return the result as `T`.
937    ///
938    /// # Errors
939    ///
940    /// Returns [`BrowserError::ScriptExecutionFailed`] on eval failure or
941    /// deserialization error.
942    pub async fn eval<T: serde::de::DeserializeOwned>(&self, script: &str) -> Result<T> {
943        let script_owned = script.to_string();
944        timeout(self.cdp_timeout, self.page.evaluate(script))
945            .await
946            .map_err(|_| BrowserError::Timeout {
947                operation: "page.evaluate".to_string(),
948                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
949            })?
950            .map_err(|e| BrowserError::ScriptExecutionFailed {
951                script: script_owned.clone(),
952                reason: e.to_string(),
953            })?
954            .into_value::<T>()
955            .map_err(|e| BrowserError::ScriptExecutionFailed {
956                script: script_owned,
957                reason: e.to_string(),
958            })
959    }
960
961    /// Save all cookies for the current page's origin.
962    ///
963    /// # Errors
964    ///
965    /// Returns [`BrowserError::CdpError`] if the CDP call fails.
966    pub async fn save_cookies(
967        &self,
968    ) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::Cookie>> {
969        use chromiumoxide::cdp::browser_protocol::network::GetCookiesParams;
970
971        let url = self
972            .page
973            .url()
974            .await
975            .map_err(|e| BrowserError::CdpError {
976                operation: "page.url".to_string(),
977                message: e.to_string(),
978            })?
979            .unwrap_or_default();
980
981        timeout(
982            self.cdp_timeout,
983            self.page
984                .execute(GetCookiesParams::builder().urls(vec![url]).build()),
985        )
986        .await
987        .map_err(|_| BrowserError::Timeout {
988            operation: "Network.getCookies".to_string(),
989            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
990        })?
991        .map_err(|e| BrowserError::CdpError {
992            operation: "Network.getCookies".to_string(),
993            message: e.to_string(),
994        })
995        .map(|r| r.cookies.clone())
996    }
997
998    /// Inject cookies into the current page.
999    ///
1000    /// Seeds session tokens or other state without needing a full
1001    /// [`SessionSnapshot`][crate::session::SessionSnapshot] and without
1002    /// requiring a direct `chromiumoxide` dependency in calling code.
1003    ///
1004    /// Individual cookie failures are logged as warnings and do not abort the
1005    /// remaining cookies.
1006    ///
1007    /// # Errors
1008    ///
1009    /// Returns [`BrowserError::Timeout`] if a single `Network.setCookie` CDP
1010    /// call exceeds `cdp_timeout`.
1011    ///
1012    /// # Example
1013    ///
1014    /// ```no_run
1015    /// use stygian_browser::{BrowserPool, BrowserConfig};
1016    /// use stygian_browser::session::SessionCookie;
1017    /// use std::time::Duration;
1018    ///
1019    /// # async fn run() -> stygian_browser::error::Result<()> {
1020    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1021    /// let handle = pool.acquire().await?;
1022    /// let page = handle.browser().expect("valid browser").new_page().await?;
1023    /// let cookies = vec![SessionCookie {
1024    ///     name: "session".to_string(),
1025    ///     value: "abc123".to_string(),
1026    ///     domain: ".example.com".to_string(),
1027    ///     path: "/".to_string(),
1028    ///     expires: -1.0,
1029    ///     http_only: true,
1030    ///     secure: true,
1031    ///     same_site: "Lax".to_string(),
1032    /// }];
1033    /// page.inject_cookies(&cookies).await?;
1034    /// # Ok(())
1035    /// # }
1036    /// ```
1037    pub async fn inject_cookies(&self, cookies: &[crate::session::SessionCookie]) -> Result<()> {
1038        use chromiumoxide::cdp::browser_protocol::network::SetCookieParams;
1039
1040        for cookie in cookies {
1041            let params = match SetCookieParams::builder()
1042                .name(cookie.name.clone())
1043                .value(cookie.value.clone())
1044                .domain(cookie.domain.clone())
1045                .path(cookie.path.clone())
1046                .http_only(cookie.http_only)
1047                .secure(cookie.secure)
1048                .build()
1049            {
1050                Ok(p) => p,
1051                Err(e) => {
1052                    warn!(cookie = %cookie.name, error = %e, "Failed to build cookie params");
1053                    continue;
1054                }
1055            };
1056
1057            match timeout(self.cdp_timeout, self.page.execute(params)).await {
1058                Err(_) => {
1059                    warn!(
1060                        cookie = %cookie.name,
1061                        timeout_ms = self.cdp_timeout.as_millis(),
1062                        "Timed out injecting cookie"
1063                    );
1064                }
1065                Ok(Err(e)) => {
1066                    warn!(cookie = %cookie.name, error = %e, "Failed to inject cookie");
1067                }
1068                Ok(Ok(_)) => {}
1069            }
1070        }
1071
1072        debug!(count = cookies.len(), "Cookies injected");
1073        Ok(())
1074    }
1075
1076    /// Capture a screenshot of the current page as PNG bytes.
1077    ///
1078    /// The screenshot is full-page by default (viewport clipped to the rendered
1079    /// layout area).  Save the returned bytes to a `.png` file or process
1080    /// them in-memory.
1081    ///
1082    /// # Errors
1083    ///
1084    /// Returns [`BrowserError::CdpError`] if the CDP `Page.captureScreenshot`
1085    /// command fails, or [`BrowserError::Timeout`] if it exceeds
1086    /// `cdp_timeout`.
1087    ///
1088    /// # Example
1089    ///
1090    /// ```no_run
1091    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1092    /// use std::{time::Duration, fs};
1093    ///
1094    /// # async fn run() -> stygian_browser::error::Result<()> {
1095    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1096    /// let handle = pool.acquire().await?;
1097    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1098    /// page.navigate("https://example.com", WaitUntil::Selector("body".to_string()), Duration::from_secs(30)).await?;
1099    /// let png = page.screenshot().await?;
1100    /// fs::write("screenshot.png", &png).unwrap();
1101    /// # Ok(())
1102    /// # }
1103    /// ```
1104    pub async fn screenshot(&self) -> Result<Vec<u8>> {
1105        use chromiumoxide::page::ScreenshotParams;
1106
1107        let params = ScreenshotParams::builder().full_page(true).build();
1108
1109        timeout(self.cdp_timeout, self.page.screenshot(params))
1110            .await
1111            .map_err(|_| BrowserError::Timeout {
1112                operation: "Page.captureScreenshot".to_string(),
1113                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1114            })?
1115            .map_err(|e| BrowserError::CdpError {
1116                operation: "Page.captureScreenshot".to_string(),
1117                message: e.to_string(),
1118            })
1119    }
1120
1121    /// Borrow the underlying chromiumoxide [`Page`].
1122    pub const fn inner(&self) -> &Page {
1123        &self.page
1124    }
1125
1126    /// Close this page (tab).
1127    ///
1128    /// Called automatically on drop; explicit call avoids suppressing the error.
1129    pub async fn close(self) -> Result<()> {
1130        timeout(Duration::from_secs(5), self.page.clone().close())
1131            .await
1132            .map_err(|_| BrowserError::Timeout {
1133                operation: "page.close".to_string(),
1134                duration_ms: 5000,
1135            })?
1136            .map_err(|e| BrowserError::CdpError {
1137                operation: "page.close".to_string(),
1138                message: e.to_string(),
1139            })
1140    }
1141}
1142
1143// ─── Stealth diagnostics ──────────────────────────────────────────────────────
1144
#[cfg(feature = "stealth")]
impl PageHandle {
    /// Evaluate every built-in stealth detection check on the current page.
    ///
    /// Each check returned by [`crate::diagnostic::all_checks`] has its
    /// script evaluated through [`eval`](Self::eval); the script's string
    /// output is handed to the check's own parser.  The per-check results are
    /// collected, in order, into a
    /// [`crate::diagnostic::DiagnosticReport`].
    ///
    /// A check whose evaluation returns an error is logged as a warning and
    /// recorded as a failed check whose details start with `script error:`;
    /// it does **not** abort the remaining checks.
    ///
    /// # Errors
    ///
    /// This method currently always returns `Ok`: every evaluation error is
    /// converted into a failed check inside the report.  The `Result` wrapper
    /// is kept for API consistency with other `PageHandle` methods.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # async fn run() -> stygian_browser::error::Result<()> {
    /// use stygian_browser::{BrowserPool, BrowserConfig};
    /// use stygian_browser::page::WaitUntil;
    /// use std::time::Duration;
    ///
    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
    /// let handle = pool.acquire().await?;
    /// let browser = handle.browser().expect("valid browser");
    /// let mut page = browser.new_page().await?;
    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(10)).await?;
    ///
    /// let report = page.verify_stealth().await?;
    /// println!("Stealth: {}/{} checks passed", report.passed_count, report.checks.len());
    /// for failure in report.failures() {
    ///     eprintln!("  FAIL  {}: {}", failure.description, failure.details);
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub async fn verify_stealth(&self) -> Result<crate::diagnostic::DiagnosticReport> {
        use crate::diagnostic::{CheckResult, DiagnosticReport, all_checks};

        let mut outcomes: Vec<CheckResult> = Vec::new();

        for check in all_checks() {
            let evaluated = self.eval::<String>(check.script).await;

            let outcome = match evaluated {
                Err(e) => {
                    // An evaluation error counts as a failed check, not a
                    // failed run.
                    warn!(
                        check = ?check.id,
                        error = %e,
                        "stealth check script failed during evaluation"
                    );
                    CheckResult {
                        id: check.id,
                        description: check.description.to_string(),
                        passed: false,
                        details: format!("script error: {e}"),
                    }
                }
                Ok(json) => check.parse_output(&json),
            };

            debug!(
                check = ?outcome.id,
                passed = outcome.passed,
                details = %outcome.details,
                "stealth check result"
            );
            outcomes.push(outcome);
        }

        Ok(DiagnosticReport::new(outcomes))
    }
}
1217
1218impl Drop for PageHandle {
1219    fn drop(&mut self) {
1220        warn!("PageHandle dropped without explicit close(); spawning cleanup task");
1221        // chromiumoxide Page does not implement close on Drop, so we spawn
1222        // a fire-and-forget task. The page ref is already owned; we need to
1223        // swap it out. We clone the Page handle (it's Arc-backed internally).
1224        let page = self.page.clone();
1225        tokio::spawn(async move {
1226            let _ = page.close().await;
1227        });
1228    }
1229}
1230
1231// ─── Tests ────────────────────────────────────────────────────────────────────
1232
#[cfg(test)]
mod tests {
    use super::*;

    /// `block_media` blocks images, fonts, stylesheets and media, and lets
    /// scripts and XHR through.
    #[test]
    fn resource_filter_block_media_blocks_image() {
        let media_filter = ResourceFilter::block_media();
        for blocked in ["Image", "Font", "Stylesheet", "Media"] {
            assert!(media_filter.should_block(blocked));
        }
        for allowed in ["Script", "XHR"] {
            assert!(!media_filter.should_block(allowed));
        }
    }

    /// Resource type names are matched regardless of letter case.
    #[test]
    fn resource_filter_case_insensitive() {
        let images_and_fonts = ResourceFilter::block_images_and_fonts();
        for spelling in ["image", "IMAGE"] {
            assert!(images_and_fonts.should_block(spelling));
        }
        assert!(!images_and_fonts.should_block("Stylesheet"));
    }

    /// Chained `block` calls accumulate the blocked types.
    #[test]
    fn resource_filter_builder_chain() {
        let chained = ResourceFilter::default()
            .block(ResourceType::Image)
            .block(ResourceType::Font);
        for blocked in ["Image", "Font"] {
            assert!(chained.should_block(blocked));
        }
        assert!(!chained.should_block("Stylesheet"));
    }

    /// Blocking the same type twice stores it only once.
    #[test]
    fn resource_filter_dedup_block() {
        let twice = ResourceFilter::default()
            .block(ResourceType::Image)
            .block(ResourceType::Image);
        assert_eq!(twice.blocked.len(), 1);
    }

    /// Only a filter with no blocked types reports itself as empty.
    #[test]
    fn resource_filter_is_empty_when_default() {
        let untouched = ResourceFilter::default();
        let media = ResourceFilter::block_media();
        assert!(untouched.is_empty());
        assert!(!media.is_empty());
    }

    /// The `Selector` variant keeps the CSS selector it was built with.
    #[test]
    fn wait_until_selector_stores_string() {
        let strategy = WaitUntil::Selector("#foo".to_string());
        assert!(matches!(strategy, WaitUntil::Selector(ref css) if css == "#foo"));
    }

    /// Each resource type maps to its CDP name.
    #[test]
    fn resource_type_cdp_str() {
        let expectations = [
            (ResourceType::Image, "Image"),
            (ResourceType::Font, "Font"),
            (ResourceType::Stylesheet, "Stylesheet"),
            (ResourceType::Media, "Media"),
        ];
        for (resource, cdp_name) in expectations {
            assert_eq!(resource.as_cdp_str(), cdp_name);
        }
    }

    /// `PageHandle` must be `Send + Sync` for use across thread boundaries.
    #[test]
    fn page_handle_is_send_sync() {
        fn require_send<T: Send>() {}
        fn require_sync<T: Sync>() {}
        require_send::<PageHandle>();
        require_sync::<PageHandle>();
    }

    /// The status-code sentinel (0 = "not yet captured") converts to `None`;
    /// this is pure logic and needs no live browser.
    #[test]
    fn status_code_sentinel_zero_maps_to_none() {
        use std::sync::atomic::{AtomicU16, Ordering};
        let stored = AtomicU16::new(0).load(Ordering::Acquire);
        let mapped: Option<u16> = if stored == 0 { None } else { Some(stored) };
        assert_eq!(mapped, None);
    }

    /// Any non-zero stored status converts to `Some` of that status.
    #[test]
    fn status_code_non_zero_maps_to_some() {
        use std::sync::atomic::{AtomicU16, Ordering};
        for expected in [200u16, 301, 404, 503] {
            let stored = AtomicU16::new(expected).load(Ordering::Acquire);
            let mapped = if stored == 0 { None } else { Some(stored) };
            assert_eq!(mapped, Some(expected));
        }
    }

    // ── NodeHandle pure-logic tests ───────────────────────────────────────────

    /// `attr_map` relies on `chunks_exact(2)`; exercise the name/value pairing
    /// directly so no live browser is needed.
    #[test]
    fn attr_map_chunking_pairs_correctly() {
        let flat = ["id", "main", "data-ux", "Section", "class", "container"].map(String::from);
        let paired: std::collections::HashMap<String, String> = flat
            .chunks_exact(2)
            .filter_map(|pair| match pair {
                [name, value] => Some((name.clone(), value.clone())),
                _ => None,
            })
            .collect();
        assert_eq!(paired.get("id").map(String::as_str), Some("main"));
        assert_eq!(paired.get("data-ux").map(String::as_str), Some("Section"));
        assert_eq!(paired.get("class").map(String::as_str), Some("container"));
        assert_eq!(paired.len(), 3);
    }

    /// An odd-length flat attribute list (malformed CDP response) is handled
    /// gracefully: the unpaired trailing element is dropped.
    #[test]
    fn attr_map_chunking_ignores_odd_trailing() {
        let flat = ["orphan".to_string()]; // a name with no value
        let paired: std::collections::HashMap<String, String> = flat
            .chunks_exact(2)
            .filter_map(|pair| match pair {
                [name, value] => Some((name.clone(), value.clone())),
                _ => None,
            })
            .collect();
        assert!(paired.is_empty());
    }

    /// An empty flat list produces an empty map.
    #[test]
    fn attr_map_chunking_empty_input() {
        let flat: Vec<String> = Vec::new();
        let mut paired: std::collections::HashMap<String, String> =
            std::collections::HashMap::new();
        for pair in flat.chunks_exact(2) {
            if let [name, value] = pair {
                paired.insert(name.clone(), value.clone());
            }
        }
        assert!(paired.is_empty());
    }

    /// `ancestors` JSON parsing: a valid array of tag names round-trips.
    #[test]
    fn ancestors_json_parse_round_trip() -> std::result::Result<(), serde_json::Error> {
        let parsed = serde_json::from_str::<Vec<String>>(r#"["p","article","body","html"]"#)?;
        assert_eq!(parsed, ["p", "article", "body", "html"]);
        Ok(())
    }

    /// `ancestors` JSON parsing: an empty array (no parent) is accepted.
    #[test]
    fn ancestors_json_parse_empty() -> std::result::Result<(), serde_json::Error> {
        let parsed = serde_json::from_str::<Vec<String>>("[]")?;
        assert!(parsed.is_empty());
        Ok(())
    }
}