Skip to main content

stygian_browser/
page.rs

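//! Page handles for a pooled browser: resource blocking, navigation wait
//! strategies, and DOM node access.
//!
//! ## Resource blocking
//!
//! A [`ResourceFilter`] lists the CDP resource types (images, fonts,
//! stylesheets, media) that should not be loaded; install it on a page with
//! [`PageHandle::set_resource_filter`].
//!
//! ## Wait strategies
//!
//! [`PageHandle`] exposes three wait strategies via [`WaitUntil`]:
//! - `DomContentLoaded` — fires when the HTML is parsed
//! - `NetworkIdle` — waits for the `load` event, then for in-flight requests to settle
//! - `Selector(css)` — waits until an element matching the CSS selector exists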
8//!
9//! # Example
10//!
11//! ```no_run
12//! use stygian_browser::{BrowserPool, BrowserConfig};
13//! use stygian_browser::page::{ResourceFilter, WaitUntil};
14//! use std::time::Duration;
15//!
16//! # async fn run() -> stygian_browser::error::Result<()> {
17//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
18//! let handle = pool.acquire().await?;
19//!
20//! let mut page = handle.browser().expect("valid browser").new_page().await?;
21//! page.set_resource_filter(ResourceFilter::block_media()).await?;
22//! page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
23//! let title = page.title().await?;
24//! println!("title: {title}");
25//! handle.release().await;
26//! # Ok(())
27//! # }
28//! ```
29
30use std::collections::HashMap;
31use std::sync::{
32    Arc,
33    atomic::{AtomicU16, Ordering},
34};
35use std::time::Duration;
36
37use chromiumoxide::Page;
38use serde::{Deserialize, Serialize};
39use tokio::time::timeout;
40use tracing::{debug, warn};
41
42use crate::error::{BrowserError, Result};
43
44// ─── ResourceType ─────────────────────────────────────────────────────────────
45
/// Resource categories, named as in the Chrome DevTools Protocol, that can be
/// intercepted and blocked.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResourceType {
    /// Images: `<img>`, `<picture>`, and CSS background images.
    Image,
    /// Web fonts pulled in through CSS `@font-face`.
    Font,
    /// External CSS stylesheets.
    Stylesheet,
    /// Audio and video media files.
    Media,
}

impl ResourceType {
    /// Return the CDP spelling of this resource type.
    ///
    /// The returned string is a static literal, so no allocation takes place.
    pub const fn as_cdp_str(&self) -> &'static str {
        match *self {
            Self::Media => "Media",
            Self::Stylesheet => "Stylesheet",
            Self::Font => "Font",
            Self::Image => "Image",
        }
    }
}
69
70// ─── ResourceFilter ───────────────────────────────────────────────────────────
71
72///
73/// # Example
74///
75/// ```
76/// use stygian_browser::page::ResourceFilter;
77/// let filter = ResourceFilter::block_media();
78/// assert!(filter.should_block("Image"));
79/// ```
80#[derive(Debug, Clone, Default)]
81pub struct ResourceFilter {
82    blocked: Vec<ResourceType>,
83}
84
85impl ResourceFilter {
86    /// Block all media resources (images, fonts, CSS, audio/video).
87    pub fn block_media() -> Self {
88        Self {
89            blocked: vec![
90                ResourceType::Image,
91                ResourceType::Font,
92                ResourceType::Stylesheet,
93                ResourceType::Media,
94            ],
95        }
96    }
97
98    pub fn block_images_and_fonts() -> Self {
99        Self {
100            blocked: vec![ResourceType::Image, ResourceType::Font],
101        }
102    }
103
104    #[must_use]
105    pub fn block(mut self, resource: ResourceType) -> Self {
106        if !self.blocked.contains(&resource) {
107            self.blocked.push(resource);
108        }
109        self
110    }
111
112    pub fn should_block(&self, cdp_type: &str) -> bool {
113        self.blocked
114            .iter()
115            .any(|r| r.as_cdp_str().eq_ignore_ascii_case(cdp_type))
116    }
117
118    pub const fn is_empty(&self) -> bool {
119        self.blocked.is_empty()
120    }
121}
122
123// ─── WaitUntil ────────────────────────────────────────────────────────────────
124
/// Specifies what condition to wait for after a page navigation.
///
/// # Example
///
/// ```
/// use stygian_browser::page::WaitUntil;
///
/// let wait = WaitUntil::Selector("#content".to_string());
/// assert_eq!(wait, WaitUntil::Selector("#content".to_string()));
/// assert_ne!(wait, WaitUntil::DomContentLoaded);
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WaitUntil {
    /// Fires when the initial HTML is fully parsed, without waiting for
    /// subresources such as images and stylesheets to finish loading.
    DomContentLoaded,
    /// Waits for the page `load` event, then until the number of in-flight
    /// network requests has dropped to a small threshold and stayed there
    /// for a short settle period.
    NetworkIdle,
    /// Waits until an element matching the given CSS selector can be found
    /// in the page.
    Selector(String),
}
140
141// ─── NodeHandle ───────────────────────────────────────────────────────────────
142
/// A handle to a single DOM element in a live page.
///
/// Each accessor issues one or more CDP `Runtime.callFunctionOn` calls against
/// the held V8 remote object reference — no HTML serialisation occurs.
///
/// A handle becomes **stale** after page navigation or if the underlying DOM
/// node is removed.  Stale calls return [`BrowserError::StaleNode`] so callers
/// can distinguish them from other CDP failures.
///
/// # Example
///
/// ```no_run
/// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
/// use std::time::Duration;
///
/// # async fn run() -> stygian_browser::error::Result<()> {
/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
/// let handle = pool.acquire().await?;
/// let mut page = handle.browser().expect("valid browser").new_page().await?;
/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
/// let nodes = page.query_selector_all("a").await?;
/// for node in &nodes {
///     let href = node.attr("href").await?;
///     let text = node.text_content().await?;
///     println!("{text}: {href:?}");
/// }
/// # Ok(())
/// # }
/// ```
pub struct NodeHandle {
    /// The chromiumoxide element every accessor operates on.
    element: chromiumoxide::element::Element,
    /// Selector text that produced this handle; it is reported in
    /// [`BrowserError::StaleNode`] errors.
    ///
    /// Shared via `Arc<str>` so all handles from a single query reuse the
    /// same allocation rather than cloning a `String` per node.
    selector: Arc<str>,
    /// Upper bound applied to each CDP call made through this handle.
    cdp_timeout: Duration,
    /// Page the element belongs to; used to resolve the tagged target element
    /// during DOM traversal (parent / sibling navigation).
    page: chromiumoxide::Page,
}
180
181impl NodeHandle {
182    /// Return a single attribute value, or `None` if the attribute is absent.
183    ///
184    /// Issues one `Runtime.callFunctionOn` CDP call (`el.getAttribute(name)`).
185    ///
186    /// # Errors
187    ///
188    /// invalidated, or [`BrowserError::Timeout`] / [`BrowserError::CdpError`]
189    /// on transport-level failures.
190    pub async fn attr(&self, name: &str) -> Result<Option<String>> {
191        timeout(self.cdp_timeout, self.element.attribute(name))
192            .await
193            .map_err(|_| BrowserError::Timeout {
194                operation: "NodeHandle::attr".to_string(),
195                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
196            })?
197            .map_err(|e| self.cdp_err_or_stale(&e, "attr"))
198    }
199
200    /// Return all attributes as a `HashMap<name, value>` in a **single**
201    /// CDP round-trip.
202    ///
203    /// Uses `DOM.getAttributes` (via the chromiumoxide `attributes()` API)
204    /// which returns a flat `[name, value, name, value, …]` list from the node
205    /// description — no per-attribute calls are needed.
206    ///
207    /// # Errors
208    ///
209    /// invalidated.
210    pub async fn attr_map(&self) -> Result<HashMap<String, String>> {
211        let flat = timeout(self.cdp_timeout, self.element.attributes())
212            .await
213            .map_err(|_| BrowserError::Timeout {
214                operation: "NodeHandle::attr_map".to_string(),
215                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
216            })?
217            .map_err(|e| self.cdp_err_or_stale(&e, "attr_map"))?;
218
219        let mut map = HashMap::with_capacity(flat.len() / 2);
220        for pair in flat.chunks_exact(2) {
221            if let [name, value] = pair {
222                map.insert(name.clone(), value.clone());
223            }
224        }
225        Ok(map)
226    }
227
228    /// Return the element's `textContent` (all text inside, no markup).
229    ///
230    /// Reads the DOM `textContent` property via a single JS eval — this is the
231    /// raw text concatenation of all descendant text nodes, independent of
232    /// layout or visibility (unlike `innerText`).
233    ///
234    ///
235    /// # Errors
236    ///
237    /// invalidated.
238    pub async fn text_content(&self) -> Result<String> {
239        let returns = timeout(
240            self.cdp_timeout,
241            self.element
242                .call_js_fn(r"function() { return this.textContent ?? ''; }", true),
243        )
244        .await
245        .map_err(|_| BrowserError::Timeout {
246            operation: "NodeHandle::text_content".to_string(),
247            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
248        })?
249        .map_err(|e| self.cdp_err_or_stale(&e, "text_content"))?;
250
251        Ok(returns
252            .result
253            .value
254            .as_ref()
255            .and_then(|v| v.as_str())
256            .unwrap_or("")
257            .to_string())
258    }
259
260    /// Return the element's `innerHTML`.
261    ///
262    ///
263    /// # Errors
264    ///
265    /// invalidated.
266    pub async fn inner_html(&self) -> Result<String> {
267        timeout(self.cdp_timeout, self.element.inner_html())
268            .await
269            .map_err(|_| BrowserError::Timeout {
270                operation: "NodeHandle::inner_html".to_string(),
271                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
272            })?
273            .map_err(|e| self.cdp_err_or_stale(&e, "inner_html"))
274            .map(Option::unwrap_or_default)
275    }
276
277    /// Return the element's `outerHTML`.
278    ///
279    ///
280    /// # Errors
281    ///
282    /// invalidated.
283    pub async fn outer_html(&self) -> Result<String> {
284        timeout(self.cdp_timeout, self.element.outer_html())
285            .await
286            .map_err(|_| BrowserError::Timeout {
287                operation: "NodeHandle::outer_html".to_string(),
288                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
289            })?
290            .map_err(|e| self.cdp_err_or_stale(&e, "outer_html"))
291            .map(Option::unwrap_or_default)
292    }
293
294    ///
295    /// Executes a single `Runtime.callFunctionOn` JavaScript function that
296    /// walks `parentElement` and collects tag names — no repeated CDP calls.
297    ///
298    /// ```text
299    /// ["p", "article", "body", "html"]
300    /// ```
301    ///
302    /// # Errors
303    ///
304    /// invalidated, or [`BrowserError::ScriptExecutionFailed`] when CDP
305    pub async fn ancestors(&self) -> Result<Vec<String>> {
306        let returns = timeout(
307            self.cdp_timeout,
308            self.element.call_js_fn(
309                r"function() {
310                    const a = [];
311                    let n = this.parentElement;
312                    while (n) { a.push(n.tagName.toLowerCase()); n = n.parentElement; }
313                    return a;
314                }",
315                true,
316            ),
317        )
318        .await
319        .map_err(|_| BrowserError::Timeout {
320            operation: "NodeHandle::ancestors".to_string(),
321            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
322        })?
323        .map_err(|e| self.cdp_err_or_stale(&e, "ancestors"))?;
324
325        // With returnByValue=true and an array return, CDP delivers the value
326        // as a JSON array directly — no JSON.stringify/re-parse needed.
327        // A missing or wrong-type value indicates an unexpected CDP failure.
328        let arr = returns
329            .result
330            .value
331            .as_ref()
332            .and_then(|v| v.as_array())
333            .ok_or_else(|| BrowserError::ScriptExecutionFailed {
334                script: "NodeHandle::ancestors".to_string(),
335                reason: "CDP returned no value or a non-array value for ancestors()".to_string(),
336            })?;
337
338        arr.iter()
339            .map(|v| {
340                v.as_str().map(ToString::to_string).ok_or_else(|| {
341                    BrowserError::ScriptExecutionFailed {
342                        script: "NodeHandle::ancestors".to_string(),
343                        reason: format!("ancestor entry is not a string: {v}"),
344                    }
345                })
346            })
347            .collect()
348    }
349
350    ///
351    ///
352    ///
353    /// # Errors
354    ///
355    /// invalidated, or [`BrowserError::CdpError`] on transport failure.
356    pub async fn children_matching(&self, selector: &str) -> Result<Vec<Self>> {
357        let elements = timeout(self.cdp_timeout, self.element.find_elements(selector))
358            .await
359            .map_err(|_| BrowserError::Timeout {
360                operation: "NodeHandle::children_matching".to_string(),
361                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
362            })?
363            .map_err(|e| self.cdp_err_or_stale(&e, "children_matching"))?;
364
365        let selector_arc: Arc<str> = Arc::from(selector);
366        Ok(elements
367            .into_iter()
368            .map(|el| Self {
369                element: el,
370                selector: selector_arc.clone(),
371                cdp_timeout: self.cdp_timeout,
372                page: self.page.clone(),
373            })
374            .collect())
375    }
376
377    /// Return the immediate parent element, or `None` if this element has no
378    /// parent (i.e. it is the document root).
379    ///
380    /// Issues a single `Runtime.callFunctionOn` CDP call that temporarily tags
381    /// the parent element with a unique attribute, then resolves it via a
382    /// CSS attribute selector.
383    ///
384    /// # Errors
385    ///
386    /// Returns an error if the CDP call fails or the page handle is invalidated.
387    ///
388    /// # Example
389    ///
390    /// ```no_run
391    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
392    /// use std::time::Duration;
393    ///
394    /// # async fn run() -> stygian_browser::error::Result<()> {
395    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
396    /// let handle = pool.acquire().await?;
397    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
398    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
399    /// # let nodes = page.query_selector_all("a").await?;
400    /// if let Some(parent) = nodes[0].parent().await? {
401    ///     let html = parent.outer_html().await?;
402    ///     println!("parent: {}", &html[..html.len().min(80)]);
403    /// }
404    /// # Ok(())
405    /// # }
406    /// ```
407    pub async fn parent(&self) -> Result<Option<Self>> {
408        let attr = format!(
409            "data-stygian-t-{}",
410            ulid::Ulid::new().to_string().to_lowercase()
411        );
412        let js = format!(
413            "function() {{ \
414                var t = this.parentElement; \
415                if (!t) {{ return false; }} \
416                t.setAttribute('{attr}', '1'); \
417                return true; \
418            }}"
419        );
420        self.call_traversal(&js, &attr, "parent").await
421    }
422
423    /// Return the next element sibling, or `None` if this element is the last
424    /// child of its parent.
425    ///
426    /// Uses `nextElementSibling` (skips text/comment nodes).
427    ///
428    /// # Errors
429    ///
430    /// invalidated.
431    ///
432    /// # Example
433    ///
434    /// ```no_run
435    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
436    /// use std::time::Duration;
437    ///
438    /// # async fn run() -> stygian_browser::error::Result<()> {
439    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
440    /// let handle = pool.acquire().await?;
441    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
442    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
443    /// # let nodes = page.query_selector_all("a").await?;
444    /// if let Some(next) = nodes[0].next_sibling().await? {
445    ///     println!("next sibling: {}", next.text_content().await?);
446    /// }
447    /// # Ok(())
448    /// # }
449    /// ```
450    pub async fn next_sibling(&self) -> Result<Option<Self>> {
451        let attr = format!(
452            "data-stygian-t-{}",
453            ulid::Ulid::new().to_string().to_lowercase()
454        );
455        let js = format!(
456            "function() {{ \
457                var t = this.nextElementSibling; \
458                if (!t) {{ return false; }} \
459                t.setAttribute('{attr}', '1'); \
460                return true; \
461            }}"
462        );
463        self.call_traversal(&js, &attr, "next").await
464    }
465
466    /// Return the previous element sibling, or `None` if this element is the
467    /// first child of its parent.
468    ///
469    /// Uses `previousElementSibling` (skips text/comment nodes).
470    ///
471    /// # Errors
472    ///
473    /// invalidated.
474    ///
475    /// # Example
476    ///
477    /// ```no_run
478    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
479    /// use std::time::Duration;
480    ///
481    /// # async fn run() -> stygian_browser::error::Result<()> {
482    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
483    /// let handle = pool.acquire().await?;
484    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
485    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
486    /// # let nodes = page.query_selector_all("a").await?;
487    /// if let Some(prev) = nodes[1].previous_sibling().await? {
488    ///     println!("prev sibling: {}", prev.text_content().await?);
489    /// }
490    /// # Ok(())
491    /// # }
492    /// ```
493    pub async fn previous_sibling(&self) -> Result<Option<Self>> {
494        let attr = format!(
495            "data-stygian-t-{}",
496            ulid::Ulid::new().to_string().to_lowercase()
497        );
498        let js = format!(
499            "function() {{ \
500                var t = this.previousElementSibling; \
501                if (!t) {{ return false; }} \
502                t.setAttribute('{attr}', '1'); \
503                return true; \
504            }}"
505        );
506        self.call_traversal(&js, &attr, "prev").await
507    }
508
509    /// Shared traversal implementation used by [`parent`], [`next_sibling`],
510    /// and [`previous_sibling`].
511    ///
512    /// The caller provides a JS function that:
513    /// 1. Computes the traversal target (for example, the parent, next
514    ///    sibling, or previous sibling) and stores it in a local variable.
515    /// 2. If the target is non-null, sets a unique attribute (`attr_name`)
516    ///    on it and returns `true`.
517    /// 3. Returns `false` when the target is null (no such neighbour).
518    ///
519    /// This helper then resolves the tagged element from the document root,
520    /// removes the temporary attribute, and wraps the result in a
521    /// `NodeHandle`.
522    ///
523    /// [`parent`]: Self::parent
524    /// [`next_sibling`]: Self::next_sibling
525    /// [`previous_sibling`]: Self::previous_sibling
526    async fn call_traversal(
527        &self,
528        js_fn: &str,
529        attr_name: &str,
530        selector_suffix: &str,
531    ) -> Result<Option<Self>> {
532        // Step 1: Run the JS that tags the target element and reports null/non-null.
533        let op_tag = format!("NodeHandle::{selector_suffix}::tag");
534        let returns = timeout(self.cdp_timeout, self.element.call_js_fn(js_fn, false))
535            .await
536            .map_err(|_| BrowserError::Timeout {
537                operation: op_tag.clone(),
538                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
539            })?
540            .map_err(|e| self.cdp_err_or_stale(&e, selector_suffix))?;
541
542        // JS returns false → no such neighbour.
543        let has_target = returns
544            .result
545            .value
546            .as_ref()
547            .and_then(serde_json::Value::as_bool)
548            .unwrap_or(false);
549        if !has_target {
550            return Ok(None);
551        }
552
553        let css = format!("[{attr_name}]");
554        let op_resolve = format!("NodeHandle::{selector_suffix}::resolve");
555        let element = timeout(self.cdp_timeout, self.page.find_element(css))
556            .await
557            .map_err(|_| BrowserError::Timeout {
558                operation: op_resolve.clone(),
559                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
560            })?
561            .map_err(|e| BrowserError::CdpError {
562                operation: op_resolve,
563                message: format!("{e:?}"),
564            })?;
565
566        // is non-fatal — it leaves a harmless stale attribute in the DOM).
567        let cleanup = format!("function() {{ this.removeAttribute('{attr_name}'); }}");
568        let _ = element.call_js_fn(cleanup, false).await;
569
570        let new_selector: Arc<str> =
571            Arc::from(format!("{}::{selector_suffix}", self.selector).as_str());
572        Ok(Some(Self {
573            element,
574            selector: new_selector,
575            cdp_timeout: self.cdp_timeout,
576            page: self.page.clone(),
577        }))
578    }
579
580    /// (when the remote object reference has been invalidated) or
581    fn cdp_err_or_stale(
582        &self,
583        err: &chromiumoxide::error::CdpError,
584        operation: &str,
585    ) -> BrowserError {
586        let msg = format!("{err:?}");
587        if msg.contains("Cannot find object with id")
588            || msg.contains("context with specified id")
589            || msg.contains("Cannot find context")
590        {
591            BrowserError::StaleNode {
592                selector: self.selector.to_string(),
593            }
594        } else {
595            BrowserError::CdpError {
596                operation: operation.to_string(),
597                message: msg,
598            }
599        }
600    }
601}
602
603// ─── PageHandle ───────────────────────────────────────────────────────────────
604
/// A handle to one browser tab, wrapping a chromiumoxide [`Page`].
///
/// The handle provides navigation with a choice of wait condition
/// ([`WaitUntil`]), selector waiting, and request blocking via
/// [`ResourceFilter`].
///
/// # Example
///
/// ```no_run
/// use stygian_browser::{BrowserPool, BrowserConfig};
/// use stygian_browser::page::WaitUntil;
/// use std::time::Duration;
///
/// # async fn run() -> stygian_browser::error::Result<()> {
/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
/// let handle = pool.acquire().await?;
/// let mut page = handle.browser().expect("valid browser").new_page().await?;
/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
/// let html = page.content().await?;
/// drop(page); // closes the tab
/// handle.release().await;
/// # Ok(())
/// # }
/// ```
pub struct PageHandle {
    /// The underlying chromiumoxide page that all CDP calls go through.
    page: Page,
    /// Timeout applied to individual CDP commands issued by this handle
    /// (navigation uses its own, caller-supplied timeout).
    cdp_timeout: Duration,
    /// HTTP status code of the most recent main-frame navigation, or `0` if not
    /// yet captured. It is reset to `0` at the start of every navigation and
    /// written by a background listener, hence the shared atomic.
    last_status_code: Arc<AtomicU16>,
    /// Background task processing `Fetch.requestPaused` events. Aborted and
    /// replaced each time `set_resource_filter` is called.
    resource_filter_task: Option<tokio::task::JoinHandle<()>>,
}
634
635impl PageHandle {
636    /// Wrap a raw chromiumoxide [`Page`] in a handle.
637    pub(crate) fn new(page: Page, cdp_timeout: Duration) -> Self {
638        Self {
639            page,
640            cdp_timeout,
641            last_status_code: Arc::new(AtomicU16::new(0)),
642            resource_filter_task: None,
643        }
644    }
645
646    ///
647    /// # Errors
648    ///
649    /// the CDP call fails.
650    pub async fn navigate(
651        &mut self,
652        url: &str,
653        condition: WaitUntil,
654        nav_timeout: Duration,
655    ) -> Result<()> {
656        self.setup_status_capture().await;
657        timeout(
658            nav_timeout,
659            self.navigate_inner(url, condition, nav_timeout),
660        )
661        .await
662        .map_err(|_| BrowserError::NavigationFailed {
663            url: url.to_string(),
664            reason: format!("navigation timed out after {nav_timeout:?}"),
665        })?
666    }
667
668    /// Reset the last status code and wire up the `Network.responseReceived`
669    /// so that a missing network domain never blocks navigation.
670    async fn setup_status_capture(&self) {
671        use chromiumoxide::cdp::browser_protocol::network::{
672            EventResponseReceived, ResourceType as NetworkResourceType,
673        };
674        use futures::StreamExt;
675
676        // Reset so a stale code is not returned if the new navigation fails
677        self.last_status_code.store(0, Ordering::Release);
678
679        let page_for_listener = self.page.clone();
680        let status_capture = Arc::clone(&self.last_status_code);
681        match page_for_listener
682            .event_listener::<EventResponseReceived>()
683            .await
684        {
685            Ok(mut stream) => {
686                tokio::spawn(async move {
687                    while let Some(event) = stream.next().await {
688                        if event.r#type == NetworkResourceType::Document {
689                            let code = u16::try_from(event.response.status).unwrap_or(0);
690                            if code > 0 {
691                                status_capture.store(code, Ordering::Release);
692                            }
693                            break;
694                        }
695                    }
696                });
697            }
698            Err(e) => warn!("status-code capture unavailable: {e}"),
699        }
700    }
701
702    /// described in issue #7.
703    async fn navigate_inner(
704        &self,
705        url: &str,
706        condition: WaitUntil,
707        nav_timeout: Duration,
708    ) -> Result<()> {
709        use chromiumoxide::cdp::browser_protocol::page::{
710            EventDomContentEventFired, EventLoadEventFired,
711        };
712        use futures::StreamExt;
713
714        let url_owned = url.to_string();
715
716        let mut dom_events = match &condition {
717            WaitUntil::DomContentLoaded => Some(
718                self.page
719                    .event_listener::<EventDomContentEventFired>()
720                    .await
721                    .map_err(|e| BrowserError::NavigationFailed {
722                        url: url_owned.clone(),
723                        reason: format!("{e:?}"),
724                    })?,
725            ),
726            _ => None,
727        };
728
729        let mut load_events = match &condition {
730            WaitUntil::NetworkIdle => Some(
731                self.page
732                    .event_listener::<EventLoadEventFired>()
733                    .await
734                    .map_err(|e| BrowserError::NavigationFailed {
735                        url: url_owned.clone(),
736                        reason: e.to_string(),
737                    })?,
738            ),
739            _ => None,
740        };
741
742        let inflight = if matches!(condition, WaitUntil::NetworkIdle) {
743            Some(self.subscribe_inflight_counter().await)
744        } else {
745            None
746        };
747
748        self.page
749            .goto(url)
750            .await
751            .map_err(|e| BrowserError::NavigationFailed {
752                url: url_owned.clone(),
753                reason: e.to_string(),
754            })?;
755
756        match &condition {
757            WaitUntil::DomContentLoaded => {
758                if let Some(ref mut events) = dom_events {
759                    let _ = events.next().await;
760                }
761            }
762            WaitUntil::NetworkIdle => {
763                if let Some(ref mut events) = load_events {
764                    let _ = events.next().await;
765                }
766                if let Some(ref counter) = inflight {
767                    Self::wait_network_idle(counter).await;
768                }
769            }
770            WaitUntil::Selector(css) => {
771                self.wait_for_selector(css, nav_timeout).await?;
772            }
773        }
774        Ok(())
775    }
776
777    /// Spawn three detached tasks that maintain a signed in-flight request
778    /// counter via `Network.requestWillBeSent` (+1) and
779    /// `Network.loadingFinished`/`Network.loadingFailed` (−1 each).
780    async fn subscribe_inflight_counter(&self) -> Arc<std::sync::atomic::AtomicI32> {
781        use std::sync::atomic::AtomicI32;
782
783        use chromiumoxide::cdp::browser_protocol::network::{
784            EventLoadingFailed, EventLoadingFinished, EventRequestWillBeSent,
785        };
786        use futures::StreamExt;
787
788        let counter: Arc<AtomicI32> = Arc::new(AtomicI32::new(0));
789        let pairs: [(Arc<AtomicI32>, i32); 3] = [
790            (Arc::clone(&counter), 1),
791            (Arc::clone(&counter), -1),
792            (Arc::clone(&counter), -1),
793        ];
794        let [p1, p2, p3] = [self.page.clone(), self.page.clone(), self.page.clone()];
795
796        macro_rules! spawn_tracker {
797            ($page:expr, $event:ty, $c:expr, $delta:expr) => {
798                match $page.event_listener::<$event>().await {
799                    Ok(mut s) => {
800                        let c = $c;
801                        let d = $delta;
802                        tokio::spawn(async move {
803                            while s.next().await.is_some() {
804                                c.fetch_add(d, Ordering::Relaxed);
805                            }
806                        });
807                    }
808                    Err(e) => warn!("network-idle tracker unavailable: {e}"),
809                }
810            };
811        }
812
813        let [(c1, d1), (c2, d2), (c3, d3)] = pairs;
814        spawn_tracker!(p1, EventRequestWillBeSent, c1, d1);
815        spawn_tracker!(p2, EventLoadingFinished, c2, d2);
816        spawn_tracker!(p3, EventLoadingFailed, c3, d3);
817
818        counter
819    }
820
821    async fn wait_network_idle(counter: &Arc<std::sync::atomic::AtomicI32>) {
822        const IDLE_THRESHOLD: i32 = 2;
823        const SETTLE: Duration = Duration::from_millis(500);
824        loop {
825            if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
826                tokio::time::sleep(SETTLE).await;
827                if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
828                    break;
829                }
830            } else {
831                tokio::time::sleep(Duration::from_millis(50)).await;
832            }
833        }
834    }
835
836    ///
837    /// # Errors
838    ///
839    /// within the given timeout.
840    pub async fn wait_for_selector(&self, selector: &str, wait_timeout: Duration) -> Result<()> {
841        let selector_owned = selector.to_string();
842        let poll = async {
843            loop {
844                if self.page.find_element(selector_owned.clone()).await.is_ok() {
845                    return Ok(());
846                }
847                tokio::time::sleep(Duration::from_millis(100)).await;
848            }
849        };
850
851        timeout(wait_timeout, poll)
852            .await
853            .map_err(|_| BrowserError::NavigationFailed {
854                url: String::new(),
855                reason: format!("selector '{selector_owned}' not found within {wait_timeout:?}"),
856            })?
857    }
858
859    ///
860    /// Enables `Fetch` interception and spawns a background task that continues
861    /// allowed requests and fails blocked ones with `BlockedByClient`. Any
862    /// previously set filter task is cancelled first.
863    ///
864    /// # Errors
865    ///
866    pub async fn set_resource_filter(&mut self, filter: ResourceFilter) -> Result<()> {
867        use chromiumoxide::cdp::browser_protocol::fetch::{
868            ContinueRequestParams, EnableParams, EventRequestPaused, FailRequestParams,
869            RequestPattern,
870        };
871        use chromiumoxide::cdp::browser_protocol::network::ErrorReason;
872        use futures::StreamExt as _;
873
874        if filter.is_empty() {
875            return Ok(());
876        }
877
878        // Cancel any previously running filter task.
879        if let Some(task) = self.resource_filter_task.take() {
880            task.abort();
881        }
882
883        let pattern = RequestPattern::builder().url_pattern("*").build();
884        let params = EnableParams::builder()
885            .patterns(vec![pattern])
886            .handle_auth_requests(false)
887            .build();
888
889        timeout(self.cdp_timeout, self.page.execute::<EnableParams>(params))
890            .await
891            .map_err(|_| BrowserError::Timeout {
892                operation: "Fetch.enable".to_string(),
893                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
894            })?
895            .map_err(|e| BrowserError::CdpError {
896                operation: "Fetch.enable".to_string(),
897                message: e.to_string(),
898            })?;
899
900        // is never blocked. Without this handler Chrome holds every intercepted
901        // request indefinitely and the page hangs.
902        let mut events = self
903            .page
904            .event_listener::<EventRequestPaused>()
905            .await
906            .map_err(|e| BrowserError::CdpError {
907                operation: "Fetch.requestPaused subscribe".to_string(),
908                message: e.to_string(),
909            })?;
910
911        let page = self.page.clone();
912        debug!("Resource filter active: {:?}", filter);
913        let task = tokio::spawn(async move {
914            while let Some(event) = events.next().await {
915                let request_id = event.request_id.clone();
916                if filter.should_block(event.resource_type.as_ref()) {
917                    let params = FailRequestParams::new(request_id, ErrorReason::BlockedByClient);
918                    let _ = page.execute(params).await;
919                } else {
920                    let _ = page.execute(ContinueRequestParams::new(request_id)).await;
921                }
922            }
923        });
924
925        self.resource_filter_task = Some(task);
926        Ok(())
927    }
928
    /// Return the current page URL (post-navigation, post-redirect).
    ///
    /// Delegates to the wrapped page's `url()` call, the same lookup used
    /// internally by [`save_cookies`](Self::save_cookies); no extra network
    /// request is made.  Returns an empty string if the URL is not yet set.
    ///
    /// # Errors
    ///
    /// Returns [`BrowserError::CdpError`] if the underlying call fails, or
    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
937    ///
938    /// # Example
939    ///
940    /// ```no_run
941    /// use stygian_browser::{BrowserPool, BrowserConfig};
942    /// use stygian_browser::page::WaitUntil;
943    /// use std::time::Duration;
944    ///
945    /// # async fn run() -> stygian_browser::error::Result<()> {
946    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
947    /// let handle = pool.acquire().await?;
948    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
949    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
950    /// let url = page.url().await?;
951    /// println!("Final URL after redirects: {url}");
952    /// # Ok(())
953    /// # }
954    /// ```
955    pub async fn url(&self) -> Result<String> {
956        timeout(self.cdp_timeout, self.page.url())
957            .await
958            .map_err(|_| BrowserError::Timeout {
959                operation: "page.url".to_string(),
960                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
961            })?
962            .map_err(|e| BrowserError::CdpError {
963                operation: "page.url".to_string(),
964                message: e.to_string(),
965            })
966            .map(Option::unwrap_or_default)
967    }
968
969    /// Return the HTTP status code of the most recent main-frame navigation.
970    ///
971    /// The status is captured from the `Network.responseReceived` CDP event
972    /// wired up inside [`navigate`](Self::navigate), so it reflects the
973    /// *final* response after any server-side redirects.
974    ///
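    /// Returns `None` while no status has been recorded: after navigations
    /// that recorded none, when [`navigate`](Self::navigate) has not yet been
    /// called, or if the network event subscription failed.
    ///
    /// # Errors
    ///
    /// The current implementation only reads an in-memory value and always
    /// returns `Ok`.
    ///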
981    /// # Example
982    ///
983    /// ```no_run
984    /// use stygian_browser::{BrowserPool, BrowserConfig};
985    /// use stygian_browser::page::WaitUntil;
986    /// use std::time::Duration;
987    ///
988    /// # async fn run() -> stygian_browser::error::Result<()> {
989    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
990    /// let handle = pool.acquire().await?;
991    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
992    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
993    /// if let Some(code) = page.status_code()? {
994    ///     println!("HTTP {code}");
995    /// }
996    /// # Ok(())
997    /// # }
998    /// ```
999    pub fn status_code(&self) -> Result<Option<u16>> {
1000        let code = self.last_status_code.load(Ordering::Acquire);
1001        Ok(if code == 0 { None } else { Some(code) })
1002    }
1003
1004    /// Return the page's `<title>` text.
1005    ///
1006    /// # Errors
1007    ///
1008    pub async fn title(&self) -> Result<String> {
1009        timeout(self.cdp_timeout, self.page.get_title())
1010            .await
1011            .map_err(|_| BrowserError::Timeout {
1012                operation: "get_title".to_string(),
1013                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1014            })?
1015            .map_err(|e| BrowserError::ScriptExecutionFailed {
1016                script: "document.title".to_string(),
1017                reason: e.to_string(),
1018            })
1019            .map(Option::unwrap_or_default)
1020    }
1021
1022    /// Return the page's full outer HTML.
1023    ///
1024    /// # Errors
1025    ///
1026    pub async fn content(&self) -> Result<String> {
1027        timeout(self.cdp_timeout, self.page.content())
1028            .await
1029            .map_err(|_| BrowserError::Timeout {
1030                operation: "page.content".to_string(),
1031                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1032            })?
1033            .map_err(|e| BrowserError::ScriptExecutionFailed {
1034                script: "document.documentElement.outerHTML".to_string(),
1035                reason: e.to_string(),
1036            })
1037    }
1038
    /// Query the live DOM for all elements matching `selector` and return
    /// lightweight [`NodeHandle`]s backed by CDP `RemoteObjectId`s.
    ///
    /// No HTML serialisation occurs — the browser's in-memory DOM is queried
    /// directly over the CDP connection, eliminating the `page.content()` +
    /// `scraper::Html::parse_document` round-trip.
    ///
    /// # Errors
    ///
    /// Returns [`BrowserError::CdpError`] if the element query fails, or
    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
1049    ///
1050    /// # Example
1051    ///
1052    /// ```no_run
1053    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1054    /// use std::time::Duration;
1055    ///
1056    /// # async fn run() -> stygian_browser::error::Result<()> {
1057    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1058    /// let handle = pool.acquire().await?;
1059    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1060    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
    /// let nodes = page.query_selector_all("div[data-ux]").await?;
    /// for node in &nodes {
    ///     let ux_type = node.attr("data-ux").await?;
    ///     let text    = node.text_content().await?;
    ///     println!("{ux_type:?}: {text}");
    /// }
1067    /// # Ok(())
1068    /// # }
1069    /// ```
1070    pub async fn query_selector_all(&self, selector: &str) -> Result<Vec<NodeHandle>> {
1071        let elements = timeout(self.cdp_timeout, self.page.find_elements(selector))
1072            .await
1073            .map_err(|_| BrowserError::Timeout {
1074                operation: "PageHandle::query_selector_all".to_string(),
1075                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1076            })?
1077            .map_err(|e| BrowserError::CdpError {
1078                operation: "PageHandle::query_selector_all".to_string(),
1079                message: e.to_string(),
1080            })?;
1081
1082        let selector_arc: Arc<str> = Arc::from(selector);
1083        Ok(elements
1084            .into_iter()
1085            .map(|el| NodeHandle {
1086                element: el,
1087                selector: selector_arc.clone(),
1088                cdp_timeout: self.cdp_timeout,
1089                page: self.page.clone(),
1090            })
1091            .collect())
1092    }
1093
1094    /// Evaluate arbitrary JavaScript and return the result as `T`.
1095    ///
1096    /// # Errors
1097    ///
1098    /// deserialization error.
1099    pub async fn eval<T: serde::de::DeserializeOwned>(&self, script: &str) -> Result<T> {
1100        let script_owned = script.to_string();
1101        timeout(self.cdp_timeout, self.page.evaluate(script))
1102            .await
1103            .map_err(|_| BrowserError::Timeout {
1104                operation: "page.evaluate".to_string(),
1105                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1106            })?
1107            .map_err(|e| BrowserError::ScriptExecutionFailed {
1108                script: script_owned.clone(),
1109                reason: e.to_string(),
1110            })?
1111            .into_value::<T>()
1112            .map_err(|e| BrowserError::ScriptExecutionFailed {
1113                script: script_owned,
1114                reason: e.to_string(),
1115            })
1116    }
1117
1118    ///
1119    /// # Errors
1120    ///
1121    pub async fn save_cookies(
1122        &self,
1123    ) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::Cookie>> {
1124        use chromiumoxide::cdp::browser_protocol::network::GetCookiesParams;
1125
1126        let url = self
1127            .page
1128            .url()
1129            .await
1130            .map_err(|e| BrowserError::CdpError {
1131                operation: "page.url".to_string(),
1132                message: e.to_string(),
1133            })?
1134            .unwrap_or_default();
1135
1136        timeout(
1137            self.cdp_timeout,
1138            self.page
1139                .execute(GetCookiesParams::builder().urls(vec![url]).build()),
1140        )
1141        .await
1142        .map_err(|_| BrowserError::Timeout {
1143            operation: "Network.getCookies".to_string(),
1144            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1145        })?
1146        .map_err(|e| BrowserError::CdpError {
1147            operation: "Network.getCookies".to_string(),
1148            message: e.to_string(),
1149        })
1150        .map(|r| r.cookies.clone())
1151    }
1152
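    /// Inject cookies into the browser through this page.
    ///
    /// Lets callers seed individual cookies without building a full
    /// [`SessionSnapshot`][crate::session::SessionSnapshot] and without
    /// requiring a direct `chromiumoxide` dependency in calling code.
    ///
    /// Individual cookie failures are logged as warnings and do not abort the
    /// remaining cookies.
    ///
    /// # Errors
    ///
    /// Per-cookie failures are logged rather than returned, including a CDP
    /// call that exceeds `cdp_timeout`; the current implementation always
    /// returns `Ok(())`.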
1163    ///
1164    /// # Example
1165    ///
1166    /// ```no_run
1167    /// use stygian_browser::{BrowserPool, BrowserConfig};
1168    /// use stygian_browser::session::SessionCookie;
1169    /// use std::time::Duration;
1170    ///
1171    /// # async fn run() -> stygian_browser::error::Result<()> {
1172    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1173    /// let handle = pool.acquire().await?;
1174    /// let page = handle.browser().expect("valid browser").new_page().await?;
1175    /// let cookies = vec![SessionCookie {
1176    ///     name: "session".to_string(),
1177    ///     value: "abc123".to_string(),
1178    ///     domain: ".example.com".to_string(),
1179    ///     path: "/".to_string(),
1180    ///     expires: -1.0,
1181    ///     http_only: true,
1182    ///     secure: true,
1183    ///     same_site: "Lax".to_string(),
1184    /// }];
1185    /// page.inject_cookies(&cookies).await?;
1186    /// # Ok(())
1187    /// # }
1188    /// ```
1189    pub async fn inject_cookies(&self, cookies: &[crate::session::SessionCookie]) -> Result<()> {
1190        use chromiumoxide::cdp::browser_protocol::network::SetCookieParams;
1191
1192        for cookie in cookies {
1193            let params = match SetCookieParams::builder()
1194                .name(cookie.name.clone())
1195                .value(cookie.value.clone())
1196                .domain(cookie.domain.clone())
1197                .path(cookie.path.clone())
1198                .http_only(cookie.http_only)
1199                .secure(cookie.secure)
1200                .build()
1201            {
1202                Ok(p) => p,
1203                Err(e) => {
1204                    warn!(cookie = %cookie.name, error = %e, "Failed to build cookie params");
1205                    continue;
1206                }
1207            };
1208
1209            match timeout(self.cdp_timeout, self.page.execute(params)).await {
1210                Err(_) => {
1211                    warn!(
1212                        cookie = %cookie.name,
1213                        timeout_ms = self.cdp_timeout.as_millis(),
1214                        "Timed out injecting cookie"
1215                    );
1216                }
1217                Ok(Err(e)) => {
1218                    warn!(cookie = %cookie.name, error = %e, "Failed to inject cookie");
1219                }
1220                Ok(Ok(_)) => {}
1221            }
1222        }
1223
1224        debug!(count = cookies.len(), "Cookies injected");
1225        Ok(())
1226    }
1227
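    /// Capture a screenshot of the current page as PNG bytes.
    ///
    /// The capture is requested with the full-page option enabled. The image
    /// bytes are returned to the caller, who can write them to disk or process
    /// them in-memory.
    ///
    /// # Errors
    ///
    /// Returns [`BrowserError::CdpError`] if the CDP `Page.captureScreenshot`
    /// command fails, or [`BrowserError::Timeout`] if it exceeds
    /// `cdp_timeout`.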
1236    ///
1237    /// # Example
1238    ///
1239    /// ```no_run
1240    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1241    /// use std::{time::Duration, fs};
1242    ///
1243    /// # async fn run() -> stygian_browser::error::Result<()> {
1244    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1245    /// let handle = pool.acquire().await?;
1246    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1247    /// let png = page.screenshot().await?;
1248    /// fs::write("screenshot.png", &png).unwrap();
1249    /// # Ok(())
1250    /// # }
1251    /// ```
1252    pub async fn screenshot(&self) -> Result<Vec<u8>> {
1253        use chromiumoxide::page::ScreenshotParams;
1254
1255        let params = ScreenshotParams::builder().full_page(true).build();
1256
1257        timeout(self.cdp_timeout, self.page.screenshot(params))
1258            .await
1259            .map_err(|_| BrowserError::Timeout {
1260                operation: "Page.captureScreenshot".to_string(),
1261                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1262            })?
1263            .map_err(|e| BrowserError::CdpError {
1264                operation: "Page.captureScreenshot".to_string(),
1265                message: e.to_string(),
1266            })
1267    }
1268
    /// Borrow the underlying chromiumoxide [`Page`].
    ///
    /// Returns a shared reference to the wrapped page; ownership stays with
    /// this handle.
    pub const fn inner(&self) -> &Page {
        &self.page
    }
1273
1274    /// Close this page (tab).
1275    ///
1276    pub async fn close(self) -> Result<()> {
1277        timeout(Duration::from_secs(5), self.page.clone().close())
1278            .await
1279            .map_err(|_| BrowserError::Timeout {
1280                operation: "page.close".to_string(),
1281                duration_ms: 5000,
1282            })?
1283            .map_err(|e| BrowserError::CdpError {
1284                operation: "page.close".to_string(),
1285                message: e.to_string(),
1286            })
1287    }
1288}
1289
1290// ─── Stealth diagnostics ──────────────────────────────────────────────────────
1291
1292#[cfg(feature = "stealth")]
1293impl PageHandle {
1294    /// Run all built-in stealth detection checks against the current page.
1295    ///
1296    /// Iterates [`crate::diagnostic::all_checks`], evaluates each check's
1297    /// JavaScript via CDP `Runtime.evaluate`, and returns an aggregate
1298    /// [`crate::diagnostic::DiagnosticReport`].
1299    ///
    /// Checks whose script fails to evaluate are logged as warnings and
    /// recorded as failing checks and do **not** abort the whole run.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`.
    /// Individual check failures are captured in the report.
1305    ///
1306    /// # Example
1307    ///
1308    /// ```no_run
1309    /// # async fn run() -> stygian_browser::error::Result<()> {
1310    /// use stygian_browser::{BrowserPool, BrowserConfig};
1311    /// use stygian_browser::page::WaitUntil;
1312    /// use std::time::Duration;
1313    ///
1314    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1315    /// let handle = pool.acquire().await?;
1316    /// let browser = handle.browser().expect("valid browser");
1317    /// let mut page = browser.new_page().await?;
1318    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(10)).await?;
1319    ///
1320    /// let report = page.verify_stealth().await?;
1321    /// println!("Stealth: {}/{} checks passed", report.passed_count, report.checks.len());
    /// for failure in report.failures() {
    ///     eprintln!("  FAIL  {}: {}", failure.description, failure.details);
    /// }
1325    /// # Ok(())
1326    /// # }
1327    /// ```
1328    pub async fn verify_stealth(&self) -> Result<crate::diagnostic::DiagnosticReport> {
1329        use crate::diagnostic::{CheckResult, DiagnosticReport, all_checks};
1330
1331        let mut results: Vec<CheckResult> = Vec::new();
1332
1333        for check in all_checks() {
1334            let result = match self.eval::<String>(check.script).await {
1335                Ok(json) => check.parse_output(&json),
1336                Err(e) => {
1337                    tracing::warn!(
1338                        check = ?check.id,
1339                        error = %e,
1340                        "stealth check script failed during evaluation"
1341                    );
1342                    CheckResult {
1343                        id: check.id,
1344                        description: check.description.to_string(),
1345                        passed: false,
1346                        details: format!("script error: {e}"),
1347                    }
1348                }
1349            };
1350            tracing::debug!(
1351                check = ?result.id,
1352                passed = result.passed,
1353                details = %result.details,
1354                "stealth check result"
1355            );
1356            results.push(result);
1357        }
1358
1359        Ok(DiagnosticReport::new(results))
1360    }
1361
1362    /// Run stealth checks and attach transport diagnostics (JA3/JA4/HTTP3).
1363    ///
1364    pub async fn verify_stealth_with_transport(
1365        &self,
1366        observed: Option<crate::diagnostic::TransportObservations>,
1367    ) -> Result<crate::diagnostic::DiagnosticReport> {
1368        let report = self.verify_stealth().await?;
1369
1370        let user_agent = match self.eval::<String>("navigator.userAgent").await {
1371            Ok(ua) => ua,
1372            Err(e) => {
1373                tracing::warn!(error = %e, "failed to read navigator.userAgent for transport diagnostics");
1374                String::new()
1375            }
1376        };
1377
1378        let transport = crate::diagnostic::TransportDiagnostic::from_user_agent_and_observations(
1379            &user_agent,
1380            observed.as_ref(),
1381        );
1382
1383        Ok(report.with_transport(transport))
1384    }
1385}
1386
1387// ─── extract feature ─────────────────────────────────────────────────────────
1388
1389#[cfg(feature = "extract")]
1390impl PageHandle {
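    /// Extract a typed value from every node matching `selector`.
    ///
    /// Queries the page with [`query_selector_all`](Self::query_selector_all)
    /// and runs `T::extract_from` on each matched node.
    /// All per-node extractions are driven concurrently via
    /// [`futures::future::try_join_all`].
    ///
    /// # Errors
    ///
    /// Returns [`BrowserError::CdpError`] if the selector query
    /// fails, or [`BrowserError::ExtractionFailed`] if any field extraction
    /// fails.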
1400    ///
1401    /// # Example
1402    ///
1403    /// ```ignore
1404    /// use stygian_browser::extract::Extract;
1405    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1406    /// use std::time::Duration;
1407    ///
1408    /// #[derive(Extract)]
1409    /// struct Link {
1410    ///     href: Option<String>,
1411    /// }
1412    ///
1413    /// # async fn run() -> stygian_browser::error::Result<()> {
1414    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1415    /// let handle = pool.acquire().await?;
1416    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1417    /// page.navigate(
1418    ///     "https://example.com",
1419    ///     WaitUntil::DomContentLoaded,
1420    ///     Duration::from_secs(30),
1421    /// ).await?;
1422    /// let links: Vec<Link> = page.extract_all::<Link>("nav li").await?;
1423    /// # Ok(())
1424    /// # }
1425    /// ```
1426    pub async fn extract_all<T>(&self, selector: &str) -> Result<Vec<T>>
1427    where
1428        T: crate::extract::Extractable,
1429    {
1430        use futures::future::try_join_all;
1431
1432        let nodes = self.query_selector_all(selector).await?;
1433        try_join_all(nodes.iter().map(|n| T::extract_from(n)))
1434            .await
1435            .map_err(BrowserError::ExtractionFailed)
1436    }
1437
1438    /// Try each selector in `selectors` in order and return the extracted
1439    /// results from the **first** selector that matches at least one node.
1440    ///
1441    /// This is useful when a page may use different markup across versions or
1442    /// A/B variants — supply the preferred selector first and progressively
1443    /// wider fallbacks afterwards.
1444    ///
1445    /// Returns an empty `Vec` only when *all* selectors match zero nodes
1446    /// (i.e. the element is genuinely absent from the page).  A non-empty
1447    /// intermediate selector result that then fails during extraction **will**
1448    /// return an error.
1449    ///
1450    /// # Errors
1451    ///
1452    /// Returns [`BrowserError::CdpError`] if the selector query fails, or
1453    /// [`BrowserError::ExtractionFailed`] if a matched node fails extraction.
1454    ///
1455    /// # Example
1456    ///
1457    /// ```ignore
1458    /// use stygian_browser::extract::Extract;
1459    ///
1460    /// #[derive(Extract)]
1461    /// struct Headline { title: String }
1462    ///
1463    /// # async fn run(page: &stygian_browser::PageHandle) -> stygian_browser::error::Result<()> {
1464    /// // Try modern selector first, fall back to legacy markup.
1465    /// let items = page
1466    ///     .extract_all_with_fallback::<Headline>(&["h2.headline", "h2.title", "h2"])
1467    ///     .await?;
1468    /// # Ok(())
1469    /// # }
1470    /// ```
1471    pub async fn extract_all_with_fallback<T>(&self, selectors: &[&str]) -> Result<Vec<T>>
1472    where
1473        T: crate::extract::Extractable,
1474    {
1475        use futures::future::try_join_all;
1476
1477        for &selector in selectors {
1478            let nodes = self.query_selector_all(selector).await?;
1479            if nodes.is_empty() {
1480                continue;
1481            }
1482            return try_join_all(nodes.iter().map(|n| T::extract_from(n)))
1483                .await
1484                .map_err(BrowserError::ExtractionFailed);
1485        }
1486
1487        Ok(vec![])
1488    }
1489
1490    /// Extract from every node matching `selector`, **skipping** nodes where
1491    /// a required field is absent (i.e. [`ExtractionError::Missing`]).
1492    ///
1493    /// Unlike [`extract_all`], this method is lenient about structural
1494    /// mismatches: nodes that fail with [`ExtractionError::Missing`] are
1495    /// silently dropped from the result set.  All other extraction errors
1496    /// (CDP failures, stale nodes, nested errors) still propagate as hard
1497    /// failures.
1498    ///
1499    /// This is useful when scraping heterogeneous lists where some items
1500    /// lack an optional field that your struct treats as required.
1501    ///
1502    /// [`extract_all`]: Self::extract_all
1503    /// [`ExtractionError::Missing`]: crate::extract::ExtractionError::Missing
1504    ///
1505    /// # Errors
1506    ///
1507    /// Returns [`BrowserError::CdpError`] if the selector query fails, or
1508    /// [`BrowserError::ExtractionFailed`] for non-`Missing` extraction errors.
1509    ///
1510    /// # Example
1511    ///
1512    /// ```ignore
1513    /// use stygian_browser::extract::Extract;
1514    ///
1515    /// #[derive(Extract)]
1516    /// struct Price { amount: String }
1517    ///
1518    /// # async fn run(page: &stygian_browser::PageHandle) -> stygian_browser::error::Result<()> {
1519    /// // Products without a price tag are silently skipped.
1520    /// let prices = page.extract_resilient::<Price>(".product").await?;
1521    /// # Ok(())
1522    /// # }
1523    /// ```
1524    pub async fn extract_resilient<T>(&self, selector: &str) -> Result<Vec<T>>
1525    where
1526        T: crate::extract::Extractable,
1527    {
1528        use crate::extract::ExtractionError;
1529
1530        let nodes = self.query_selector_all(selector).await?;
1531        let mut results = Vec::with_capacity(nodes.len());
1532
1533        for node in &nodes {
1534            match T::extract_from(node).await {
1535                Ok(item) => results.push(item),
1536                Err(ExtractionError::Missing { .. }) => {
1537                    tracing::debug!(
1538                        selector,
1539                        "extract_resilient: skipping node with missing required field"
1540                    );
1541                }
1542                Err(e) => return Err(BrowserError::ExtractionFailed(e)),
1543            }
1544        }
1545
1546        Ok(results)
1547    }
1548}
1549
1550// ─── similarity feature ──────────────────────────────────────────────────────
1551
1552#[cfg(feature = "similarity")]
1553impl NodeHandle {
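    /// Compute a structural
    /// [`ElementFingerprint`](crate::similarity::ElementFingerprint) for this
    /// node.
    ///
    /// Issues a single `Runtime.callFunctionOn` JS eval that extracts the tag,
    /// class list, attribute names, and body-depth in one round-trip.
    ///
    /// # Errors
    ///
    /// Returns [`BrowserError::Timeout`] if the call exceeds `cdp_timeout`,
    /// the error mapped by `cdp_err_or_stale` when the CDP call fails
    /// (presumably [`BrowserError::StaleNode`] when the remote object has been
    /// invalidated), or [`BrowserError::ScriptExecutionFailed`] if the script
    /// produces unexpected output.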
1563    pub async fn fingerprint(&self) -> Result<crate::similarity::ElementFingerprint> {
1564        const JS: &str = r"function() {
1565    var el = this;
1566    var tag = el.tagName.toLowerCase();
1567    var classes = Array.prototype.slice.call(el.classList).sort();
1568    var attrNames = Array.prototype.slice.call(el.attributes)
1569        .map(function(a) { return a.name; })
1570        .filter(function(n) { return n !== 'class' && n !== 'id'; })
1571        .sort();
1572    var depth = 0;
1573    var n = el.parentElement;
1574    while (n && n.tagName.toLowerCase() !== 'body') { depth++; n = n.parentElement; }
1575    return JSON.stringify({ tag: tag, classes: classes, attrNames: attrNames, depth: depth });
1576}";
1577
1578        let returns = tokio::time::timeout(self.cdp_timeout, self.element.call_js_fn(JS, true))
1579            .await
1580            .map_err(|_| BrowserError::Timeout {
1581                operation: "NodeHandle::fingerprint".to_string(),
1582                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1583            })?
1584            .map_err(|e| self.cdp_err_or_stale(&e, "fingerprint"))?;
1585
1586        let json_str = returns
1587            .result
1588            .value
1589            .as_ref()
1590            .and_then(|v| v.as_str())
1591            .ok_or_else(|| BrowserError::ScriptExecutionFailed {
1592                script: "NodeHandle::fingerprint".to_string(),
1593                reason: "CDP returned no string value from fingerprint script".to_string(),
1594            })?;
1595
1596        serde_json::from_str::<crate::similarity::ElementFingerprint>(json_str).map_err(|e| {
1597            BrowserError::ScriptExecutionFailed {
1598                script: "NodeHandle::fingerprint".to_string(),
1599                reason: format!("failed to deserialise fingerprint JSON: {e}"),
1600            }
1601        })
1602    }
1603}
1604
1605#[cfg(feature = "similarity")]
1606impl PageHandle {
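    /// Find the elements on the page that are structurally similar to
    /// `reference`, scored by [`crate::similarity::SimilarityConfig`].
    ///
    /// Fingerprints the reference node (via
    /// [`NodeHandle::fingerprint`]), then fingerprints every candidate returned
    /// by `query_selector_all("*")` and keeps those whose
    /// [`crate::similarity::jaccard_weighted`] score is at least
    /// `config.threshold`.  Results are ordered by score descending and, when
    /// `config.max_results` is non-zero, truncated to that many entries.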
1612    ///
1613    /// # Example
1614    ///
1615    /// ```no_run
1616    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1617    /// use stygian_browser::similarity::SimilarityConfig;
1618    /// use std::time::Duration;
1619    ///
1620    /// # async fn run() -> stygian_browser::error::Result<()> {
1621    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1622    /// let handle = pool.acquire().await?;
1623    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1624    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
1625    ///
    /// let nodes = page.query_selector_all("h1").await?;
    /// let reference = nodes.into_iter().next().ok_or(stygian_browser::error::BrowserError::StaleNode { selector: "h1".to_string() })?;
    /// let similar = page.find_similar(&reference, SimilarityConfig::default()).await?;
    /// for m in &similar {
    ///     println!("score={:.2}", m.score);
    /// }
1632    /// # Ok(())
1633    /// # }
1634    /// ```
1635    ///
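    /// # Errors
    ///
    /// Returns the error from fingerprinting `reference` (for example
    /// [`BrowserError::ScriptExecutionFailed`] if a scoring script fails) or
    /// from the candidate query. Candidates whose own fingerprint cannot be
    /// computed are skipped rather than reported.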
1639    pub async fn find_similar(
1640        &self,
1641        reference: &NodeHandle,
1642        config: crate::similarity::SimilarityConfig,
1643    ) -> Result<Vec<crate::similarity::SimilarMatch>> {
1644        use crate::similarity::{SimilarMatch, jaccard_weighted};
1645
1646        let ref_fp = reference.fingerprint().await?;
1647        let candidates = self.query_selector_all("*").await?;
1648
1649        let mut matches: Vec<SimilarMatch> = Vec::new();
1650        for node in candidates {
1651            if let Ok(cand_fp) = node.fingerprint().await {
1652                let score = jaccard_weighted(&ref_fp, &cand_fp);
1653                if score >= config.threshold {
1654                    matches.push(SimilarMatch { node, score });
1655                }
1656            }
1657            // Stale / detached nodes are silently skipped.
1658        }
1659
1660        matches.sort_by(|a, b| {
1661            b.score
1662                .partial_cmp(&a.score)
1663                .unwrap_or(std::cmp::Ordering::Equal)
1664        });
1665
1666        if config.max_results > 0 {
1667            matches.truncate(config.max_results);
1668        }
1669
1670        Ok(matches)
1671    }
1672}
1673
1674impl Drop for PageHandle {
1675    fn drop(&mut self) {
1676        warn!("PageHandle dropped without explicit close(); spawning cleanup task");
1677        // chromiumoxide Page does not implement close on Drop, so we spawn
1678        // swap it out. We clone the Page handle (it's Arc-backed internally).
1679        let page = self.page.clone();
1680        tokio::spawn(async move {
1681            let _ = page.close().await;
1682        });
1683    }
1684}
1685
1686// ─── Session warmup & refresh ─────────────────────────────────────────────────
1687
/// Simplified, JSON-serializable wait strategy used in [`WarmupOptions`] and
/// [`RefreshOptions`].
///
/// This is a serialization-friendly analogue of [`WaitUntil`].  Use
/// [`WarmupWait::into_wait_until`] to convert before calling
/// [`PageHandle::navigate`].
///
/// Variant names are (de)serialized in `snake_case`, i.e. as
/// `"dom_content_loaded"` and `"network_idle"`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum WarmupWait {
    /// Wait until the HTML is fully parsed (`DOMContentLoaded`).  This is the
    /// default and works for most pages.
    #[default]
    DomContentLoaded,
    /// Wait until there are no more than two in-flight network requests for at
    /// least 500 ms after navigation.
    NetworkIdle,
}
1705
1706impl WarmupWait {
1707    /// Convert into the lower-level [`WaitUntil`] enum.
1708    #[must_use]
1709    pub const fn into_wait_until(self) -> WaitUntil {
1710        match self {
1711            Self::DomContentLoaded => WaitUntil::DomContentLoaded,
1712            Self::NetworkIdle => WaitUntil::NetworkIdle,
1713        }
1714    }
1715}
1716
/// Options for [`PageHandle::warmup`].
///
/// When deserialized, only `url` is required; `wait`, `timeout_ms` and
/// `stabilize_ms` fall back to their defaults when omitted.  Note that
/// [`Default`] leaves `url` empty, so it must be set before the options are
/// passed to a warmup call.
///
/// # Example
///
/// ```
/// use stygian_browser::page::{WarmupOptions, WarmupWait};
///
/// let opts = WarmupOptions {
///     url: "https://example.com".to_string(),
///     wait: WarmupWait::DomContentLoaded,
///     timeout_ms: 30_000,
///     stabilize_ms: 500,
/// };
/// assert_eq!(opts.timeout_ms, 30_000);
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WarmupOptions {
    /// The URL to navigate to during warmup.
    pub url: String,
    /// Wait strategy applied after the navigation commit (default:
    /// `DomContentLoaded`).
    #[serde(default)]
    pub wait: WarmupWait,
    /// Navigation timeout in milliseconds.  Default: `30 000`, supplied by
    /// [`WarmupOptions::default_timeout_ms`] when the field is absent.
    #[serde(default = "WarmupOptions::default_timeout_ms")]
    pub timeout_ms: u64,
    /// Additional pause after navigation to let dynamic resources (XHR,
    /// lazy-loaded images) settle, in milliseconds.  `0` disables the
    /// stabilization step (default).
    #[serde(default)]
    pub stabilize_ms: u64,
}
1749
1750impl WarmupOptions {
1751    /// Returns the default navigation timeout (30 000 ms).
1752    #[must_use]
1753    pub const fn default_timeout_ms() -> u64 {
1754        30_000
1755    }
1756}
1757
1758impl Default for WarmupOptions {
1759    fn default() -> Self {
1760        Self {
1761            url: String::new(),
1762            wait: WarmupWait::DomContentLoaded,
1763            timeout_ms: Self::default_timeout_ms(),
1764            stabilize_ms: 0,
1765        }
1766    }
1767}
1768
/// Diagnostic report produced by [`PageHandle::warmup`].
///
/// # Example
///
/// ```
/// use stygian_browser::page::WarmupReport;
/// let report = WarmupReport {
///     url: "https://example.com".to_string(),
///     elapsed_ms: 250,
///     status_code: Some(200),
///     title: "Example Domain".to_string(),
///     stabilized: false,
/// };
/// assert_eq!(report.status_code, Some(200));
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WarmupReport {
    /// The URL that was warmed.  This is the URL passed in the options, not
    /// one read back from the page after navigation.
    pub url: String,
    /// Elapsed wall-time in milliseconds, measured across the navigation, the
    /// title lookup and the stabilization pause (if any).
    pub elapsed_ms: u64,
    /// HTTP status code of the warmup navigation, if captured by the
    /// `Network.responseReceived` listener.
    pub status_code: Option<u16>,
    /// Page title after warmup navigation.  Empty when the title lookup
    /// failed, since that failure is not treated as a warmup error.
    pub title: String,
    /// Whether a stabilization pause (`stabilize_ms > 0`) was applied after
    /// navigation.
    pub stabilized: bool,
}
1799
/// Options for [`PageHandle::refresh`].
///
/// Every field has a serde default, so an empty JSON object deserializes to
/// the same values as [`Default`].
///
/// # Example
///
/// ```
/// use stygian_browser::page::{RefreshOptions, WarmupWait};
///
/// let opts = RefreshOptions {
///     wait: WarmupWait::DomContentLoaded,
///     timeout_ms: 15_000,
///     reset_connection: true,
/// };
/// assert!(opts.reset_connection);
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RefreshOptions {
    /// Wait strategy applied after the reload (default: `DomContentLoaded`).
    #[serde(default)]
    pub wait: WarmupWait,
    /// Reload timeout in milliseconds.  Default: `30 000`, supplied by
    /// [`RefreshOptions::default_timeout_ms`] when the field is absent.
    #[serde(default = "RefreshOptions::default_timeout_ms")]
    pub timeout_ms: u64,
    /// Signals that the caller would like a new TCP connection while cookies
    /// and storage are retained in the browser process.  Default: `false`.
    ///
    /// NOTE: [`PageHandle::refresh`] currently re-navigates to the current URL
    /// for both values of this flag; it records intent only and does not
    /// select a different code path or guarantee a fresh connection.
    #[serde(default)]
    pub reset_connection: bool,
}
1829
1830impl RefreshOptions {
1831    /// Returns the default reload timeout (30 000 ms).
1832    #[must_use]
1833    pub const fn default_timeout_ms() -> u64 {
1834        30_000
1835    }
1836}
1837
1838impl Default for RefreshOptions {
1839    fn default() -> Self {
1840        Self {
1841            wait: WarmupWait::DomContentLoaded,
1842            timeout_ms: Self::default_timeout_ms(),
1843            reset_connection: false,
1844        }
1845    }
1846}
1847
/// Diagnostic report produced by [`PageHandle::refresh`].
///
/// # Example
///
/// ```
/// use stygian_browser::page::RefreshReport;
/// let report = RefreshReport {
///     url: "https://example.com".to_string(),
///     elapsed_ms: 180,
///     status_code: Some(200),
/// };
/// assert_eq!(report.elapsed_ms, 180);
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RefreshReport {
    /// URL of the page after the refresh navigation, read back from the page
    /// once navigation has completed.
    pub url: String,
    /// Elapsed wall-time in milliseconds, measured from the start of the
    /// refresh call (including resolving the current URL) until the
    /// post-navigation URL has been read.
    pub elapsed_ms: u64,
    /// HTTP status code of the refresh navigation, if captured.
    pub status_code: Option<u16>,
}
1870
1871// ─── PageHandle warmup / refresh ──────────────────────────────────────────────
1872
1873impl PageHandle {
1874    /// Warm up a browser session by navigating to `options.url` and
1875    /// optionally waiting for dynamic resources to settle.
1876    ///
1877    /// Warmup is **idempotent**: calling it repeatedly re-navigates and
1878    /// re-warms the same session without adverse side effects.
1879    ///
1880    /// # Errors
1881    ///
1882    /// Returns [`BrowserError::NavigationFailed`] if the navigation times out
1883    /// or the underlying CDP call fails.
1884    ///
1885    /// # Example
1886    ///
1887    /// ```no_run
1888    /// # async fn run() -> stygian_browser::error::Result<()> {
1889    /// use stygian_browser::{BrowserPool, BrowserConfig};
1890    /// use stygian_browser::page::{WarmupOptions, WarmupWait};
1891    ///
1892    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1893    /// let handle = pool.acquire().await?;
1894    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1895    ///
1896    /// let report = page.warmup(WarmupOptions {
1897    ///     url: "https://example.com".to_string(),
1898    ///     wait: WarmupWait::DomContentLoaded,
1899    ///     timeout_ms: 30_000,
1900    ///     stabilize_ms: 500,
1901    /// }).await?;
1902    /// println!("warmed in {}ms: {}", report.elapsed_ms, report.title);
1903    /// handle.release().await;
1904    /// # Ok(())
1905    /// # }
1906    /// ```
1907    pub async fn warmup(&mut self, options: WarmupOptions) -> Result<WarmupReport> {
1908        let start = std::time::Instant::now();
1909        let nav_timeout = Duration::from_millis(options.timeout_ms);
1910        self.navigate(
1911            &options.url,
1912            options.wait.clone().into_wait_until(),
1913            nav_timeout,
1914        )
1915        .await?;
1916        let status_code = self.status_code()?;
1917        let title = self.title().await.unwrap_or_default();
1918        let stabilized = options.stabilize_ms > 0;
1919        if stabilized {
1920            tokio::time::sleep(Duration::from_millis(options.stabilize_ms)).await;
1921        }
1922        let elapsed_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
1923        Ok(WarmupReport {
1924            url: options.url,
1925            elapsed_ms,
1926            status_code,
1927            title,
1928            stabilized,
1929        })
1930    }
1931
1932    /// Refresh the current page, retaining all in-browser session state
1933    /// (cookies, `localStorage`, `sessionStorage`).
1934    ///
1935    /// When `options.reset_connection` is `false` (default) a standard
1936    /// CDP reload is issued.  When `true`, the current URL is re-navigated,
1937    /// which expresses the caller's intent to force a new underlying TCP/TLS
1938    /// connection while keeping all browser-side state intact.
1939    ///
1940    /// Refresh is **idempotent**: repeated calls simply reload the page again.
1941    ///
1942    /// # Errors
1943    ///
1944    /// Returns [`BrowserError::NavigationFailed`] if the current URL cannot be
1945    /// determined or the reload times out.
1946    ///
1947    /// # Example
1948    ///
1949    /// ```no_run
1950    /// # async fn run() -> stygian_browser::error::Result<()> {
1951    /// use stygian_browser::{BrowserPool, BrowserConfig};
1952    /// use stygian_browser::page::{RefreshOptions, WaitUntil};
1953    ///
1954    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1955    /// let handle = pool.acquire().await?;
1956    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1957    /// page.navigate(
1958    ///     "https://example.com",
1959    ///     WaitUntil::DomContentLoaded,
1960    ///     std::time::Duration::from_secs(30),
1961    /// ).await?;
1962    ///
1963    /// let report = page.refresh(RefreshOptions::default()).await?;
1964    /// println!("refreshed in {}ms", report.elapsed_ms);
1965    /// handle.release().await;
1966    /// # Ok(())
1967    /// # }
1968    /// ```
1969    pub async fn refresh(&mut self, options: RefreshOptions) -> Result<RefreshReport> {
1970        let start = std::time::Instant::now();
1971        let nav_timeout = Duration::from_millis(options.timeout_ms);
1972        let wait = options.wait.clone().into_wait_until();
1973        // Resolve the current URL before any navigation changes it.
1974        let current_url = self.url().await?;
1975        if current_url.is_empty() || current_url == "about:blank" {
1976            return Err(BrowserError::NavigationFailed {
1977                url: current_url,
1978                reason: "page has not been navigated yet; call warmup() or navigate() first"
1979                    .to_string(),
1980            });
1981        }
1982        // Both code paths navigate to the same URL.  `reset_connection: true`
1983        // expresses the *intent* to use a new TCP connection; the browser is free
1984        // to reuse or create a new connection as its connection pool dictates.
1985        self.navigate(&current_url, wait, nav_timeout).await?;
1986        let status_code = self.status_code()?;
1987        let url = self.url().await?;
1988        let elapsed_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
1989        Ok(RefreshReport {
1990            url,
1991            elapsed_ms,
1992            status_code,
1993        })
1994    }
1995}
1996
1997// ─── Tests ────────────────────────────────────────────────────────────────────
1998
#[cfg(test)]
mod tests {
    use super::*;

    /// `block_media` blocks Image, Font, Stylesheet and Media requests but
    /// leaves Script and XHR alone.
    #[test]
    fn resource_filter_block_media_blocks_image() {
        let filter = ResourceFilter::block_media();
        assert!(filter.should_block("Image"));
        assert!(filter.should_block("Font"));
        assert!(filter.should_block("Stylesheet"));
        assert!(filter.should_block("Media"));
        assert!(!filter.should_block("Script"));
        assert!(!filter.should_block("XHR"));
    }

    /// `should_block` matches the resource type name regardless of case.
    #[test]
    fn resource_filter_case_insensitive() {
        let filter = ResourceFilter::block_images_and_fonts();
        assert!(filter.should_block("image")); // lowercase
        assert!(filter.should_block("IMAGE")); // uppercase
        assert!(!filter.should_block("Stylesheet"));
    }

    /// Chained `block` calls accumulate: only the listed types are blocked.
    #[test]
    fn resource_filter_builder_chain() {
        let filter = ResourceFilter::default()
            .block(ResourceType::Image)
            .block(ResourceType::Font);
        assert!(filter.should_block("Image"));
        assert!(filter.should_block("Font"));
        assert!(!filter.should_block("Stylesheet"));
    }

    /// Blocking the same type twice stores it only once.
    #[test]
    fn resource_filter_dedup_block() {
        let filter = ResourceFilter::default()
            .block(ResourceType::Image)
            .block(ResourceType::Image); // duplicate
        assert_eq!(filter.blocked.len(), 1);
    }

    /// A default filter is empty; `block_media` is not.
    #[test]
    fn resource_filter_is_empty_when_default() {
        assert!(ResourceFilter::default().is_empty());
        assert!(!ResourceFilter::block_media().is_empty());
    }

    /// `WaitUntil::Selector` carries the selector string it was built with.
    #[test]
    fn wait_until_selector_stores_string() {
        let w = WaitUntil::Selector("#foo".to_string());
        assert!(matches!(w, WaitUntil::Selector(ref s) if s == "#foo"));
    }

    /// Each `ResourceType` maps to its CDP resource-type string.
    #[test]
    fn resource_type_cdp_str() {
        assert_eq!(ResourceType::Image.as_cdp_str(), "Image");
        assert_eq!(ResourceType::Font.as_cdp_str(), "Font");
        assert_eq!(ResourceType::Stylesheet.as_cdp_str(), "Stylesheet");
        assert_eq!(ResourceType::Media.as_cdp_str(), "Media");
    }

    /// Compile-time check that `PageHandle` is both `Send` and `Sync`.
    #[test]
    fn page_handle_is_send_sync() {
        fn assert_send<T: Send>() {}
        fn assert_sync<T: Sync>() {}
        assert_send::<PageHandle>();
        assert_sync::<PageHandle>();
    }

    /// Verify the resilient extractor correctly classifies `ExtractionError`
    /// variants — `Missing` must be treated as "skip", others as hard errors.
    #[cfg(feature = "extract")]
    #[test]
    fn extraction_error_missing_is_skippable() {
        use crate::extract::ExtractionError;

        let missing = ExtractionError::Missing {
            field: "title",
            selector: "h1",
        };
        assert!(
            matches!(missing, ExtractionError::Missing { .. }),
            "ExtractionError::Missing should be the skip variant"
        );

        // Non-Missing variants should NOT match the skip pattern
        let nested = ExtractionError::Nested {
            field: "link",
            source: Box::new(ExtractionError::Missing {
                field: "href",
                selector: "a",
            }),
        };
        assert!(
            !matches!(nested, ExtractionError::Missing { .. }),
            "ExtractionError::Nested must not match Missing"
        );
    }

    /// A stored status code of `0` is the "nothing captured" sentinel and maps
    /// to `None`.  The sentinel checks that turn the raw `u16` into an
    /// `Option<u16>` are pure-logic invariants testable without a live browser.
    /// The mapping is replicated inline here rather than called through
    /// `PageHandle`.
    #[test]
    fn status_code_sentinel_zero_maps_to_none() {
        use std::sync::atomic::{AtomicU16, Ordering};
        let atom = AtomicU16::new(0);
        let code = atom.load(Ordering::Acquire);
        assert_eq!(if code == 0 { None } else { Some(code) }, None::<u16>);
    }

    /// Any non-zero stored status code maps to `Some(code)` unchanged.
    #[test]
    fn status_code_non_zero_maps_to_some() {
        use std::sync::atomic::{AtomicU16, Ordering};
        for &expected in &[200u16, 301, 404, 503] {
            let atom = AtomicU16::new(expected);
            let code = atom.load(Ordering::Acquire);
            assert_eq!(if code == 0 { None } else { Some(code) }, Some(expected));
        }
    }

    // ── NodeHandle pure-logic tests ───────────────────────────────────────────

    /// `attr_map` relies on `chunks_exact(2)` — verify the pairing logic is
    /// correct without a live browser by exercising it directly.
    #[test]
    fn attr_map_chunking_pairs_correctly() {
        let flat = [
            "id".to_string(),
            "main".to_string(),
            "data-ux".to_string(),
            "Section".to_string(),
            "class".to_string(),
            "container".to_string(),
        ];
        let mut map = std::collections::HashMap::with_capacity(flat.len() / 2);
        for pair in flat.chunks_exact(2) {
            if let [name, value] = pair {
                map.insert(name.clone(), value.clone());
            }
        }
        assert_eq!(map.get("id").map(String::as_str), Some("main"));
        assert_eq!(map.get("data-ux").map(String::as_str), Some("Section"));
        assert_eq!(map.get("class").map(String::as_str), Some("container"));
        assert_eq!(map.len(), 3);
    }

    /// An odd-length flat list (a name with no value) is handled
    /// gracefully — the trailing element is silently ignored.
    #[test]
    fn attr_map_chunking_ignores_odd_trailing() {
        let flat = ["orphan".to_string()]; // no value
        let mut map = std::collections::HashMap::new();
        for pair in flat.chunks_exact(2) {
            if let [name, value] = pair {
                map.insert(name.clone(), value.clone());
            }
        }
        assert!(map.is_empty());
    }

    /// Empty flat list → empty map.
    #[test]
    fn attr_map_chunking_empty_input() {
        let flat: Vec<String> = vec![];
        let map: std::collections::HashMap<String, String> = flat
            .chunks_exact(2)
            .filter_map(|pair| {
                if let [name, value] = pair {
                    Some((name.clone(), value.clone()))
                } else {
                    None
                }
            })
            .collect();
        assert!(map.is_empty());
    }

    /// A JSON array of tag names parses into a `Vec<String>` in the same order.
    #[test]
    fn ancestors_json_parse_round_trip() -> std::result::Result<(), serde_json::Error> {
        let json = r#"["p","article","body","html"]"#;
        let result: Vec<String> = serde_json::from_str(json)?;
        assert_eq!(result, ["p", "article", "body", "html"]);
        Ok(())
    }

    /// An empty JSON array parses into an empty `Vec<String>`.
    #[test]
    fn ancestors_json_parse_empty() -> std::result::Result<(), serde_json::Error> {
        let json = "[]";
        let result: Vec<String> = serde_json::from_str(json)?;
        assert!(result.is_empty());
        Ok(())
    }

    /// A `StaleNode` error whose selector carries a traversal suffix (e.g.
    /// `"div::parent"`) must surface that suffix in its `Display` output so
    /// callers can locate the failed traversal in logs.
    #[test]
    fn traversal_selector_suffix_in_stale_error() {
        let e = crate::error::BrowserError::StaleNode {
            selector: "div::parent".to_string(),
        };
        let msg = e.to_string();
        assert!(
            msg.contains("div::parent"),
            "StaleNode display must include the full selector; got: {msg}"
        );
    }

    /// Same as above for a selector ending in the `::next` suffix.
    #[test]
    fn traversal_next_suffix_in_stale_error() {
        let e = crate::error::BrowserError::StaleNode {
            selector: "li.price::next".to_string(),
        };
        assert!(e.to_string().contains("li.price::next"));
    }

    /// Same as above for a selector ending in the `::prev` suffix.
    #[test]
    fn traversal_prev_suffix_in_stale_error() {
        let e = crate::error::BrowserError::StaleNode {
            selector: "td.label::prev".to_string(),
        };
        assert!(e.to_string().contains("td.label::prev"));
    }

    // ── Warmup / Refresh type tests ───────────────────────────────────────────

    /// `WarmupOptions::default()` uses `DomContentLoaded`, the default
    /// timeout and no stabilization pause.
    #[test]
    fn warmup_options_defaults() {
        let opts = WarmupOptions::default();
        assert_eq!(opts.wait, WarmupWait::DomContentLoaded);
        assert_eq!(opts.timeout_ms, WarmupOptions::default_timeout_ms());
        assert_eq!(opts.stabilize_ms, 0);
    }

    /// Every `WarmupOptions` field survives a JSON serialize/deserialize
    /// round trip.
    #[test]
    fn warmup_options_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>>
    {
        let opts = WarmupOptions {
            url: "https://example.com".to_string(),
            wait: WarmupWait::NetworkIdle,
            timeout_ms: 15_000,
            stabilize_ms: 250,
        };
        let json = serde_json::to_string(&opts)?;
        let restored: WarmupOptions = serde_json::from_str(&json)?;
        assert_eq!(restored.url, "https://example.com");
        assert_eq!(restored.wait, WarmupWait::NetworkIdle);
        assert_eq!(restored.timeout_ms, 15_000);
        assert_eq!(restored.stabilize_ms, 250);
        Ok(())
    }

    /// The `#[default]` variant of `WarmupWait` is `DomContentLoaded`.
    #[test]
    fn warmup_wait_default_is_dom_content_loaded() {
        assert_eq!(WarmupWait::default(), WarmupWait::DomContentLoaded);
    }

    /// Each `WarmupWait` variant converts to the same-named `WaitUntil`
    /// variant.
    #[test]
    fn warmup_wait_into_wait_until_variants() {
        assert!(matches!(
            WarmupWait::DomContentLoaded.into_wait_until(),
            WaitUntil::DomContentLoaded
        ));
        assert!(matches!(
            WarmupWait::NetworkIdle.into_wait_until(),
            WaitUntil::NetworkIdle
        ));
    }

    /// `RefreshOptions::default()` uses `DomContentLoaded`, the default
    /// timeout and `reset_connection == false`.
    #[test]
    fn refresh_options_defaults() {
        let opts = RefreshOptions::default();
        assert_eq!(opts.wait, WarmupWait::DomContentLoaded);
        assert_eq!(opts.timeout_ms, RefreshOptions::default_timeout_ms());
        assert!(!opts.reset_connection);
    }

    /// Every `RefreshOptions` field survives a JSON serialize/deserialize
    /// round trip.
    #[test]
    fn refresh_options_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>>
    {
        let opts = RefreshOptions {
            wait: WarmupWait::NetworkIdle,
            timeout_ms: 10_000,
            reset_connection: true,
        };
        let json = serde_json::to_string(&opts)?;
        let restored: RefreshOptions = serde_json::from_str(&json)?;
        assert_eq!(restored.wait, WarmupWait::NetworkIdle);
        assert_eq!(restored.timeout_ms, 10_000);
        assert!(restored.reset_connection);
        Ok(())
    }

    /// Every `WarmupReport` field survives a JSON serialize/deserialize
    /// round trip.
    #[test]
    fn warmup_report_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>> {
        let report = WarmupReport {
            url: "https://example.com".to_string(),
            elapsed_ms: 320,
            status_code: Some(200),
            title: "Example Domain".to_string(),
            stabilized: true,
        };
        let json = serde_json::to_string(&report)?;
        let restored: WarmupReport = serde_json::from_str(&json)?;
        assert_eq!(restored.url, "https://example.com");
        assert_eq!(restored.elapsed_ms, 320);
        assert_eq!(restored.status_code, Some(200));
        assert_eq!(restored.title, "Example Domain");
        assert!(restored.stabilized);
        Ok(())
    }

    /// Every `RefreshReport` field survives a JSON serialize/deserialize
    /// round trip.
    #[test]
    fn refresh_report_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>>
    {
        let report = RefreshReport {
            url: "https://example.com/".to_string(),
            elapsed_ms: 180,
            status_code: Some(304),
        };
        let json = serde_json::to_string(&report)?;
        let restored: RefreshReport = serde_json::from_str(&json)?;
        assert_eq!(restored.url, "https://example.com/");
        assert_eq!(restored.elapsed_ms, 180);
        assert_eq!(restored.status_code, Some(304));
        Ok(())
    }

    /// JSON that omits `stabilize_ms` (and `wait`) still deserializes, with
    /// `stabilize_ms` falling back to zero.
    #[test]
    fn warmup_options_missing_stabilize_ms_defaults_to_zero()
    -> std::result::Result<(), Box<dyn std::error::Error>> {
        // stabilize_ms has `#[serde(default)]`; omitting it from JSON should
        // deserialize to 0 rather than erroring.
        let json = r#"{"url":"https://example.com","timeout_ms":30000}"#;
        let opts: WarmupOptions = serde_json::from_str(json)?;
        assert_eq!(opts.stabilize_ms, 0);
        Ok(())
    }

    // ── Integration tests (require live Chrome — skipped in CI) ──────────────

    /// Warm up a page then immediately extract content from the same origin.
    #[test]
    #[ignore = "requires live Chrome"]
    #[allow(clippy::expect_used)]
    fn integration_warmup_then_extraction() {
        let rt = tokio::runtime::Runtime::new().expect("tokio runtime");
        rt.block_on(async {
            use crate::{BrowserConfig, BrowserPool};
            let pool = BrowserPool::new(BrowserConfig::default())
                .await
                .expect("pool");
            let handle = pool.acquire().await.expect("handle");
            let mut page = handle
                .browser()
                .expect("browser")
                .new_page()
                .await
                .expect("page");

            let report = page
                .warmup(WarmupOptions {
                    url: "https://example.com".to_string(),
                    wait: WarmupWait::DomContentLoaded,
                    timeout_ms: 30_000,
                    stabilize_ms: 0,
                })
                .await
                .expect("warmup");

            assert!(!report.title.is_empty(), "title populated after warmup");
            assert!(report.elapsed_ms > 0);

            // Confirm the page is still usable for further queries.
            let html = page.content().await.expect("content");
            assert!(
                html.contains("example"),
                "page content available after warmup"
            );

            page.close().await.expect("close");
            handle.release().await;
        });
    }

    /// Refresh a page and verify session continuity (URL unchanged, page
    /// still navigable).
    #[test]
    #[ignore = "requires live Chrome"]
    #[allow(clippy::expect_used)]
    fn integration_refresh_keeps_session_state() {
        let rt = tokio::runtime::Runtime::new().expect("tokio runtime");
        rt.block_on(async {
            use crate::{BrowserConfig, BrowserPool};
            let pool = BrowserPool::new(BrowserConfig::default())
                .await
                .expect("pool");
            let handle = pool.acquire().await.expect("handle");
            let mut page = handle
                .browser()
                .expect("browser")
                .new_page()
                .await
                .expect("page");

            page.navigate(
                "https://example.com",
                WaitUntil::DomContentLoaded,
                Duration::from_secs(30),
            )
            .await
            .expect("initial navigate");

            let report = page
                .refresh(RefreshOptions::default())
                .await
                .expect("refresh");

            assert!(
                report.url.contains("example.com"),
                "URL retained after refresh; got: {}",
                report.url
            );
            assert!(report.elapsed_ms > 0);

            page.close().await.expect("close");
            handle.release().await;
        });
    }
}