Skip to main content

stygian_browser/
page.rs

1//!
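//! ## Resource blocking
//!
//! Pass a [`ResourceFilter`] to [`PageHandle::set_resource_filter`] to stop
//! selected resource types (images, fonts, stylesheets, media) from loading.
//!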
4//! ## Wait strategies
5//!
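//! [`PageHandle`] exposes three wait strategies via [`WaitUntil`]:
//! - `DomContentLoaded` — fires when the HTML is parsed
//! - `NetworkIdle` — waits for the load event, then until at most two requests remain in flight
//! - `Selector(css)` — waits until an element matching the CSS selector is present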
8//!
9//! # Example
10//!
11//! ```no_run
12//! use stygian_browser::{BrowserPool, BrowserConfig};
13//! use stygian_browser::page::{ResourceFilter, WaitUntil};
14//! use std::time::Duration;
15//!
16//! # async fn run() -> stygian_browser::error::Result<()> {
17//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
18//! let handle = pool.acquire().await?;
19//!
20//! let mut page = handle.browser().expect("valid browser").new_page().await?;
21//! page.set_resource_filter(ResourceFilter::block_media()).await?;
22//! page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
23//! let title = page.title().await?;
24//! println!("title: {title}");
25//! handle.release().await;
26//! # Ok(())
27//! # }
28//! ```
29
30use std::collections::HashMap;
31use std::sync::{
32    Arc,
33    atomic::{AtomicU16, Ordering},
34};
35use std::time::Duration;
36
37use chromiumoxide::Page;
38use tokio::time::timeout;
39use tracing::{debug, warn};
40
41use crate::error::{BrowserError, Result};
42
43// ─── ResourceType ─────────────────────────────────────────────────────────────
44
/// Resource categories, named as in the Chrome DevTools Protocol, that can be
/// intercepted and blocked.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResourceType {
    /// Images: `<img>`, `<picture>`, and CSS background images.
    Image,
    /// Web fonts pulled in through CSS `@font-face`.
    Font,
    /// External CSS stylesheets.
    Stylesheet,
    /// Audio and video media files.
    Media,
}

impl ResourceType {
    /// Return the CDP spelling of this resource type, e.g. `"Image"`.
    pub const fn as_cdp_str(&self) -> &'static str {
        match *self {
            Self::Media => "Media",
            Self::Stylesheet => "Stylesheet",
            Self::Font => "Font",
            Self::Image => "Image",
        }
    }
}
68
69// ─── ResourceFilter ───────────────────────────────────────────────────────────
70
71///
72/// # Example
73///
74/// ```
75/// use stygian_browser::page::ResourceFilter;
76/// let filter = ResourceFilter::block_media();
77/// assert!(filter.should_block("Image"));
78/// ```
79#[derive(Debug, Clone, Default)]
80pub struct ResourceFilter {
81    blocked: Vec<ResourceType>,
82}
83
84impl ResourceFilter {
85    /// Block all media resources (images, fonts, CSS, audio/video).
86    pub fn block_media() -> Self {
87        Self {
88            blocked: vec![
89                ResourceType::Image,
90                ResourceType::Font,
91                ResourceType::Stylesheet,
92                ResourceType::Media,
93            ],
94        }
95    }
96
97    pub fn block_images_and_fonts() -> Self {
98        Self {
99            blocked: vec![ResourceType::Image, ResourceType::Font],
100        }
101    }
102
103    #[must_use]
104    pub fn block(mut self, resource: ResourceType) -> Self {
105        if !self.blocked.contains(&resource) {
106            self.blocked.push(resource);
107        }
108        self
109    }
110
111    pub fn should_block(&self, cdp_type: &str) -> bool {
112        self.blocked
113            .iter()
114            .any(|r| r.as_cdp_str().eq_ignore_ascii_case(cdp_type))
115    }
116
117    pub const fn is_empty(&self) -> bool {
118        self.blocked.is_empty()
119    }
120}
121
122// ─── WaitUntil ────────────────────────────────────────────────────────────────
123
/// Specifies what condition to wait for after a page navigation.
///
/// # Example
///
/// ```
/// use stygian_browser::page::WaitUntil;
///
/// let _condition = WaitUntil::Selector("#content".to_string());
/// ```
#[derive(Debug, Clone)]
pub enum WaitUntil {
    /// Fires when the initial HTML is fully parsed, without waiting for
    /// subresources such as images and stylesheets to finish loading.
    DomContentLoaded,
    /// Waits for the page `load` event and then for network activity to
    /// settle (a small number of in-flight requests that stays low for a
    /// short settle period).
    NetworkIdle,
    /// Waits until an element matching the contained CSS selector can be
    /// found on the page.
    Selector(String),
}
139
140// ─── NodeHandle ───────────────────────────────────────────────────────────────
141
/// A handle to a single DOM element on a live page.
///
/// Accessors operate on the element in place: each one issues one or
/// more CDP `Runtime.callFunctionOn` calls against the held V8 remote object
/// reference — no HTML serialisation occurs.
///
/// A handle becomes **stale** after page navigation or if the underlying DOM
/// node is removed.  Stale calls return [`BrowserError::StaleNode`] so callers
/// can distinguish them from other CDP failures.
///
/// # Example
///
/// ```no_run
/// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
/// use std::time::Duration;
///
/// # async fn run() -> stygian_browser::error::Result<()> {
/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
/// let handle = pool.acquire().await?;
/// let mut page = handle.browser().expect("valid browser").new_page().await?;
/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
/// let nodes = page.query_selector_all("a").await?;
/// for node in &nodes {
///     let href = node.attr("href").await?;
///     let text = node.text_content().await?;
///     println!("{text}: {href:?}");
/// }
/// # Ok(())
/// # }
/// ```
pub struct NodeHandle {
    /// The underlying chromiumoxide element this handle operates on.
    element: chromiumoxide::element::Element,
    /// Selector that produced this handle; reported in
    /// [`BrowserError::StaleNode`].
    ///
    /// Shared via `Arc<str>` so all handles from a single query reuse the
    /// same allocation rather than cloning a `String` per node.
    selector: Arc<str>,
    /// Upper bound applied to each individual CDP call made through this
    /// handle.
    cdp_timeout: Duration,
    /// Page the element belongs to; used to resolve the tagged target element
    /// during DOM traversal (parent / sibling navigation).
    page: chromiumoxide::Page,
}
179
180impl NodeHandle {
181    /// Return a single attribute value, or `None` if the attribute is absent.
182    ///
183    /// Issues one `Runtime.callFunctionOn` CDP call (`el.getAttribute(name)`).
184    ///
185    /// # Errors
186    ///
187    /// invalidated, or [`BrowserError::Timeout`] / [`BrowserError::CdpError`]
188    /// on transport-level failures.
189    pub async fn attr(&self, name: &str) -> Result<Option<String>> {
190        timeout(self.cdp_timeout, self.element.attribute(name))
191            .await
192            .map_err(|_| BrowserError::Timeout {
193                operation: "NodeHandle::attr".to_string(),
194                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
195            })?
196            .map_err(|e| self.cdp_err_or_stale(&e, "attr"))
197    }
198
199    /// Return all attributes as a `HashMap<name, value>` in a **single**
200    /// CDP round-trip.
201    ///
202    /// Uses `DOM.getAttributes` (via the chromiumoxide `attributes()` API)
203    /// which returns a flat `[name, value, name, value, …]` list from the node
204    /// description — no per-attribute calls are needed.
205    ///
206    /// # Errors
207    ///
208    /// invalidated.
209    pub async fn attr_map(&self) -> Result<HashMap<String, String>> {
210        let flat = timeout(self.cdp_timeout, self.element.attributes())
211            .await
212            .map_err(|_| BrowserError::Timeout {
213                operation: "NodeHandle::attr_map".to_string(),
214                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
215            })?
216            .map_err(|e| self.cdp_err_or_stale(&e, "attr_map"))?;
217
218        let mut map = HashMap::with_capacity(flat.len() / 2);
219        for pair in flat.chunks_exact(2) {
220            if let [name, value] = pair {
221                map.insert(name.clone(), value.clone());
222            }
223        }
224        Ok(map)
225    }
226
227    /// Return the element's `textContent` (all text inside, no markup).
228    ///
229    /// Reads the DOM `textContent` property via a single JS eval — this is the
230    /// raw text concatenation of all descendant text nodes, independent of
231    /// layout or visibility (unlike `innerText`).
232    ///
233    ///
234    /// # Errors
235    ///
236    /// invalidated.
237    pub async fn text_content(&self) -> Result<String> {
238        let returns = timeout(
239            self.cdp_timeout,
240            self.element
241                .call_js_fn(r"function() { return this.textContent ?? ''; }", true),
242        )
243        .await
244        .map_err(|_| BrowserError::Timeout {
245            operation: "NodeHandle::text_content".to_string(),
246            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
247        })?
248        .map_err(|e| self.cdp_err_or_stale(&e, "text_content"))?;
249
250        Ok(returns
251            .result
252            .value
253            .as_ref()
254            .and_then(|v| v.as_str())
255            .unwrap_or("")
256            .to_string())
257    }
258
259    /// Return the element's `innerHTML`.
260    ///
261    ///
262    /// # Errors
263    ///
264    /// invalidated.
265    pub async fn inner_html(&self) -> Result<String> {
266        timeout(self.cdp_timeout, self.element.inner_html())
267            .await
268            .map_err(|_| BrowserError::Timeout {
269                operation: "NodeHandle::inner_html".to_string(),
270                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
271            })?
272            .map_err(|e| self.cdp_err_or_stale(&e, "inner_html"))
273            .map(Option::unwrap_or_default)
274    }
275
276    /// Return the element's `outerHTML`.
277    ///
278    ///
279    /// # Errors
280    ///
281    /// invalidated.
282    pub async fn outer_html(&self) -> Result<String> {
283        timeout(self.cdp_timeout, self.element.outer_html())
284            .await
285            .map_err(|_| BrowserError::Timeout {
286                operation: "NodeHandle::outer_html".to_string(),
287                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
288            })?
289            .map_err(|e| self.cdp_err_or_stale(&e, "outer_html"))
290            .map(Option::unwrap_or_default)
291    }
292
293    ///
294    /// Executes a single `Runtime.callFunctionOn` JavaScript function that
295    /// walks `parentElement` and collects tag names — no repeated CDP calls.
296    ///
297    /// ```text
298    /// ["p", "article", "body", "html"]
299    /// ```
300    ///
301    /// # Errors
302    ///
303    /// invalidated, or [`BrowserError::ScriptExecutionFailed`] when CDP
304    pub async fn ancestors(&self) -> Result<Vec<String>> {
305        let returns = timeout(
306            self.cdp_timeout,
307            self.element.call_js_fn(
308                r"function() {
309                    const a = [];
310                    let n = this.parentElement;
311                    while (n) { a.push(n.tagName.toLowerCase()); n = n.parentElement; }
312                    return a;
313                }",
314                true,
315            ),
316        )
317        .await
318        .map_err(|_| BrowserError::Timeout {
319            operation: "NodeHandle::ancestors".to_string(),
320            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
321        })?
322        .map_err(|e| self.cdp_err_or_stale(&e, "ancestors"))?;
323
324        // With returnByValue=true and an array return, CDP delivers the value
325        // as a JSON array directly — no JSON.stringify/re-parse needed.
326        // A missing or wrong-type value indicates an unexpected CDP failure.
327        let arr = returns
328            .result
329            .value
330            .as_ref()
331            .and_then(|v| v.as_array())
332            .ok_or_else(|| BrowserError::ScriptExecutionFailed {
333                script: "NodeHandle::ancestors".to_string(),
334                reason: "CDP returned no value or a non-array value for ancestors()".to_string(),
335            })?;
336
337        arr.iter()
338            .map(|v| {
339                v.as_str().map(ToString::to_string).ok_or_else(|| {
340                    BrowserError::ScriptExecutionFailed {
341                        script: "NodeHandle::ancestors".to_string(),
342                        reason: format!("ancestor entry is not a string: {v}"),
343                    }
344                })
345            })
346            .collect()
347    }
348
349    ///
350    ///
351    ///
352    /// # Errors
353    ///
354    /// invalidated, or [`BrowserError::CdpError`] on transport failure.
355    pub async fn children_matching(&self, selector: &str) -> Result<Vec<Self>> {
356        let elements = timeout(self.cdp_timeout, self.element.find_elements(selector))
357            .await
358            .map_err(|_| BrowserError::Timeout {
359                operation: "NodeHandle::children_matching".to_string(),
360                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
361            })?
362            .map_err(|e| self.cdp_err_or_stale(&e, "children_matching"))?;
363
364        let selector_arc: Arc<str> = Arc::from(selector);
365        Ok(elements
366            .into_iter()
367            .map(|el| Self {
368                element: el,
369                selector: selector_arc.clone(),
370                cdp_timeout: self.cdp_timeout,
371                page: self.page.clone(),
372            })
373            .collect())
374    }
375
376    /// Return the immediate parent element, or `None` if this element has no
377    /// parent (i.e. it is the document root).
378    ///
379    /// Issues a single `Runtime.callFunctionOn` CDP call that temporarily tags
380    /// the parent element with a unique attribute, then resolves it via a
381    /// CSS attribute selector.
382    ///
383    /// # Errors
384    ///
385    /// Returns an error if the CDP call fails or the page handle is invalidated.
386    ///
387    /// # Example
388    ///
389    /// ```no_run
390    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
391    /// use std::time::Duration;
392    ///
393    /// # async fn run() -> stygian_browser::error::Result<()> {
394    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
395    /// let handle = pool.acquire().await?;
396    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
397    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
398    /// # let nodes = page.query_selector_all("a").await?;
399    /// if let Some(parent) = nodes[0].parent().await? {
400    ///     let html = parent.outer_html().await?;
401    ///     println!("parent: {}", &html[..html.len().min(80)]);
402    /// }
403    /// # Ok(())
404    /// # }
405    /// ```
406    pub async fn parent(&self) -> Result<Option<Self>> {
407        let attr = format!(
408            "data-stygian-t-{}",
409            ulid::Ulid::new().to_string().to_lowercase()
410        );
411        let js = format!(
412            "function() {{ \
413                var t = this.parentElement; \
414                if (!t) {{ return false; }} \
415                t.setAttribute('{attr}', '1'); \
416                return true; \
417            }}"
418        );
419        self.call_traversal(&js, &attr, "parent").await
420    }
421
422    /// Return the next element sibling, or `None` if this element is the last
423    /// child of its parent.
424    ///
425    /// Uses `nextElementSibling` (skips text/comment nodes).
426    ///
427    /// # Errors
428    ///
429    /// invalidated.
430    ///
431    /// # Example
432    ///
433    /// ```no_run
434    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
435    /// use std::time::Duration;
436    ///
437    /// # async fn run() -> stygian_browser::error::Result<()> {
438    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
439    /// let handle = pool.acquire().await?;
440    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
441    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
442    /// # let nodes = page.query_selector_all("a").await?;
443    /// if let Some(next) = nodes[0].next_sibling().await? {
444    ///     println!("next sibling: {}", next.text_content().await?);
445    /// }
446    /// # Ok(())
447    /// # }
448    /// ```
449    pub async fn next_sibling(&self) -> Result<Option<Self>> {
450        let attr = format!(
451            "data-stygian-t-{}",
452            ulid::Ulid::new().to_string().to_lowercase()
453        );
454        let js = format!(
455            "function() {{ \
456                var t = this.nextElementSibling; \
457                if (!t) {{ return false; }} \
458                t.setAttribute('{attr}', '1'); \
459                return true; \
460            }}"
461        );
462        self.call_traversal(&js, &attr, "next").await
463    }
464
465    /// Return the previous element sibling, or `None` if this element is the
466    /// first child of its parent.
467    ///
468    /// Uses `previousElementSibling` (skips text/comment nodes).
469    ///
470    /// # Errors
471    ///
472    /// invalidated.
473    ///
474    /// # Example
475    ///
476    /// ```no_run
477    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
478    /// use std::time::Duration;
479    ///
480    /// # async fn run() -> stygian_browser::error::Result<()> {
481    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
482    /// let handle = pool.acquire().await?;
483    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
484    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
485    /// # let nodes = page.query_selector_all("a").await?;
486    /// if let Some(prev) = nodes[1].previous_sibling().await? {
487    ///     println!("prev sibling: {}", prev.text_content().await?);
488    /// }
489    /// # Ok(())
490    /// # }
491    /// ```
492    pub async fn previous_sibling(&self) -> Result<Option<Self>> {
493        let attr = format!(
494            "data-stygian-t-{}",
495            ulid::Ulid::new().to_string().to_lowercase()
496        );
497        let js = format!(
498            "function() {{ \
499                var t = this.previousElementSibling; \
500                if (!t) {{ return false; }} \
501                t.setAttribute('{attr}', '1'); \
502                return true; \
503            }}"
504        );
505        self.call_traversal(&js, &attr, "prev").await
506    }
507
508    /// Shared traversal implementation used by [`parent`], [`next_sibling`],
509    /// and [`previous_sibling`].
510    ///
511    /// The caller provides a JS function that:
512    /// 1. Computes the traversal target (for example, the parent, next
513    ///    sibling, or previous sibling) and stores it in a local variable.
514    /// 2. If the target is non-null, sets a unique attribute (`attr_name`)
515    ///    on it and returns `true`.
516    /// 3. Returns `false` when the target is null (no such neighbour).
517    ///
518    /// This helper then resolves the tagged element from the document root,
519    /// removes the temporary attribute, and wraps the result in a
520    /// `NodeHandle`.
521    ///
522    /// [`parent`]: Self::parent
523    /// [`next_sibling`]: Self::next_sibling
524    /// [`previous_sibling`]: Self::previous_sibling
525    async fn call_traversal(
526        &self,
527        js_fn: &str,
528        attr_name: &str,
529        selector_suffix: &str,
530    ) -> Result<Option<Self>> {
531        // Step 1: Run the JS that tags the target element and reports null/non-null.
532        let op_tag = format!("NodeHandle::{selector_suffix}::tag");
533        let returns = timeout(self.cdp_timeout, self.element.call_js_fn(js_fn, false))
534            .await
535            .map_err(|_| BrowserError::Timeout {
536                operation: op_tag.clone(),
537                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
538            })?
539            .map_err(|e| self.cdp_err_or_stale(&e, selector_suffix))?;
540
541        // JS returns false → no such neighbour.
542        let has_target = returns
543            .result
544            .value
545            .as_ref()
546            .and_then(serde_json::Value::as_bool)
547            .unwrap_or(false);
548        if !has_target {
549            return Ok(None);
550        }
551
552        let css = format!("[{attr_name}]");
553        let op_resolve = format!("NodeHandle::{selector_suffix}::resolve");
554        let element = timeout(self.cdp_timeout, self.page.find_element(css))
555            .await
556            .map_err(|_| BrowserError::Timeout {
557                operation: op_resolve.clone(),
558                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
559            })?
560            .map_err(|e| BrowserError::CdpError {
561                operation: op_resolve,
562                message: e.to_string(),
563            })?;
564
565        // is non-fatal — it leaves a harmless stale attribute in the DOM).
566        let cleanup = format!("function() {{ this.removeAttribute('{attr_name}'); }}");
567        let _ = element.call_js_fn(cleanup, false).await;
568
569        let new_selector: Arc<str> =
570            Arc::from(format!("{}::{selector_suffix}", self.selector).as_str());
571        Ok(Some(Self {
572            element,
573            selector: new_selector,
574            cdp_timeout: self.cdp_timeout,
575            page: self.page.clone(),
576        }))
577    }
578
579    /// (when the remote object reference has been invalidated) or
580    fn cdp_err_or_stale(
581        &self,
582        err: &chromiumoxide::error::CdpError,
583        operation: &str,
584    ) -> BrowserError {
585        let msg = err.to_string();
586        if msg.contains("Cannot find object with id")
587            || msg.contains("context with specified id")
588            || msg.contains("Cannot find context")
589        {
590            BrowserError::StaleNode {
591                selector: self.selector.to_string(),
592            }
593        } else {
594            BrowserError::CdpError {
595                operation: operation.to_string(),
596                message: msg,
597            }
598        }
599    }
600}
601
602// ─── PageHandle ───────────────────────────────────────────────────────────────
603
/// A handle to a single browser tab, wrapping a chromiumoxide [`Page`].
///
/// Provides navigation with configurable wait conditions, selector waiting,
/// and resource filtering on top of the raw page.
///
/// # Example
///
/// ```no_run
/// use stygian_browser::{BrowserPool, BrowserConfig};
/// use stygian_browser::page::WaitUntil;
/// use std::time::Duration;
///
/// # async fn run() -> stygian_browser::error::Result<()> {
/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
/// let handle = pool.acquire().await?;
/// let mut page = handle.browser().expect("valid browser").new_page().await?;
/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
/// let html = page.content().await?;
/// drop(page); // closes the tab
/// handle.release().await;
/// # Ok(())
/// # }
/// ```
pub struct PageHandle {
    /// The underlying chromiumoxide page.
    page: Page,
    /// Upper bound applied to individual CDP calls issued through this handle.
    cdp_timeout: Duration,
    /// HTTP status code of the most recent main-frame navigation, or `0` if not
    /// yet captured (it is reset to `0` at the start of every navigation).
    last_status_code: Arc<AtomicU16>,
    /// Background task processing `Fetch.requestPaused` events. Aborted and
    /// replaced each time `set_resource_filter` is called.
    resource_filter_task: Option<tokio::task::JoinHandle<()>>,
}
633
634impl PageHandle {
635    /// Wrap a raw chromiumoxide [`Page`] in a handle.
636    pub(crate) fn new(page: Page, cdp_timeout: Duration) -> Self {
637        Self {
638            page,
639            cdp_timeout,
640            last_status_code: Arc::new(AtomicU16::new(0)),
641            resource_filter_task: None,
642        }
643    }
644
645    ///
646    /// # Errors
647    ///
648    /// the CDP call fails.
649    pub async fn navigate(
650        &mut self,
651        url: &str,
652        condition: WaitUntil,
653        nav_timeout: Duration,
654    ) -> Result<()> {
655        self.setup_status_capture().await;
656        timeout(
657            nav_timeout,
658            self.navigate_inner(url, condition, nav_timeout),
659        )
660        .await
661        .map_err(|_| BrowserError::NavigationFailed {
662            url: url.to_string(),
663            reason: format!("navigation timed out after {nav_timeout:?}"),
664        })?
665    }
666
667    /// Reset the last status code and wire up the `Network.responseReceived`
668    /// so that a missing network domain never blocks navigation.
669    async fn setup_status_capture(&self) {
670        use chromiumoxide::cdp::browser_protocol::network::{
671            EventResponseReceived, ResourceType as NetworkResourceType,
672        };
673        use futures::StreamExt;
674
675        // Reset so a stale code is not returned if the new navigation fails
676        self.last_status_code.store(0, Ordering::Release);
677
678        let page_for_listener = self.page.clone();
679        let status_capture = Arc::clone(&self.last_status_code);
680        match page_for_listener
681            .event_listener::<EventResponseReceived>()
682            .await
683        {
684            Ok(mut stream) => {
685                tokio::spawn(async move {
686                    while let Some(event) = stream.next().await {
687                        if event.r#type == NetworkResourceType::Document {
688                            let code = u16::try_from(event.response.status).unwrap_or(0);
689                            if code > 0 {
690                                status_capture.store(code, Ordering::Release);
691                            }
692                            break;
693                        }
694                    }
695                });
696            }
697            Err(e) => warn!("status-code capture unavailable: {e}"),
698        }
699    }
700
701    /// described in issue #7.
702    async fn navigate_inner(
703        &self,
704        url: &str,
705        condition: WaitUntil,
706        nav_timeout: Duration,
707    ) -> Result<()> {
708        use chromiumoxide::cdp::browser_protocol::page::{
709            EventDomContentEventFired, EventLoadEventFired,
710        };
711        use futures::StreamExt;
712
713        let url_owned = url.to_string();
714
715        let mut dom_events = match &condition {
716            WaitUntil::DomContentLoaded => Some(
717                self.page
718                    .event_listener::<EventDomContentEventFired>()
719                    .await
720                    .map_err(|e| BrowserError::NavigationFailed {
721                        url: url_owned.clone(),
722                        reason: e.to_string(),
723                    })?,
724            ),
725            _ => None,
726        };
727
728        let mut load_events = match &condition {
729            WaitUntil::NetworkIdle => Some(
730                self.page
731                    .event_listener::<EventLoadEventFired>()
732                    .await
733                    .map_err(|e| BrowserError::NavigationFailed {
734                        url: url_owned.clone(),
735                        reason: e.to_string(),
736                    })?,
737            ),
738            _ => None,
739        };
740
741        let inflight = if matches!(condition, WaitUntil::NetworkIdle) {
742            Some(self.subscribe_inflight_counter().await)
743        } else {
744            None
745        };
746
747        self.page
748            .goto(url)
749            .await
750            .map_err(|e| BrowserError::NavigationFailed {
751                url: url_owned.clone(),
752                reason: e.to_string(),
753            })?;
754
755        match &condition {
756            WaitUntil::DomContentLoaded => {
757                if let Some(ref mut events) = dom_events {
758                    let _ = events.next().await;
759                }
760            }
761            WaitUntil::NetworkIdle => {
762                if let Some(ref mut events) = load_events {
763                    let _ = events.next().await;
764                }
765                if let Some(ref counter) = inflight {
766                    Self::wait_network_idle(counter).await;
767                }
768            }
769            WaitUntil::Selector(css) => {
770                self.wait_for_selector(css, nav_timeout).await?;
771            }
772        }
773        Ok(())
774    }
775
776    /// Spawn three detached tasks that maintain a signed in-flight request
777    /// counter via `Network.requestWillBeSent` (+1) and
778    /// `Network.loadingFinished`/`Network.loadingFailed` (−1 each).
779    async fn subscribe_inflight_counter(&self) -> Arc<std::sync::atomic::AtomicI32> {
780        use std::sync::atomic::AtomicI32;
781
782        use chromiumoxide::cdp::browser_protocol::network::{
783            EventLoadingFailed, EventLoadingFinished, EventRequestWillBeSent,
784        };
785        use futures::StreamExt;
786
787        let counter: Arc<AtomicI32> = Arc::new(AtomicI32::new(0));
788        let pairs: [(Arc<AtomicI32>, i32); 3] = [
789            (Arc::clone(&counter), 1),
790            (Arc::clone(&counter), -1),
791            (Arc::clone(&counter), -1),
792        ];
793        let [p1, p2, p3] = [self.page.clone(), self.page.clone(), self.page.clone()];
794
795        macro_rules! spawn_tracker {
796            ($page:expr, $event:ty, $c:expr, $delta:expr) => {
797                match $page.event_listener::<$event>().await {
798                    Ok(mut s) => {
799                        let c = $c;
800                        let d = $delta;
801                        tokio::spawn(async move {
802                            while s.next().await.is_some() {
803                                c.fetch_add(d, Ordering::Relaxed);
804                            }
805                        });
806                    }
807                    Err(e) => warn!("network-idle tracker unavailable: {e}"),
808                }
809            };
810        }
811
812        let [(c1, d1), (c2, d2), (c3, d3)] = pairs;
813        spawn_tracker!(p1, EventRequestWillBeSent, c1, d1);
814        spawn_tracker!(p2, EventLoadingFinished, c2, d2);
815        spawn_tracker!(p3, EventLoadingFailed, c3, d3);
816
817        counter
818    }
819
820    async fn wait_network_idle(counter: &Arc<std::sync::atomic::AtomicI32>) {
821        const IDLE_THRESHOLD: i32 = 2;
822        const SETTLE: Duration = Duration::from_millis(500);
823        loop {
824            if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
825                tokio::time::sleep(SETTLE).await;
826                if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
827                    break;
828                }
829            } else {
830                tokio::time::sleep(Duration::from_millis(50)).await;
831            }
832        }
833    }
834
835    ///
836    /// # Errors
837    ///
838    /// within the given timeout.
839    pub async fn wait_for_selector(&self, selector: &str, wait_timeout: Duration) -> Result<()> {
840        let selector_owned = selector.to_string();
841        let poll = async {
842            loop {
843                if self.page.find_element(selector_owned.clone()).await.is_ok() {
844                    return Ok(());
845                }
846                tokio::time::sleep(Duration::from_millis(100)).await;
847            }
848        };
849
850        timeout(wait_timeout, poll)
851            .await
852            .map_err(|_| BrowserError::NavigationFailed {
853                url: String::new(),
854                reason: format!("selector '{selector_owned}' not found within {wait_timeout:?}"),
855            })?
856    }
857
858    ///
859    /// Enables `Fetch` interception and spawns a background task that continues
860    /// allowed requests and fails blocked ones with `BlockedByClient`. Any
861    /// previously set filter task is cancelled first.
862    ///
863    /// # Errors
864    ///
865    pub async fn set_resource_filter(&mut self, filter: ResourceFilter) -> Result<()> {
866        use chromiumoxide::cdp::browser_protocol::fetch::{
867            ContinueRequestParams, EnableParams, EventRequestPaused, FailRequestParams,
868            RequestPattern,
869        };
870        use chromiumoxide::cdp::browser_protocol::network::ErrorReason;
871        use futures::StreamExt as _;
872
873        if filter.is_empty() {
874            return Ok(());
875        }
876
877        // Cancel any previously running filter task.
878        if let Some(task) = self.resource_filter_task.take() {
879            task.abort();
880        }
881
882        let pattern = RequestPattern::builder().url_pattern("*").build();
883        let params = EnableParams::builder()
884            .patterns(vec![pattern])
885            .handle_auth_requests(false)
886            .build();
887
888        timeout(self.cdp_timeout, self.page.execute::<EnableParams>(params))
889            .await
890            .map_err(|_| BrowserError::Timeout {
891                operation: "Fetch.enable".to_string(),
892                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
893            })?
894            .map_err(|e| BrowserError::CdpError {
895                operation: "Fetch.enable".to_string(),
896                message: e.to_string(),
897            })?;
898
899        // is never blocked. Without this handler Chrome holds every intercepted
900        // request indefinitely and the page hangs.
901        let mut events = self
902            .page
903            .event_listener::<EventRequestPaused>()
904            .await
905            .map_err(|e| BrowserError::CdpError {
906                operation: "Fetch.requestPaused subscribe".to_string(),
907                message: e.to_string(),
908            })?;
909
910        let page = self.page.clone();
911        debug!("Resource filter active: {:?}", filter);
912        let task = tokio::spawn(async move {
913            while let Some(event) = events.next().await {
914                let request_id = event.request_id.clone();
915                if filter.should_block(event.resource_type.as_ref()) {
916                    let params = FailRequestParams::new(request_id, ErrorReason::BlockedByClient);
917                    let _ = page.execute(params).await;
918                } else {
919                    let _ = page.execute(ContinueRequestParams::new(request_id)).await;
920                }
921            }
922        });
923
924        self.resource_filter_task = Some(task);
925        Ok(())
926    }
927
928    /// Return the current page URL (post-navigation, post-redirect).
929    ///
930    /// internally by [`save_cookies`](Self::save_cookies); no extra network
931    /// request is made.  Returns an empty string if the URL is not yet set
932    ///
933    /// # Errors
934    ///
935    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
936    ///
937    /// # Example
938    ///
939    /// ```no_run
940    /// use stygian_browser::{BrowserPool, BrowserConfig};
941    /// use stygian_browser::page::WaitUntil;
942    /// use std::time::Duration;
943    ///
944    /// # async fn run() -> stygian_browser::error::Result<()> {
945    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
946    /// let handle = pool.acquire().await?;
947    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
948    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
949    /// let url = page.url().await?;
950    /// println!("Final URL after redirects: {url}");
951    /// # Ok(())
952    /// # }
953    /// ```
954    pub async fn url(&self) -> Result<String> {
955        timeout(self.cdp_timeout, self.page.url())
956            .await
957            .map_err(|_| BrowserError::Timeout {
958                operation: "page.url".to_string(),
959                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
960            })?
961            .map_err(|e| BrowserError::CdpError {
962                operation: "page.url".to_string(),
963                message: e.to_string(),
964            })
965            .map(Option::unwrap_or_default)
966    }
967
968    /// Return the HTTP status code of the most recent main-frame navigation.
969    ///
970    /// The status is captured from the `Network.responseReceived` CDP event
971    /// wired up inside [`navigate`](Self::navigate), so it reflects the
972    /// *final* response after any server-side redirects.
973    ///
974    /// navigations, when [`navigate`](Self::navigate) has not yet been called,
975    /// or if the network event subscription failed.
976    ///
977    /// # Errors
978    ///
979    ///
980    /// # Example
981    ///
982    /// ```no_run
983    /// use stygian_browser::{BrowserPool, BrowserConfig};
984    /// use stygian_browser::page::WaitUntil;
985    /// use std::time::Duration;
986    ///
987    /// # async fn run() -> stygian_browser::error::Result<()> {
988    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
989    /// let handle = pool.acquire().await?;
990    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
991    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
992    /// if let Some(code) = page.status_code()? {
993    ///     println!("HTTP {code}");
994    /// }
995    /// # Ok(())
996    /// # }
997    /// ```
998    pub fn status_code(&self) -> Result<Option<u16>> {
999        let code = self.last_status_code.load(Ordering::Acquire);
1000        Ok(if code == 0 { None } else { Some(code) })
1001    }
1002
1003    /// Return the page's `<title>` text.
1004    ///
1005    /// # Errors
1006    ///
1007    pub async fn title(&self) -> Result<String> {
1008        timeout(self.cdp_timeout, self.page.get_title())
1009            .await
1010            .map_err(|_| BrowserError::Timeout {
1011                operation: "get_title".to_string(),
1012                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1013            })?
1014            .map_err(|e| BrowserError::ScriptExecutionFailed {
1015                script: "document.title".to_string(),
1016                reason: e.to_string(),
1017            })
1018            .map(Option::unwrap_or_default)
1019    }
1020
1021    /// Return the page's full outer HTML.
1022    ///
1023    /// # Errors
1024    ///
1025    pub async fn content(&self) -> Result<String> {
1026        timeout(self.cdp_timeout, self.page.content())
1027            .await
1028            .map_err(|_| BrowserError::Timeout {
1029                operation: "page.content".to_string(),
1030                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1031            })?
1032            .map_err(|e| BrowserError::ScriptExecutionFailed {
1033                script: "document.documentElement.outerHTML".to_string(),
1034                reason: e.to_string(),
1035            })
1036    }
1037
1038    /// lightweight [`NodeHandle`]s backed by CDP `RemoteObjectId`s.
1039    ///
1040    /// No HTML serialisation occurs — the browser's in-memory DOM is queried
1041    /// directly over the CDP connection, eliminating the `page.content()` +
1042    /// `scraper::Html::parse_document` round-trip.
1043    ///
1044    ///
1045    /// # Errors
1046    ///
1047    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
1048    ///
1049    /// # Example
1050    ///
1051    /// ```no_run
1052    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1053    /// use std::time::Duration;
1054    ///
1055    /// # async fn run() -> stygian_browser::error::Result<()> {
1056    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1057    /// let handle = pool.acquire().await?;
1058    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1059    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
1060    /// # let nodes = page.query_selector_all("div[data-ux]").await?;
1061    /// # for node in &nodes {
1062    ///     let ux_type = node.attr("data-ux").await?;
1063    ///     let text    = node.text_content().await?;
1064    ///     println!("{ux_type:?}: {text}");
1065    /// # }
1066    /// # Ok(())
1067    /// # }
1068    /// ```
1069    pub async fn query_selector_all(&self, selector: &str) -> Result<Vec<NodeHandle>> {
1070        let elements = timeout(self.cdp_timeout, self.page.find_elements(selector))
1071            .await
1072            .map_err(|_| BrowserError::Timeout {
1073                operation: "PageHandle::query_selector_all".to_string(),
1074                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1075            })?
1076            .map_err(|e| BrowserError::CdpError {
1077                operation: "PageHandle::query_selector_all".to_string(),
1078                message: e.to_string(),
1079            })?;
1080
1081        let selector_arc: Arc<str> = Arc::from(selector);
1082        Ok(elements
1083            .into_iter()
1084            .map(|el| NodeHandle {
1085                element: el,
1086                selector: selector_arc.clone(),
1087                cdp_timeout: self.cdp_timeout,
1088                page: self.page.clone(),
1089            })
1090            .collect())
1091    }
1092
1093    /// Evaluate arbitrary JavaScript and return the result as `T`.
1094    ///
1095    /// # Errors
1096    ///
1097    /// deserialization error.
1098    pub async fn eval<T: serde::de::DeserializeOwned>(&self, script: &str) -> Result<T> {
1099        let script_owned = script.to_string();
1100        timeout(self.cdp_timeout, self.page.evaluate(script))
1101            .await
1102            .map_err(|_| BrowserError::Timeout {
1103                operation: "page.evaluate".to_string(),
1104                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1105            })?
1106            .map_err(|e| BrowserError::ScriptExecutionFailed {
1107                script: script_owned.clone(),
1108                reason: e.to_string(),
1109            })?
1110            .into_value::<T>()
1111            .map_err(|e| BrowserError::ScriptExecutionFailed {
1112                script: script_owned,
1113                reason: e.to_string(),
1114            })
1115    }
1116
1117    ///
1118    /// # Errors
1119    ///
1120    pub async fn save_cookies(
1121        &self,
1122    ) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::Cookie>> {
1123        use chromiumoxide::cdp::browser_protocol::network::GetCookiesParams;
1124
1125        let url = self
1126            .page
1127            .url()
1128            .await
1129            .map_err(|e| BrowserError::CdpError {
1130                operation: "page.url".to_string(),
1131                message: e.to_string(),
1132            })?
1133            .unwrap_or_default();
1134
1135        timeout(
1136            self.cdp_timeout,
1137            self.page
1138                .execute(GetCookiesParams::builder().urls(vec![url]).build()),
1139        )
1140        .await
1141        .map_err(|_| BrowserError::Timeout {
1142            operation: "Network.getCookies".to_string(),
1143            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1144        })?
1145        .map_err(|e| BrowserError::CdpError {
1146            operation: "Network.getCookies".to_string(),
1147            message: e.to_string(),
1148        })
1149        .map(|r| r.cookies.clone())
1150    }
1151
1152    ///
1153    /// [`SessionSnapshot`][crate::session::SessionSnapshot] and without
1154    /// requiring a direct `chromiumoxide` dependency in calling code.
1155    ///
1156    /// Individual cookie failures are logged as warnings and do not abort the
1157    /// remaining cookies.
1158    ///
1159    /// # Errors
1160    ///
1161    /// call exceeds `cdp_timeout`.
1162    ///
1163    /// # Example
1164    ///
1165    /// ```no_run
1166    /// use stygian_browser::{BrowserPool, BrowserConfig};
1167    /// use stygian_browser::session::SessionCookie;
1168    /// use std::time::Duration;
1169    ///
1170    /// # async fn run() -> stygian_browser::error::Result<()> {
1171    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1172    /// let handle = pool.acquire().await?;
1173    /// let page = handle.browser().expect("valid browser").new_page().await?;
1174    /// let cookies = vec![SessionCookie {
1175    ///     name: "session".to_string(),
1176    ///     value: "abc123".to_string(),
1177    ///     domain: ".example.com".to_string(),
1178    ///     path: "/".to_string(),
1179    ///     expires: -1.0,
1180    ///     http_only: true,
1181    ///     secure: true,
1182    ///     same_site: "Lax".to_string(),
1183    /// }];
1184    /// page.inject_cookies(&cookies).await?;
1185    /// # Ok(())
1186    /// # }
1187    /// ```
1188    pub async fn inject_cookies(&self, cookies: &[crate::session::SessionCookie]) -> Result<()> {
1189        use chromiumoxide::cdp::browser_protocol::network::SetCookieParams;
1190
1191        for cookie in cookies {
1192            let params = match SetCookieParams::builder()
1193                .name(cookie.name.clone())
1194                .value(cookie.value.clone())
1195                .domain(cookie.domain.clone())
1196                .path(cookie.path.clone())
1197                .http_only(cookie.http_only)
1198                .secure(cookie.secure)
1199                .build()
1200            {
1201                Ok(p) => p,
1202                Err(e) => {
1203                    warn!(cookie = %cookie.name, error = %e, "Failed to build cookie params");
1204                    continue;
1205                }
1206            };
1207
1208            match timeout(self.cdp_timeout, self.page.execute(params)).await {
1209                Err(_) => {
1210                    warn!(
1211                        cookie = %cookie.name,
1212                        timeout_ms = self.cdp_timeout.as_millis(),
1213                        "Timed out injecting cookie"
1214                    );
1215                }
1216                Ok(Err(e)) => {
1217                    warn!(cookie = %cookie.name, error = %e, "Failed to inject cookie");
1218                }
1219                Ok(Ok(_)) => {}
1220            }
1221        }
1222
1223        debug!(count = cookies.len(), "Cookies injected");
1224        Ok(())
1225    }
1226
1227    /// Capture a screenshot of the current page as PNG bytes.
1228    ///
1229    /// them in-memory.
1230    ///
1231    /// # Errors
1232    ///
1233    /// command fails, or [`BrowserError::Timeout`] if it exceeds
1234    /// `cdp_timeout`.
1235    ///
1236    /// # Example
1237    ///
1238    /// ```no_run
1239    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1240    /// use std::{time::Duration, fs};
1241    ///
1242    /// # async fn run() -> stygian_browser::error::Result<()> {
1243    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1244    /// let handle = pool.acquire().await?;
1245    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1246    /// let png = page.screenshot().await?;
1247    /// fs::write("screenshot.png", &png).unwrap();
1248    /// # Ok(())
1249    /// # }
1250    /// ```
1251    pub async fn screenshot(&self) -> Result<Vec<u8>> {
1252        use chromiumoxide::page::ScreenshotParams;
1253
1254        let params = ScreenshotParams::builder().full_page(true).build();
1255
1256        timeout(self.cdp_timeout, self.page.screenshot(params))
1257            .await
1258            .map_err(|_| BrowserError::Timeout {
1259                operation: "Page.captureScreenshot".to_string(),
1260                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1261            })?
1262            .map_err(|e| BrowserError::CdpError {
1263                operation: "Page.captureScreenshot".to_string(),
1264                message: e.to_string(),
1265            })
1266    }
1267
1268    /// Borrow the underlying chromiumoxide [`Page`].
1269    pub const fn inner(&self) -> &Page {
1270        &self.page
1271    }
1272
1273    /// Close this page (tab).
1274    ///
1275    pub async fn close(self) -> Result<()> {
1276        timeout(Duration::from_secs(5), self.page.clone().close())
1277            .await
1278            .map_err(|_| BrowserError::Timeout {
1279                operation: "page.close".to_string(),
1280                duration_ms: 5000,
1281            })?
1282            .map_err(|e| BrowserError::CdpError {
1283                operation: "page.close".to_string(),
1284                message: e.to_string(),
1285            })
1286    }
1287}
1288
1289// ─── Stealth diagnostics ──────────────────────────────────────────────────────
1290
#[cfg(feature = "stealth")]
impl PageHandle {
    /// Run all built-in stealth detection checks against the current page.
    ///
    /// Iterates [`crate::diagnostic::all_checks`], evaluates each check's
    /// JavaScript via CDP `Runtime.evaluate`, and returns an aggregate
    /// [`crate::diagnostic::DiagnosticReport`].
    ///
    /// Checks whose script fails to evaluate are logged as warnings and
    /// recorded as failing checks; they do **not** abort the whole run.
    ///
    /// # Errors
    ///
    /// The implementation always returns `Ok`. Individual check failures are
    /// captured in the report.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # async fn run() -> stygian_browser::error::Result<()> {
    /// use stygian_browser::{BrowserPool, BrowserConfig};
    /// use stygian_browser::page::WaitUntil;
    /// use std::time::Duration;
    ///
    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
    /// let handle = pool.acquire().await?;
    /// let browser = handle.browser().expect("valid browser");
    /// let mut page = browser.new_page().await?;
    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(10)).await?;
    ///
    /// let report = page.verify_stealth().await?;
    /// println!("Stealth: {}/{} checks passed", report.passed_count, report.checks.len());
    /// for failure in report.failures() {
    ///     eprintln!("  FAIL  {}: {}", failure.description, failure.details);
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub async fn verify_stealth(&self) -> Result<crate::diagnostic::DiagnosticReport> {
        use crate::diagnostic::{CheckResult, DiagnosticReport, all_checks};

        let mut outcomes: Vec<CheckResult> = Vec::new();

        for check in all_checks() {
            let outcome = match self.eval::<String>(check.script).await {
                Err(e) => {
                    // A script that cannot be evaluated counts as a failed check.
                    warn!(
                        check = ?check.id,
                        error = %e,
                        "stealth check script failed during evaluation"
                    );
                    CheckResult {
                        id: check.id,
                        description: check.description.to_string(),
                        passed: false,
                        details: format!("script error: {e}"),
                    }
                }
                Ok(json) => check.parse_output(&json),
            };

            debug!(
                check = ?outcome.id,
                passed = outcome.passed,
                details = %outcome.details,
                "stealth check result"
            );
            outcomes.push(outcome);
        }

        Ok(DiagnosticReport::new(outcomes))
    }

    /// Run stealth checks and attach transport diagnostics (JA3/JA4/HTTP3).
    ///
    /// Runs [`verify_stealth`](Self::verify_stealth), reads
    /// `navigator.userAgent` from the page, and combines the user agent with
    /// the optional `observed` transport data into a transport diagnostic
    /// attached to the report. If the user agent cannot be read, a warning is
    /// logged and an empty string is used instead.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by [`verify_stealth`](Self::verify_stealth).
    pub async fn verify_stealth_with_transport(
        &self,
        observed: Option<crate::diagnostic::TransportObservations>,
    ) -> Result<crate::diagnostic::DiagnosticReport> {
        use crate::diagnostic::TransportDiagnostic;

        let report = self.verify_stealth().await?;

        let user_agent = self
            .eval::<String>("navigator.userAgent")
            .await
            .unwrap_or_else(|e| {
                warn!(error = %e, "failed to read navigator.userAgent for transport diagnostics");
                String::new()
            });

        let transport =
            TransportDiagnostic::from_user_agent_and_observations(&user_agent, observed.as_ref());

        Ok(report.with_transport(transport))
    }
}
1385
1386// ─── extract feature ─────────────────────────────────────────────────────────
1387
#[cfg(feature = "extract")]
impl PageHandle {
    /// Select every element matching `selector` and extract a `T` from each.
    ///
    /// Elements are located with
    /// [`query_selector_all`](Self::query_selector_all); each resulting node
    /// is handed to `T::extract_from`.
    ///
    /// All per-node extractions are driven concurrently via
    /// [`futures::future::try_join_all`].
    ///
    /// # Errors
    ///
    /// Returns any error from [`query_selector_all`](Self::query_selector_all)
    /// if the element lookup fails, or [`BrowserError::ExtractionFailed`] if
    /// any field extraction fails.
    ///
    /// # Example
    ///
    /// ```ignore
    /// use stygian_browser::extract::Extract;
    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
    /// use std::time::Duration;
    ///
    /// #[derive(Extract)]
    /// struct Link {
    ///     href: Option<String>,
    /// }
    ///
    /// # async fn run() -> stygian_browser::error::Result<()> {
    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
    /// let handle = pool.acquire().await?;
    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
    /// page.navigate(
    ///     "https://example.com",
    ///     WaitUntil::DomContentLoaded,
    ///     Duration::from_secs(30),
    /// ).await?;
    /// let links: Vec<Link> = page.extract_all::<Link>("nav li").await?;
    /// # Ok(())
    /// # }
    /// ```
    pub async fn extract_all<T>(&self, selector: &str) -> Result<Vec<T>>
    where
        T: crate::extract::Extractable,
    {
        use futures::future::try_join_all;

        let nodes = self.query_selector_all(selector).await?;

        // One extraction future per node; the first failure aborts the batch.
        let pending = nodes.iter().map(|node| T::extract_from(node));
        match try_join_all(pending).await {
            Ok(items) => Ok(items),
            Err(e) => Err(BrowserError::ExtractionFailed(e)),
        }
    }
}
1437
1438// ─── similarity feature ──────────────────────────────────────────────────────
1439
#[cfg(feature = "similarity")]
impl NodeHandle {
    /// Compute a structural [`crate::similarity::ElementFingerprint`] for this
    /// node.
    ///
    /// Issues a single `Runtime.callFunctionOn` JS eval that extracts the tag,
    /// class list, attribute names, and body-depth in one round-trip. The
    /// script returns a JSON string that is deserialised on the Rust side.
    ///
    /// # Errors
    ///
    /// Returns [`BrowserError::Timeout`] if the call exceeds `cdp_timeout`,
    /// the error produced by `cdp_err_or_stale` if the CDP call fails (e.g.
    /// the node has been invalidated), or
    /// [`BrowserError::ScriptExecutionFailed`] if the script produces
    /// unexpected output.
    pub async fn fingerprint(&self) -> Result<crate::similarity::ElementFingerprint> {
        const JS: &str = r"function() {
    var el = this;
    var tag = el.tagName.toLowerCase();
    var classes = Array.prototype.slice.call(el.classList).sort();
    var attrNames = Array.prototype.slice.call(el.attributes)
        .map(function(a) { return a.name; })
        .filter(function(n) { return n !== 'class' && n !== 'id'; })
        .sort();
    var depth = 0;
    var n = el.parentElement;
    while (n && n.tagName.toLowerCase() !== 'body') { depth++; n = n.parentElement; }
    return JSON.stringify({ tag: tag, classes: classes, attrNames: attrNames, depth: depth });
}";

        let call = self.element.call_js_fn(JS, true);
        let returns = match tokio::time::timeout(self.cdp_timeout, call).await {
            Ok(outcome) => outcome.map_err(|e| self.cdp_err_or_stale(&e, "fingerprint"))?,
            Err(_elapsed) => {
                return Err(BrowserError::Timeout {
                    operation: "NodeHandle::fingerprint".to_string(),
                    duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
                });
            }
        };

        // The script is expected to hand back a JSON document as a string.
        let payload = match returns.result.value.as_ref().and_then(|v| v.as_str()) {
            Some(text) => text,
            None => {
                return Err(BrowserError::ScriptExecutionFailed {
                    script: "NodeHandle::fingerprint".to_string(),
                    reason: "CDP returned no string value from fingerprint script".to_string(),
                });
            }
        };

        match serde_json::from_str::<crate::similarity::ElementFingerprint>(payload) {
            Ok(fingerprint) => Ok(fingerprint),
            Err(e) => Err(BrowserError::ScriptExecutionFailed {
                script: "NodeHandle::fingerprint".to_string(),
                reason: format!("failed to deserialise fingerprint JSON: {e}"),
            }),
        }
    }
}
1492
#[cfg(feature = "similarity")]
impl PageHandle {
    /// Find elements on the page that are structurally similar to
    /// `reference`, scored by [`crate::similarity::SimilarityConfig`].
    ///
    /// Fingerprints the reference node (via [`NodeHandle::fingerprint`]), then
    /// fingerprints every candidate returned by `query_selector_all("*")`,
    /// one at a time, and keeps those whose
    /// [`crate::similarity::jaccard_weighted`] score is at least
    /// `config.threshold`.  Results are ordered by score descending. When
    /// `config.max_results` is greater than zero, the list is truncated to
    /// that many entries.
    ///
    /// Candidates whose fingerprint cannot be computed are skipped.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
    /// use stygian_browser::similarity::SimilarityConfig;
    /// use std::time::Duration;
    ///
    /// # async fn run() -> stygian_browser::error::Result<()> {
    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
    /// let handle = pool.acquire().await?;
    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
    ///
    /// let nodes = page.query_selector_all("h1").await?;
    /// let reference = nodes.into_iter().next().ok_or(stygian_browser::error::BrowserError::StaleNode { selector: "h1".to_string() })?;
    /// let similar = page.find_similar(&reference, SimilarityConfig::default()).await?;
    /// for m in &similar {
    ///     println!("score={:.2}", m.score);
    /// }
    /// # Ok(())
    /// # }
    /// ```
    ///
    /// # Errors
    ///
    /// Propagates any error from fingerprinting `reference` or from the
    /// candidate lookup, e.g. [`BrowserError::ScriptExecutionFailed`] if the
    /// reference's scoring script fails.
    pub async fn find_similar(
        &self,
        reference: &NodeHandle,
        config: crate::similarity::SimilarityConfig,
    ) -> Result<Vec<crate::similarity::SimilarMatch>> {
        use crate::similarity::{SimilarMatch, jaccard_weighted};

        let ref_fp = reference.fingerprint().await?;

        let mut matches: Vec<SimilarMatch> = Vec::new();
        for node in self.query_selector_all("*").await? {
            let cand_fp = match node.fingerprint().await {
                Ok(fp) => fp,
                // Stale / detached nodes are silently skipped.
                Err(_) => continue,
            };

            let score = jaccard_weighted(&ref_fp, &cand_fp);
            if score >= config.threshold {
                matches.push(SimilarMatch { node, score });
            }
        }

        // Highest score first; incomparable scores are treated as equal.
        matches.sort_by(|left, right| {
            right
                .score
                .partial_cmp(&left.score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        // A `max_results` of zero means "no limit".
        if config.max_results > 0 {
            matches.truncate(config.max_results);
        }

        Ok(matches)
    }
}
1561
1562impl Drop for PageHandle {
1563    fn drop(&mut self) {
1564        warn!("PageHandle dropped without explicit close(); spawning cleanup task");
1565        // chromiumoxide Page does not implement close on Drop, so we spawn
1566        // swap it out. We clone the Page handle (it's Arc-backed internally).
1567        let page = self.page.clone();
1568        tokio::spawn(async move {
1569            let _ = page.close().await;
1570        });
1571    }
1572}
1573
1574// ─── Tests ────────────────────────────────────────────────────────────────────
1575
1576#[cfg(test)]
1577mod tests {
1578    use super::*;
1579
1580    #[test]
1581    fn resource_filter_block_media_blocks_image() {
1582        let filter = ResourceFilter::block_media();
1583        assert!(filter.should_block("Image"));
1584        assert!(filter.should_block("Font"));
1585        assert!(filter.should_block("Stylesheet"));
1586        assert!(filter.should_block("Media"));
1587        assert!(!filter.should_block("Script"));
1588        assert!(!filter.should_block("XHR"));
1589    }
1590
1591    #[test]
1592    fn resource_filter_case_insensitive() {
1593        let filter = ResourceFilter::block_images_and_fonts();
1594        assert!(filter.should_block("image")); // lowercase
1595        assert!(filter.should_block("IMAGE")); // uppercase
1596        assert!(!filter.should_block("Stylesheet"));
1597    }
1598
1599    #[test]
1600    fn resource_filter_builder_chain() {
1601        let filter = ResourceFilter::default()
1602            .block(ResourceType::Image)
1603            .block(ResourceType::Font);
1604        assert!(filter.should_block("Image"));
1605        assert!(filter.should_block("Font"));
1606        assert!(!filter.should_block("Stylesheet"));
1607    }
1608
1609    #[test]
1610    fn resource_filter_dedup_block() {
1611        let filter = ResourceFilter::default()
1612            .block(ResourceType::Image)
1613            .block(ResourceType::Image); // duplicate
1614        assert_eq!(filter.blocked.len(), 1);
1615    }
1616
1617    #[test]
1618    fn resource_filter_is_empty_when_default() {
1619        assert!(ResourceFilter::default().is_empty());
1620        assert!(!ResourceFilter::block_media().is_empty());
1621    }
1622
1623    #[test]
1624    fn wait_until_selector_stores_string() {
1625        let w = WaitUntil::Selector("#foo".to_string());
1626        assert!(matches!(w, WaitUntil::Selector(ref s) if s == "#foo"));
1627    }
1628
1629    #[test]
1630    fn resource_type_cdp_str() {
1631        assert_eq!(ResourceType::Image.as_cdp_str(), "Image");
1632        assert_eq!(ResourceType::Font.as_cdp_str(), "Font");
1633        assert_eq!(ResourceType::Stylesheet.as_cdp_str(), "Stylesheet");
1634        assert_eq!(ResourceType::Media.as_cdp_str(), "Media");
1635    }
1636
1637    #[test]
1638    fn page_handle_is_send_sync() {
1639        fn assert_send<T: Send>() {}
1640        fn assert_sync<T: Sync>() {}
1641        assert_send::<PageHandle>();
1642        assert_sync::<PageHandle>();
1643    }
1644
1645    /// `Option<u16>` are pure-logic invariants testable without a live browser.
1646    #[test]
1647    fn status_code_sentinel_zero_maps_to_none() {
1648        use std::sync::atomic::{AtomicU16, Ordering};
1649        let atom = AtomicU16::new(0);
1650        let code = atom.load(Ordering::Acquire);
1651        assert_eq!(if code == 0 { None } else { Some(code) }, None::<u16>);
1652    }
1653
1654    #[test]
1655    fn status_code_non_zero_maps_to_some() {
1656        use std::sync::atomic::{AtomicU16, Ordering};
1657        for &expected in &[200u16, 301, 404, 503] {
1658            let atom = AtomicU16::new(expected);
1659            let code = atom.load(Ordering::Acquire);
1660            assert_eq!(if code == 0 { None } else { Some(code) }, Some(expected));
1661        }
1662    }
1663
1664    // ── NodeHandle pure-logic tests ───────────────────────────────────────────
1665
1666    /// `attr_map` relies on `chunks_exact(2)` — verify the pairing logic is
1667    /// correct without a live browser by exercising it directly.
1668    #[test]
1669    fn attr_map_chunking_pairs_correctly() {
1670        let flat = [
1671            "id".to_string(),
1672            "main".to_string(),
1673            "data-ux".to_string(),
1674            "Section".to_string(),
1675            "class".to_string(),
1676            "container".to_string(),
1677        ];
1678        let mut map = std::collections::HashMap::with_capacity(flat.len() / 2);
1679        for pair in flat.chunks_exact(2) {
1680            if let [name, value] = pair {
1681                map.insert(name.clone(), value.clone());
1682            }
1683        }
1684        assert_eq!(map.get("id").map(String::as_str), Some("main"));
1685        assert_eq!(map.get("data-ux").map(String::as_str), Some("Section"));
1686        assert_eq!(map.get("class").map(String::as_str), Some("container"));
1687        assert_eq!(map.len(), 3);
1688    }
1689
1690    /// gracefully — the trailing element is silently ignored.
1691    #[test]
1692    fn attr_map_chunking_ignores_odd_trailing() {
1693        let flat = ["orphan".to_string()]; // no value
1694        let mut map = std::collections::HashMap::new();
1695        for pair in flat.chunks_exact(2) {
1696            if let [name, value] = pair {
1697                map.insert(name.clone(), value.clone());
1698            }
1699        }
1700        assert!(map.is_empty());
1701    }
1702
1703    /// Empty flat list → empty map.
1704    #[test]
1705    fn attr_map_chunking_empty_input() {
1706        let flat: Vec<String> = vec![];
1707        let map: std::collections::HashMap<String, String> = flat
1708            .chunks_exact(2)
1709            .filter_map(|pair| {
1710                if let [name, value] = pair {
1711                    Some((name.clone(), value.clone()))
1712                } else {
1713                    None
1714                }
1715            })
1716            .collect();
1717        assert!(map.is_empty());
1718    }
1719
1720    #[test]
1721    fn ancestors_json_parse_round_trip() -> std::result::Result<(), serde_json::Error> {
1722        let json = r#"["p","article","body","html"]"#;
1723        let result: Vec<String> = serde_json::from_str(json)?;
1724        assert_eq!(result, ["p", "article", "body", "html"]);
1725        Ok(())
1726    }
1727
1728    #[test]
1729    fn ancestors_json_parse_empty() -> std::result::Result<(), serde_json::Error> {
1730        let json = "[]";
1731        let result: Vec<String> = serde_json::from_str(json)?;
1732        assert!(result.is_empty());
1733        Ok(())
1734    }
1735
1736    /// `"div::parent"`) must surface that suffix in its `Display` output so
1737    /// callers can locate the failed traversal in logs.
1738    #[test]
1739    fn traversal_selector_suffix_in_stale_error() {
1740        let e = crate::error::BrowserError::StaleNode {
1741            selector: "div::parent".to_string(),
1742        };
1743        let msg = e.to_string();
1744        assert!(
1745            msg.contains("div::parent"),
1746            "StaleNode display must include the full selector; got: {msg}"
1747        );
1748    }
1749
1750    #[test]
1751    fn traversal_next_suffix_in_stale_error() {
1752        let e = crate::error::BrowserError::StaleNode {
1753            selector: "li.price::next".to_string(),
1754        };
1755        assert!(e.to_string().contains("li.price::next"));
1756    }
1757
1758    #[test]
1759    fn traversal_prev_suffix_in_stale_error() {
1760        let e = crate::error::BrowserError::StaleNode {
1761            selector: "td.label::prev".to_string(),
1762        };
1763        assert!(e.to_string().contains("td.label::prev"));
1764    }
1765}