Skip to main content

stygian_browser/
page.rs

1//! Page and browsing context management for isolated, parallel scraping
2//!
3//! Each `BrowserContext` (future) is an incognito-style isolation boundary (separate
4//! cookies, localStorage, cache).  Each context can contain many [`PageHandle`]s
5//! (tabs).  Both types clean up their CDP resources automatically on drop.
6//!
7//! ## Resource blocking
8//!
9//! Pass a [`ResourceFilter`] to [`PageHandle::set_resource_filter`] to intercept
10//! and block specific request types (images, fonts, CSS) before page load —
11//! significantly reducing page load times for text-only scraping.
12//!
13//! ## Wait strategies
14//!
15//! [`PageHandle`] exposes three wait strategies via [`WaitUntil`]:
16//! - `DomContentLoaded` — fires when the HTML is parsed
17//! - `NetworkIdle` — fires when there are ≤2 in-flight requests for 500 ms
18//! - `Selector(css)` — fires when a CSS selector matches an element
19//!
20//! # Example
21//!
22//! ```no_run
23//! use stygian_browser::{BrowserPool, BrowserConfig};
24//! use stygian_browser::page::{ResourceFilter, WaitUntil};
25//! use std::time::Duration;
26//!
27//! # async fn run() -> stygian_browser::error::Result<()> {
28//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
29//! let handle = pool.acquire().await?;
30//!
31//! let mut page = handle.browser().expect("valid browser").new_page().await?;
32//! page.set_resource_filter(ResourceFilter::block_media()).await?;
33//! page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
34//! let title = page.title().await?;
35//! println!("title: {title}");
36//! handle.release().await;
37//! # Ok(())
38//! # }
39//! ```
40
41use std::sync::{
42    Arc,
43    atomic::{AtomicU16, Ordering},
44};
45use std::time::Duration;
46
47use chromiumoxide::Page;
48use tokio::time::timeout;
49use tracing::{debug, warn};
50
51use crate::error::{BrowserError, Result};
52
53// ─── ResourceType ─────────────────────────────────────────────────────────────
54
/// Categories of network resources that can be intercepted over CDP.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResourceType {
    /// Images: `<img>`, `<picture>`, and CSS background images.
    Image,
    /// Web fonts pulled in through CSS `@font-face` rules.
    Font,
    /// Externally linked CSS stylesheets.
    Stylesheet,
    /// Audio and video media files.
    Media,
}

impl ResourceType {
    /// Spelling of this resource type as it appears in CDP
    /// `Network.requestIntercepted` events.
    pub const fn as_cdp_str(&self) -> &'static str {
        match *self {
            ResourceType::Image => "Image",
            ResourceType::Font => "Font",
            ResourceType::Stylesheet => "Stylesheet",
            ResourceType::Media => "Media",
        }
    }
}
79
80// ─── ResourceFilter ───────────────────────────────────────────────────────────
81
82/// Set of resource types to block from loading.
83///
84/// # Example
85///
86/// ```
87/// use stygian_browser::page::ResourceFilter;
88/// let filter = ResourceFilter::block_media();
89/// assert!(filter.should_block("Image"));
90/// ```
91#[derive(Debug, Clone, Default)]
92pub struct ResourceFilter {
93    blocked: Vec<ResourceType>,
94}
95
96impl ResourceFilter {
97    /// Block all media resources (images, fonts, CSS, audio/video).
98    pub fn block_media() -> Self {
99        Self {
100            blocked: vec![
101                ResourceType::Image,
102                ResourceType::Font,
103                ResourceType::Stylesheet,
104                ResourceType::Media,
105            ],
106        }
107    }
108
109    /// Block only images and fonts (keep styles for layout-sensitive work).
110    pub fn block_images_and_fonts() -> Self {
111        Self {
112            blocked: vec![ResourceType::Image, ResourceType::Font],
113        }
114    }
115
116    /// Add a resource type to the block list.
117    #[must_use]
118    pub fn block(mut self, resource: ResourceType) -> Self {
119        if !self.blocked.contains(&resource) {
120            self.blocked.push(resource);
121        }
122        self
123    }
124
125    /// Returns `true` if the given CDP resource type string should be blocked.
126    pub fn should_block(&self, cdp_type: &str) -> bool {
127        self.blocked
128            .iter()
129            .any(|r| r.as_cdp_str().eq_ignore_ascii_case(cdp_type))
130    }
131
132    /// Returns `true` if no resource types are blocked.
133    pub const fn is_empty(&self) -> bool {
134        self.blocked.is_empty()
135    }
136}
137
138// ─── WaitUntil ────────────────────────────────────────────────────────────────
139
/// Condition to wait for after a navigation.
///
/// # Example
///
/// ```
/// use stygian_browser::page::WaitUntil;
/// let w = WaitUntil::Selector("#main".to_string());
/// assert!(matches!(w, WaitUntil::Selector(_)));
/// assert_eq!(w, WaitUntil::Selector("#main".to_string()));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WaitUntil {
    /// Wait for the `Page.domContentEventFired` CDP event — fires when the HTML
    /// document has been fully parsed and the DOM is ready, before subresources
    /// such as images and stylesheets finish loading.
    DomContentLoaded,
    /// Wait for the `Page.loadEventFired` CDP event **and** then wait until no
    /// more than 2 network requests are in-flight for at least 500 ms
    /// (equivalent to Puppeteer's `networkidle2`).
    NetworkIdle,
    /// Wait until `document.querySelector(selector)` returns a non-null element.
    Selector(String),
}
162
163// ─── PageHandle ───────────────────────────────────────────────────────────────
164
/// A handle to an open browser tab.
///
/// On drop the underlying page is closed automatically.
///
/// # Example
///
/// ```no_run
/// use stygian_browser::{BrowserPool, BrowserConfig};
/// use stygian_browser::page::WaitUntil;
/// use std::time::Duration;
///
/// # async fn run() -> stygian_browser::error::Result<()> {
/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
/// let handle = pool.acquire().await?;
/// let mut page = handle.browser().expect("valid browser").new_page().await?;
/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
/// let html = page.content().await?;
/// drop(page); // closes the tab
/// handle.release().await;
/// # Ok(())
/// # }
/// ```
pub struct PageHandle {
    /// The wrapped chromiumoxide page; every CDP call goes through it.
    page: Page,
    /// Upper bound applied to the individual CDP calls issued by the accessor
    /// methods (`url`, `title`, `content`, `eval`, ...).
    cdp_timeout: Duration,
    /// HTTP status code of the most recent main-frame navigation, or `0` if not
    /// yet captured.  Written atomically by the listener spawned in `navigate()`.
    last_status_code: Arc<AtomicU16>,
}
194
195impl PageHandle {
196    /// Wrap a raw chromiumoxide [`Page`] in a handle.
197    pub(crate) fn new(page: Page, cdp_timeout: Duration) -> Self {
198        Self {
199            page,
200            cdp_timeout,
201            last_status_code: Arc::new(AtomicU16::new(0)),
202        }
203    }
204
205    /// Navigate to `url` and wait for `condition` within `nav_timeout`.
206    ///
207    /// # Errors
208    ///
209    /// Returns [`BrowserError::NavigationFailed`] if the navigation times out or
210    /// the CDP call fails.
211    pub async fn navigate(
212        &mut self,
213        url: &str,
214        condition: WaitUntil,
215        nav_timeout: Duration,
216    ) -> Result<()> {
217        self.setup_status_capture().await;
218        timeout(
219            nav_timeout,
220            self.navigate_inner(url, condition, nav_timeout),
221        )
222        .await
223        .map_err(|_| BrowserError::NavigationFailed {
224            url: url.to_string(),
225            reason: format!("navigation timed out after {nav_timeout:?}"),
226        })?
227    }
228
229    /// Reset the last status code and wire up the `Network.responseReceived`
230    /// listener before any navigation starts.  Errors are logged and swallowed
231    /// so that a missing network domain never blocks navigation.
232    async fn setup_status_capture(&self) {
233        use chromiumoxide::cdp::browser_protocol::network::{
234            EventResponseReceived, ResourceType as NetworkResourceType,
235        };
236        use futures::StreamExt;
237
238        // Reset so a stale code is not returned if the new navigation fails
239        // before the response headers arrive.
240        self.last_status_code.store(0, Ordering::Release);
241
242        // Subscribe *before* goto() — the listener runs in a detached task and
243        // stores the first Document-type response status atomically.
244        let page_for_listener = self.page.clone();
245        let status_capture = Arc::clone(&self.last_status_code);
246        match page_for_listener
247            .event_listener::<EventResponseReceived>()
248            .await
249        {
250            Ok(mut stream) => {
251                tokio::spawn(async move {
252                    while let Some(event) = stream.next().await {
253                        if event.r#type == NetworkResourceType::Document {
254                            let code = u16::try_from(event.response.status).unwrap_or(0);
255                            if code > 0 {
256                                status_capture.store(code, Ordering::Release);
257                            }
258                            break;
259                        }
260                    }
261                });
262            }
263            Err(e) => warn!("status-code capture unavailable: {e}"),
264        }
265    }
266
267    /// Subscribe to the appropriate CDP events, fire `goto`, then await
268    /// `condition`.  All subscriptions precede `goto` to eliminate the race
269    /// described in issue #7.
270    async fn navigate_inner(
271        &self,
272        url: &str,
273        condition: WaitUntil,
274        nav_timeout: Duration,
275    ) -> Result<()> {
276        use chromiumoxide::cdp::browser_protocol::page::{
277            EventDomContentEventFired, EventLoadEventFired,
278        };
279        use futures::StreamExt;
280
281        let url_owned = url.to_string();
282
283        let mut dom_events = match &condition {
284            WaitUntil::DomContentLoaded => Some(
285                self.page
286                    .event_listener::<EventDomContentEventFired>()
287                    .await
288                    .map_err(|e| BrowserError::NavigationFailed {
289                        url: url_owned.clone(),
290                        reason: e.to_string(),
291                    })?,
292            ),
293            _ => None,
294        };
295
296        let mut load_events = match &condition {
297            WaitUntil::NetworkIdle => Some(
298                self.page
299                    .event_listener::<EventLoadEventFired>()
300                    .await
301                    .map_err(|e| BrowserError::NavigationFailed {
302                        url: url_owned.clone(),
303                        reason: e.to_string(),
304                    })?,
305            ),
306            _ => None,
307        };
308
309        let inflight = if matches!(condition, WaitUntil::NetworkIdle) {
310            Some(self.subscribe_inflight_counter().await)
311        } else {
312            None
313        };
314
315        self.page
316            .goto(url)
317            .await
318            .map_err(|e| BrowserError::NavigationFailed {
319                url: url_owned.clone(),
320                reason: e.to_string(),
321            })?;
322
323        match &condition {
324            WaitUntil::DomContentLoaded => {
325                if let Some(ref mut events) = dom_events {
326                    let _ = events.next().await;
327                }
328            }
329            WaitUntil::NetworkIdle => {
330                if let Some(ref mut events) = load_events {
331                    let _ = events.next().await;
332                }
333                if let Some(ref counter) = inflight {
334                    Self::wait_network_idle(counter).await;
335                }
336            }
337            WaitUntil::Selector(css) => {
338                self.wait_for_selector(css, nav_timeout).await?;
339            }
340        }
341        Ok(())
342    }
343
344    /// Spawn three detached tasks that maintain a signed in-flight request
345    /// counter via `Network.requestWillBeSent` (+1) and
346    /// `Network.loadingFinished`/`Network.loadingFailed` (−1 each).
347    /// Returns the shared counter so the caller can poll it.
348    async fn subscribe_inflight_counter(&self) -> Arc<std::sync::atomic::AtomicI32> {
349        use std::sync::atomic::AtomicI32;
350
351        use chromiumoxide::cdp::browser_protocol::network::{
352            EventLoadingFailed, EventLoadingFinished, EventRequestWillBeSent,
353        };
354        use futures::StreamExt;
355
356        let counter: Arc<AtomicI32> = Arc::new(AtomicI32::new(0));
357        let pairs: [(Arc<AtomicI32>, i32); 3] = [
358            (Arc::clone(&counter), 1),
359            (Arc::clone(&counter), -1),
360            (Arc::clone(&counter), -1),
361        ];
362        let [p1, p2, p3] = [self.page.clone(), self.page.clone(), self.page.clone()];
363
364        macro_rules! spawn_tracker {
365            ($page:expr, $event:ty, $c:expr, $delta:expr) => {
366                match $page.event_listener::<$event>().await {
367                    Ok(mut s) => {
368                        let c = $c;
369                        let d = $delta;
370                        tokio::spawn(async move {
371                            while s.next().await.is_some() {
372                                c.fetch_add(d, Ordering::Relaxed);
373                            }
374                        });
375                    }
376                    Err(e) => warn!("network-idle tracker unavailable: {e}"),
377                }
378            };
379        }
380
381        let [(c1, d1), (c2, d2), (c3, d3)] = pairs;
382        spawn_tracker!(p1, EventRequestWillBeSent, c1, d1);
383        spawn_tracker!(p2, EventLoadingFinished, c2, d2);
384        spawn_tracker!(p3, EventLoadingFailed, c3, d3);
385
386        counter
387    }
388
389    /// Poll `counter` until ≤ 2 in-flight requests persist for 500 ms
390    /// (equivalent to Playwright's `networkidle2`).
391    async fn wait_network_idle(counter: &Arc<std::sync::atomic::AtomicI32>) {
392        const IDLE_THRESHOLD: i32 = 2;
393        const SETTLE: Duration = Duration::from_millis(500);
394        loop {
395            if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
396                tokio::time::sleep(SETTLE).await;
397                if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
398                    break;
399                }
400            } else {
401                tokio::time::sleep(Duration::from_millis(50)).await;
402            }
403        }
404    }
405
406    /// Wait until `document.querySelector(selector)` is non-null (`timeout`).
407    ///
408    /// # Errors
409    ///
410    /// Returns [`BrowserError::NavigationFailed`] if the selector is not found
411    /// within the given timeout.
412    pub async fn wait_for_selector(&self, selector: &str, wait_timeout: Duration) -> Result<()> {
413        let selector_owned = selector.to_string();
414        let poll = async {
415            loop {
416                if self.page.find_element(selector_owned.clone()).await.is_ok() {
417                    return Ok(());
418                }
419                tokio::time::sleep(Duration::from_millis(100)).await;
420            }
421        };
422
423        timeout(wait_timeout, poll)
424            .await
425            .map_err(|_| BrowserError::NavigationFailed {
426                url: String::new(),
427                reason: format!("selector '{selector_owned}' not found within {wait_timeout:?}"),
428            })?
429    }
430
431    /// Set a resource filter to block specific network request types.
432    ///
433    /// **Note:** Requires Network.enable; called automatically.
434    ///
435    /// # Errors
436    ///
437    /// Returns a [`BrowserError::CdpError`] if the CDP call fails.
438    pub async fn set_resource_filter(&mut self, filter: ResourceFilter) -> Result<()> {
439        use chromiumoxide::cdp::browser_protocol::fetch::{EnableParams, RequestPattern};
440
441        if filter.is_empty() {
442            return Ok(());
443        }
444
445        // Both builders are infallible — they return the struct directly (not Result)
446        let pattern = RequestPattern::builder().url_pattern("*").build();
447        let params = EnableParams::builder()
448            .patterns(vec![pattern])
449            .handle_auth_requests(false)
450            .build();
451
452        timeout(self.cdp_timeout, self.page.execute::<EnableParams>(params))
453            .await
454            .map_err(|_| BrowserError::Timeout {
455                operation: "Fetch.enable".to_string(),
456                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
457            })?
458            .map_err(|e| BrowserError::CdpError {
459                operation: "Fetch.enable".to_string(),
460                message: e.to_string(),
461            })?;
462
463        debug!("Resource filter active: {:?}", filter);
464        Ok(())
465    }
466
467    /// Return the current page URL (post-navigation, post-redirect).
468    ///
469    /// Delegates to the CDP `Target.getTargetInfo` binding already used
470    /// internally by [`save_cookies`](Self::save_cookies); no extra network
471    /// request is made.  Returns an empty string if the URL is not yet set
472    /// (e.g. on a blank tab before the first navigation).
473    ///
474    /// # Errors
475    ///
476    /// Returns [`BrowserError::CdpError`] if the underlying CDP call fails, or
477    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
478    ///
479    /// # Example
480    ///
481    /// ```no_run
482    /// use stygian_browser::{BrowserPool, BrowserConfig};
483    /// use stygian_browser::page::WaitUntil;
484    /// use std::time::Duration;
485    ///
486    /// # async fn run() -> stygian_browser::error::Result<()> {
487    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
488    /// let handle = pool.acquire().await?;
489    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
490    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
491    /// let url = page.url().await?;
492    /// println!("Final URL after redirects: {url}");
493    /// # Ok(())
494    /// # }
495    /// ```
496    pub async fn url(&self) -> Result<String> {
497        timeout(self.cdp_timeout, self.page.url())
498            .await
499            .map_err(|_| BrowserError::Timeout {
500                operation: "page.url".to_string(),
501                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
502            })?
503            .map_err(|e| BrowserError::CdpError {
504                operation: "page.url".to_string(),
505                message: e.to_string(),
506            })
507            .map(Option::unwrap_or_default)
508    }
509
510    /// Return the HTTP status code of the most recent main-frame navigation.
511    ///
512    /// The status is captured from the `Network.responseReceived` CDP event
513    /// wired up inside [`navigate`](Self::navigate), so it reflects the
514    /// *final* response after any server-side redirects.
515    ///
516    /// Returns `None` if the status was not captured — for example on `file://`
517    /// navigations, when [`navigate`](Self::navigate) has not yet been called,
518    /// or if the network event subscription failed.
519    ///
520    /// # Errors
521    ///
522    /// This method is infallible; the `Result` wrapper is kept for API
523    /// consistency with other `PageHandle` methods.
524    ///
525    /// # Example
526    ///
527    /// ```no_run
528    /// use stygian_browser::{BrowserPool, BrowserConfig};
529    /// use stygian_browser::page::WaitUntil;
530    /// use std::time::Duration;
531    ///
532    /// # async fn run() -> stygian_browser::error::Result<()> {
533    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
534    /// let handle = pool.acquire().await?;
535    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
536    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
537    /// if let Some(code) = page.status_code()? {
538    ///     println!("HTTP {code}");
539    /// }
540    /// # Ok(())
541    /// # }
542    /// ```
543    pub fn status_code(&self) -> Result<Option<u16>> {
544        let code = self.last_status_code.load(Ordering::Acquire);
545        Ok(if code == 0 { None } else { Some(code) })
546    }
547
548    /// Return the page's `<title>` text.
549    ///
550    /// # Errors
551    ///
552    /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
553    pub async fn title(&self) -> Result<String> {
554        timeout(self.cdp_timeout, self.page.get_title())
555            .await
556            .map_err(|_| BrowserError::Timeout {
557                operation: "get_title".to_string(),
558                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
559            })?
560            .map_err(|e| BrowserError::ScriptExecutionFailed {
561                script: "document.title".to_string(),
562                reason: e.to_string(),
563            })
564            .map(Option::unwrap_or_default)
565    }
566
567    /// Return the page's full outer HTML.
568    ///
569    /// # Errors
570    ///
571    /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
572    pub async fn content(&self) -> Result<String> {
573        timeout(self.cdp_timeout, self.page.content())
574            .await
575            .map_err(|_| BrowserError::Timeout {
576                operation: "page.content".to_string(),
577                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
578            })?
579            .map_err(|e| BrowserError::ScriptExecutionFailed {
580                script: "document.documentElement.outerHTML".to_string(),
581                reason: e.to_string(),
582            })
583    }
584
585    /// Evaluate arbitrary JavaScript and return the result as `T`.
586    ///
587    /// # Errors
588    ///
589    /// Returns [`BrowserError::ScriptExecutionFailed`] on eval failure or
590    /// deserialization error.
591    pub async fn eval<T: serde::de::DeserializeOwned>(&self, script: &str) -> Result<T> {
592        let script_owned = script.to_string();
593        timeout(self.cdp_timeout, self.page.evaluate(script))
594            .await
595            .map_err(|_| BrowserError::Timeout {
596                operation: "page.evaluate".to_string(),
597                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
598            })?
599            .map_err(|e| BrowserError::ScriptExecutionFailed {
600                script: script_owned.clone(),
601                reason: e.to_string(),
602            })?
603            .into_value::<T>()
604            .map_err(|e| BrowserError::ScriptExecutionFailed {
605                script: script_owned,
606                reason: e.to_string(),
607            })
608    }
609
610    /// Save all cookies for the current page's origin.
611    ///
612    /// # Errors
613    ///
614    /// Returns [`BrowserError::CdpError`] if the CDP call fails.
615    pub async fn save_cookies(
616        &self,
617    ) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::Cookie>> {
618        use chromiumoxide::cdp::browser_protocol::network::GetCookiesParams;
619
620        let url = self
621            .page
622            .url()
623            .await
624            .map_err(|e| BrowserError::CdpError {
625                operation: "page.url".to_string(),
626                message: e.to_string(),
627            })?
628            .unwrap_or_default();
629
630        timeout(
631            self.cdp_timeout,
632            self.page
633                .execute(GetCookiesParams::builder().urls(vec![url]).build()),
634        )
635        .await
636        .map_err(|_| BrowserError::Timeout {
637            operation: "Network.getCookies".to_string(),
638            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
639        })?
640        .map_err(|e| BrowserError::CdpError {
641            operation: "Network.getCookies".to_string(),
642            message: e.to_string(),
643        })
644        .map(|r| r.cookies.clone())
645    }
646
647    /// Capture a screenshot of the current page as PNG bytes.
648    ///
649    /// The screenshot is full-page by default (viewport clipped to the rendered
650    /// layout area).  Save the returned bytes to a `.png` file or process
651    /// them in-memory.
652    ///
653    /// # Errors
654    ///
655    /// Returns [`BrowserError::CdpError`] if the CDP `Page.captureScreenshot`
656    /// command fails, or [`BrowserError::Timeout`] if it exceeds
657    /// `cdp_timeout`.
658    ///
659    /// # Example
660    ///
661    /// ```no_run
662    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
663    /// use std::{time::Duration, fs};
664    ///
665    /// # async fn run() -> stygian_browser::error::Result<()> {
666    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
667    /// let handle = pool.acquire().await?;
668    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
669    /// page.navigate("https://example.com", WaitUntil::Selector("body".to_string()), Duration::from_secs(30)).await?;
670    /// let png = page.screenshot().await?;
671    /// fs::write("screenshot.png", &png).unwrap();
672    /// # Ok(())
673    /// # }
674    /// ```
675    pub async fn screenshot(&self) -> Result<Vec<u8>> {
676        use chromiumoxide::page::ScreenshotParams;
677
678        let params = ScreenshotParams::builder().full_page(true).build();
679
680        timeout(self.cdp_timeout, self.page.screenshot(params))
681            .await
682            .map_err(|_| BrowserError::Timeout {
683                operation: "Page.captureScreenshot".to_string(),
684                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
685            })?
686            .map_err(|e| BrowserError::CdpError {
687                operation: "Page.captureScreenshot".to_string(),
688                message: e.to_string(),
689            })
690    }
691
692    /// Borrow the underlying chromiumoxide [`Page`].
693    pub const fn inner(&self) -> &Page {
694        &self.page
695    }
696
697    /// Close this page (tab).
698    ///
699    /// Called automatically on drop; explicit call avoids suppressing the error.
700    pub async fn close(self) -> Result<()> {
701        timeout(Duration::from_secs(5), self.page.clone().close())
702            .await
703            .map_err(|_| BrowserError::Timeout {
704                operation: "page.close".to_string(),
705                duration_ms: 5000,
706            })?
707            .map_err(|e| BrowserError::CdpError {
708                operation: "page.close".to_string(),
709                message: e.to_string(),
710            })
711    }
712}
713
714impl Drop for PageHandle {
715    fn drop(&mut self) {
716        warn!("PageHandle dropped without explicit close(); spawning cleanup task");
717        // chromiumoxide Page does not implement close on Drop, so we spawn
718        // a fire-and-forget task. The page ref is already owned; we need to
719        // swap it out. We clone the Page handle (it's Arc-backed internally).
720        let page = self.page.clone();
721        tokio::spawn(async move {
722            let _ = page.close().await;
723        });
724    }
725}
726
727// ─── Tests ────────────────────────────────────────────────────────────────────
728
// Unit tests for the pure-logic parts of this module; none of them needs a
// live browser.
#[cfg(test)]
mod tests {
    use super::*;

    /// `block_media` blocks all four resource types and nothing else.
    #[test]
    fn resource_filter_block_media_blocks_image() {
        let filter = ResourceFilter::block_media();
        assert!(filter.should_block("Image"));
        assert!(filter.should_block("Font"));
        assert!(filter.should_block("Stylesheet"));
        assert!(filter.should_block("Media"));
        assert!(!filter.should_block("Script"));
        assert!(!filter.should_block("XHR"));
    }

    /// `should_block` matches the CDP type name regardless of ASCII case.
    #[test]
    fn resource_filter_case_insensitive() {
        let filter = ResourceFilter::block_images_and_fonts();
        assert!(filter.should_block("image")); // lowercase
        assert!(filter.should_block("IMAGE")); // uppercase
        assert!(!filter.should_block("Stylesheet"));
    }

    /// Chained `block` calls accumulate exactly the requested types.
    #[test]
    fn resource_filter_builder_chain() {
        let filter = ResourceFilter::default()
            .block(ResourceType::Image)
            .block(ResourceType::Font);
        assert!(filter.should_block("Image"));
        assert!(filter.should_block("Font"));
        assert!(!filter.should_block("Stylesheet"));
    }

    /// Blocking the same type twice stores it only once.
    #[test]
    fn resource_filter_dedup_block() {
        let filter = ResourceFilter::default()
            .block(ResourceType::Image)
            .block(ResourceType::Image); // duplicate
        assert_eq!(filter.blocked.len(), 1);
    }

    /// The default filter is empty; the presets are not.
    #[test]
    fn resource_filter_is_empty_when_default() {
        assert!(ResourceFilter::default().is_empty());
        assert!(!ResourceFilter::block_media().is_empty());
    }

    /// `WaitUntil::Selector` carries the selector string unchanged.
    #[test]
    fn wait_until_selector_stores_string() {
        let w = WaitUntil::Selector("#foo".to_string());
        assert!(matches!(w, WaitUntil::Selector(ref s) if s == "#foo"));
    }

    /// Each `ResourceType` maps to its CDP string name.
    #[test]
    fn resource_type_cdp_str() {
        assert_eq!(ResourceType::Image.as_cdp_str(), "Image");
        assert_eq!(ResourceType::Font.as_cdp_str(), "Font");
        assert_eq!(ResourceType::Stylesheet.as_cdp_str(), "Stylesheet");
        assert_eq!(ResourceType::Media.as_cdp_str(), "Media");
    }

    /// `PageHandle` must be `Send + Sync` for use across thread boundaries.
    /// This is a compile-time check; the test body does nothing at runtime.
    #[test]
    fn page_handle_is_send_sync() {
        fn assert_send<T: Send>() {}
        fn assert_sync<T: Sync>() {}
        assert_send::<PageHandle>();
        assert_sync::<PageHandle>();
    }

    /// The status-code sentinel (0 = "not yet captured") and the conversion to
    /// `Option<u16>` are pure-logic invariants testable without a live browser.
    /// Note: this re-states the conversion inline; it does not call
    /// `PageHandle::status_code`.
    #[test]
    fn status_code_sentinel_zero_maps_to_none() {
        use std::sync::atomic::{AtomicU16, Ordering};
        let atom = AtomicU16::new(0);
        let code = atom.load(Ordering::Acquire);
        assert_eq!(if code == 0 { None } else { Some(code) }, None::<u16>);
    }

    /// Non-zero stored codes come back as `Some(code)` under the same inline
    /// conversion.
    #[test]
    fn status_code_non_zero_maps_to_some() {
        use std::sync::atomic::{AtomicU16, Ordering};
        for &expected in &[200u16, 301, 404, 503] {
            let atom = AtomicU16::new(expected);
            let code = atom.load(Ordering::Acquire);
            assert_eq!(if code == 0 { None } else { Some(code) }, Some(expected));
        }
    }
}