stygian_browser/page.rs
1//! Page and browsing context management for isolated, parallel scraping
2//!
3//! Each `BrowserContext` (future) is an incognito-style isolation boundary (separate
4//! cookies, localStorage, cache). Each context can contain many [`PageHandle`]s
5//! (tabs). Both types clean up their CDP resources automatically on drop.
6//!
7//! ## Resource blocking
8//!
9//! Pass a [`ResourceFilter`] to [`PageHandle::set_resource_filter`] to intercept
10//! and block specific request types (images, fonts, CSS) before page load —
11//! significantly reducing page load times for text-only scraping.
12//!
13//! ## Wait strategies
14//!
15//! [`PageHandle`] exposes three wait strategies via [`WaitUntil`]:
16//! - `DomContentLoaded` — fires when the HTML is parsed
17//! - `NetworkIdle` — fires when there are ≤2 in-flight requests for 500 ms
18//! - `Selector(css)` — fires when a CSS selector matches an element
19//!
20//! # Example
21//!
22//! ```no_run
23//! use stygian_browser::{BrowserPool, BrowserConfig};
24//! use stygian_browser::page::{ResourceFilter, WaitUntil};
25//! use std::time::Duration;
26//!
27//! # async fn run() -> stygian_browser::error::Result<()> {
28//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
29//! let handle = pool.acquire().await?;
30//!
31//! let mut page = handle.browser().expect("valid browser").new_page().await?;
32//! page.set_resource_filter(ResourceFilter::block_media()).await?;
33//! page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
34//! let title = page.title().await?;
35//! println!("title: {title}");
36//! handle.release().await;
37//! # Ok(())
38//! # }
39//! ```
40
41use std::sync::{
42 Arc,
43 atomic::{AtomicU16, Ordering},
44};
45use std::time::Duration;
46
47use chromiumoxide::Page;
48use tokio::time::timeout;
49use tracing::{debug, warn};
50
51use crate::error::{BrowserError, Result};
52
53// ─── ResourceType ─────────────────────────────────────────────────────────────
54
/// Categories of sub-resource request that a [`ResourceFilter`] can block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResourceType {
    /// Image requests (`<img>`, `<picture>`, CSS background images).
    Image,
    /// Web fonts pulled in through CSS `@font-face`.
    Font,
    /// External CSS stylesheets.
    Stylesheet,
    /// Audio and video files.
    Media,
}

impl ResourceType {
    /// Name of this resource type as it is spelled in CDP events.
    ///
    /// The returned string is what [`ResourceFilter::should_block`] compares
    /// against (case-insensitively).
    pub const fn as_cdp_str(&self) -> &'static str {
        match *self {
            ResourceType::Image => "Image",
            ResourceType::Font => "Font",
            ResourceType::Stylesheet => "Stylesheet",
            ResourceType::Media => "Media",
        }
    }
}
79
80// ─── ResourceFilter ───────────────────────────────────────────────────────────
81
82/// Set of resource types to block from loading.
83///
84/// # Example
85///
86/// ```
87/// use stygian_browser::page::ResourceFilter;
88/// let filter = ResourceFilter::block_media();
89/// assert!(filter.should_block("Image"));
90/// ```
91#[derive(Debug, Clone, Default)]
92pub struct ResourceFilter {
93 blocked: Vec<ResourceType>,
94}
95
96impl ResourceFilter {
97 /// Block all media resources (images, fonts, CSS, audio/video).
98 pub fn block_media() -> Self {
99 Self {
100 blocked: vec![
101 ResourceType::Image,
102 ResourceType::Font,
103 ResourceType::Stylesheet,
104 ResourceType::Media,
105 ],
106 }
107 }
108
109 /// Block only images and fonts (keep styles for layout-sensitive work).
110 pub fn block_images_and_fonts() -> Self {
111 Self {
112 blocked: vec![ResourceType::Image, ResourceType::Font],
113 }
114 }
115
116 /// Add a resource type to the block list.
117 #[must_use]
118 pub fn block(mut self, resource: ResourceType) -> Self {
119 if !self.blocked.contains(&resource) {
120 self.blocked.push(resource);
121 }
122 self
123 }
124
125 /// Returns `true` if the given CDP resource type string should be blocked.
126 pub fn should_block(&self, cdp_type: &str) -> bool {
127 self.blocked
128 .iter()
129 .any(|r| r.as_cdp_str().eq_ignore_ascii_case(cdp_type))
130 }
131
132 /// Returns `true` if no resource types are blocked.
133 pub const fn is_empty(&self) -> bool {
134 self.blocked.is_empty()
135 }
136}
137
138// ─── WaitUntil ────────────────────────────────────────────────────────────────
139
/// Condition to wait for after a navigation.
///
/// Implements `PartialEq`/`Eq` (like [`ResourceType`]) so conditions can be
/// compared directly in caller code and tests.
///
/// # Example
///
/// ```
/// use stygian_browser::page::WaitUntil;
/// let w = WaitUntil::Selector("#main".to_string());
/// assert!(matches!(w, WaitUntil::Selector(_)));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WaitUntil {
    /// Wait for the `Page.domContentEventFired` CDP event — fires when the HTML
    /// document has been fully parsed and the DOM is ready, before subresources
    /// such as images and stylesheets finish loading.
    DomContentLoaded,
    /// Wait for the `Page.loadEventFired` CDP event **and** then wait until no
    /// more than 2 network requests are in-flight for at least 500 ms
    /// (the heuristic Puppeteer calls `networkidle2`).
    NetworkIdle,
    /// Wait until `document.querySelector(selector)` returns a non-null element.
    Selector(String),
}
162
163// ─── PageHandle ───────────────────────────────────────────────────────────────
164
165/// A handle to an open browser tab.
166///
167/// On drop the underlying page is closed automatically.
168///
169/// # Example
170///
171/// ```no_run
172/// use stygian_browser::{BrowserPool, BrowserConfig};
173/// use stygian_browser::page::WaitUntil;
174/// use std::time::Duration;
175///
176/// # async fn run() -> stygian_browser::error::Result<()> {
177/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
178/// let handle = pool.acquire().await?;
179/// let mut page = handle.browser().expect("valid browser").new_page().await?;
180/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
181/// let html = page.content().await?;
182/// drop(page); // closes the tab
183/// handle.release().await;
184/// # Ok(())
185/// # }
186/// ```
187pub struct PageHandle {
188 page: Page,
189 cdp_timeout: Duration,
190 /// HTTP status code of the most recent main-frame navigation, or `0` if not
191 /// yet captured. Written atomically by the listener spawned in `navigate()`.
192 last_status_code: Arc<AtomicU16>,
193}
194
195impl PageHandle {
196 /// Wrap a raw chromiumoxide [`Page`] in a handle.
197 pub(crate) fn new(page: Page, cdp_timeout: Duration) -> Self {
198 Self {
199 page,
200 cdp_timeout,
201 last_status_code: Arc::new(AtomicU16::new(0)),
202 }
203 }
204
205 /// Navigate to `url` and wait for `condition` within `nav_timeout`.
206 ///
207 /// # Errors
208 ///
209 /// Returns [`BrowserError::NavigationFailed`] if the navigation times out or
210 /// the CDP call fails.
211 pub async fn navigate(
212 &mut self,
213 url: &str,
214 condition: WaitUntil,
215 nav_timeout: Duration,
216 ) -> Result<()> {
217 self.setup_status_capture().await;
218 timeout(
219 nav_timeout,
220 self.navigate_inner(url, condition, nav_timeout),
221 )
222 .await
223 .map_err(|_| BrowserError::NavigationFailed {
224 url: url.to_string(),
225 reason: format!("navigation timed out after {nav_timeout:?}"),
226 })?
227 }
228
229 /// Reset the last status code and wire up the `Network.responseReceived`
230 /// listener before any navigation starts. Errors are logged and swallowed
231 /// so that a missing network domain never blocks navigation.
232 async fn setup_status_capture(&self) {
233 use chromiumoxide::cdp::browser_protocol::network::{
234 EventResponseReceived, ResourceType as NetworkResourceType,
235 };
236 use futures::StreamExt;
237
238 // Reset so a stale code is not returned if the new navigation fails
239 // before the response headers arrive.
240 self.last_status_code.store(0, Ordering::Release);
241
242 // Subscribe *before* goto() — the listener runs in a detached task and
243 // stores the first Document-type response status atomically.
244 let page_for_listener = self.page.clone();
245 let status_capture = Arc::clone(&self.last_status_code);
246 match page_for_listener
247 .event_listener::<EventResponseReceived>()
248 .await
249 {
250 Ok(mut stream) => {
251 tokio::spawn(async move {
252 while let Some(event) = stream.next().await {
253 if event.r#type == NetworkResourceType::Document {
254 let code = u16::try_from(event.response.status).unwrap_or(0);
255 if code > 0 {
256 status_capture.store(code, Ordering::Release);
257 }
258 break;
259 }
260 }
261 });
262 }
263 Err(e) => warn!("status-code capture unavailable: {e}"),
264 }
265 }
266
267 /// Subscribe to the appropriate CDP events, fire `goto`, then await
268 /// `condition`. All subscriptions precede `goto` to eliminate the race
269 /// described in issue #7.
270 async fn navigate_inner(
271 &self,
272 url: &str,
273 condition: WaitUntil,
274 nav_timeout: Duration,
275 ) -> Result<()> {
276 use chromiumoxide::cdp::browser_protocol::page::{
277 EventDomContentEventFired, EventLoadEventFired,
278 };
279 use futures::StreamExt;
280
281 let url_owned = url.to_string();
282
283 let mut dom_events = match &condition {
284 WaitUntil::DomContentLoaded => Some(
285 self.page
286 .event_listener::<EventDomContentEventFired>()
287 .await
288 .map_err(|e| BrowserError::NavigationFailed {
289 url: url_owned.clone(),
290 reason: e.to_string(),
291 })?,
292 ),
293 _ => None,
294 };
295
296 let mut load_events = match &condition {
297 WaitUntil::NetworkIdle => Some(
298 self.page
299 .event_listener::<EventLoadEventFired>()
300 .await
301 .map_err(|e| BrowserError::NavigationFailed {
302 url: url_owned.clone(),
303 reason: e.to_string(),
304 })?,
305 ),
306 _ => None,
307 };
308
309 let inflight = if matches!(condition, WaitUntil::NetworkIdle) {
310 Some(self.subscribe_inflight_counter().await)
311 } else {
312 None
313 };
314
315 self.page
316 .goto(url)
317 .await
318 .map_err(|e| BrowserError::NavigationFailed {
319 url: url_owned.clone(),
320 reason: e.to_string(),
321 })?;
322
323 match &condition {
324 WaitUntil::DomContentLoaded => {
325 if let Some(ref mut events) = dom_events {
326 let _ = events.next().await;
327 }
328 }
329 WaitUntil::NetworkIdle => {
330 if let Some(ref mut events) = load_events {
331 let _ = events.next().await;
332 }
333 if let Some(ref counter) = inflight {
334 Self::wait_network_idle(counter).await;
335 }
336 }
337 WaitUntil::Selector(css) => {
338 self.wait_for_selector(css, nav_timeout).await?;
339 }
340 }
341 Ok(())
342 }
343
344 /// Spawn three detached tasks that maintain a signed in-flight request
345 /// counter via `Network.requestWillBeSent` (+1) and
346 /// `Network.loadingFinished`/`Network.loadingFailed` (−1 each).
347 /// Returns the shared counter so the caller can poll it.
348 async fn subscribe_inflight_counter(&self) -> Arc<std::sync::atomic::AtomicI32> {
349 use std::sync::atomic::AtomicI32;
350
351 use chromiumoxide::cdp::browser_protocol::network::{
352 EventLoadingFailed, EventLoadingFinished, EventRequestWillBeSent,
353 };
354 use futures::StreamExt;
355
356 let counter: Arc<AtomicI32> = Arc::new(AtomicI32::new(0));
357 let pairs: [(Arc<AtomicI32>, i32); 3] = [
358 (Arc::clone(&counter), 1),
359 (Arc::clone(&counter), -1),
360 (Arc::clone(&counter), -1),
361 ];
362 let [p1, p2, p3] = [self.page.clone(), self.page.clone(), self.page.clone()];
363
364 macro_rules! spawn_tracker {
365 ($page:expr, $event:ty, $c:expr, $delta:expr) => {
366 match $page.event_listener::<$event>().await {
367 Ok(mut s) => {
368 let c = $c;
369 let d = $delta;
370 tokio::spawn(async move {
371 while s.next().await.is_some() {
372 c.fetch_add(d, Ordering::Relaxed);
373 }
374 });
375 }
376 Err(e) => warn!("network-idle tracker unavailable: {e}"),
377 }
378 };
379 }
380
381 let [(c1, d1), (c2, d2), (c3, d3)] = pairs;
382 spawn_tracker!(p1, EventRequestWillBeSent, c1, d1);
383 spawn_tracker!(p2, EventLoadingFinished, c2, d2);
384 spawn_tracker!(p3, EventLoadingFailed, c3, d3);
385
386 counter
387 }
388
389 /// Poll `counter` until ≤ 2 in-flight requests persist for 500 ms
390 /// (equivalent to Playwright's `networkidle2`).
391 async fn wait_network_idle(counter: &Arc<std::sync::atomic::AtomicI32>) {
392 const IDLE_THRESHOLD: i32 = 2;
393 const SETTLE: Duration = Duration::from_millis(500);
394 loop {
395 if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
396 tokio::time::sleep(SETTLE).await;
397 if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
398 break;
399 }
400 } else {
401 tokio::time::sleep(Duration::from_millis(50)).await;
402 }
403 }
404 }
405
406 /// Wait until `document.querySelector(selector)` is non-null (`timeout`).
407 ///
408 /// # Errors
409 ///
410 /// Returns [`BrowserError::NavigationFailed`] if the selector is not found
411 /// within the given timeout.
412 pub async fn wait_for_selector(&self, selector: &str, wait_timeout: Duration) -> Result<()> {
413 let selector_owned = selector.to_string();
414 let poll = async {
415 loop {
416 if self.page.find_element(selector_owned.clone()).await.is_ok() {
417 return Ok(());
418 }
419 tokio::time::sleep(Duration::from_millis(100)).await;
420 }
421 };
422
423 timeout(wait_timeout, poll)
424 .await
425 .map_err(|_| BrowserError::NavigationFailed {
426 url: String::new(),
427 reason: format!("selector '{selector_owned}' not found within {wait_timeout:?}"),
428 })?
429 }
430
431 /// Set a resource filter to block specific network request types.
432 ///
433 /// **Note:** Requires Network.enable; called automatically.
434 ///
435 /// # Errors
436 ///
437 /// Returns a [`BrowserError::CdpError`] if the CDP call fails.
438 pub async fn set_resource_filter(&mut self, filter: ResourceFilter) -> Result<()> {
439 use chromiumoxide::cdp::browser_protocol::fetch::{EnableParams, RequestPattern};
440
441 if filter.is_empty() {
442 return Ok(());
443 }
444
445 // Both builders are infallible — they return the struct directly (not Result)
446 let pattern = RequestPattern::builder().url_pattern("*").build();
447 let params = EnableParams::builder()
448 .patterns(vec![pattern])
449 .handle_auth_requests(false)
450 .build();
451
452 timeout(self.cdp_timeout, self.page.execute::<EnableParams>(params))
453 .await
454 .map_err(|_| BrowserError::Timeout {
455 operation: "Fetch.enable".to_string(),
456 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
457 })?
458 .map_err(|e| BrowserError::CdpError {
459 operation: "Fetch.enable".to_string(),
460 message: e.to_string(),
461 })?;
462
463 debug!("Resource filter active: {:?}", filter);
464 Ok(())
465 }
466
467 /// Return the current page URL (post-navigation, post-redirect).
468 ///
469 /// Delegates to the CDP `Target.getTargetInfo` binding already used
470 /// internally by [`save_cookies`](Self::save_cookies); no extra network
471 /// request is made. Returns an empty string if the URL is not yet set
472 /// (e.g. on a blank tab before the first navigation).
473 ///
474 /// # Errors
475 ///
476 /// Returns [`BrowserError::CdpError`] if the underlying CDP call fails, or
477 /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
478 ///
479 /// # Example
480 ///
481 /// ```no_run
482 /// use stygian_browser::{BrowserPool, BrowserConfig};
483 /// use stygian_browser::page::WaitUntil;
484 /// use std::time::Duration;
485 ///
486 /// # async fn run() -> stygian_browser::error::Result<()> {
487 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
488 /// let handle = pool.acquire().await?;
489 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
490 /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
491 /// let url = page.url().await?;
492 /// println!("Final URL after redirects: {url}");
493 /// # Ok(())
494 /// # }
495 /// ```
496 pub async fn url(&self) -> Result<String> {
497 timeout(self.cdp_timeout, self.page.url())
498 .await
499 .map_err(|_| BrowserError::Timeout {
500 operation: "page.url".to_string(),
501 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
502 })?
503 .map_err(|e| BrowserError::CdpError {
504 operation: "page.url".to_string(),
505 message: e.to_string(),
506 })
507 .map(Option::unwrap_or_default)
508 }
509
510 /// Return the HTTP status code of the most recent main-frame navigation.
511 ///
512 /// The status is captured from the `Network.responseReceived` CDP event
513 /// wired up inside [`navigate`](Self::navigate), so it reflects the
514 /// *final* response after any server-side redirects.
515 ///
516 /// Returns `None` if the status was not captured — for example on `file://`
517 /// navigations, when [`navigate`](Self::navigate) has not yet been called,
518 /// or if the network event subscription failed.
519 ///
520 /// # Errors
521 ///
522 /// This method is infallible; the `Result` wrapper is kept for API
523 /// consistency with other `PageHandle` methods.
524 ///
525 /// # Example
526 ///
527 /// ```no_run
528 /// use stygian_browser::{BrowserPool, BrowserConfig};
529 /// use stygian_browser::page::WaitUntil;
530 /// use std::time::Duration;
531 ///
532 /// # async fn run() -> stygian_browser::error::Result<()> {
533 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
534 /// let handle = pool.acquire().await?;
535 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
536 /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
537 /// if let Some(code) = page.status_code()? {
538 /// println!("HTTP {code}");
539 /// }
540 /// # Ok(())
541 /// # }
542 /// ```
543 pub fn status_code(&self) -> Result<Option<u16>> {
544 let code = self.last_status_code.load(Ordering::Acquire);
545 Ok(if code == 0 { None } else { Some(code) })
546 }
547
548 /// Return the page's `<title>` text.
549 ///
550 /// # Errors
551 ///
552 /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
553 pub async fn title(&self) -> Result<String> {
554 timeout(self.cdp_timeout, self.page.get_title())
555 .await
556 .map_err(|_| BrowserError::Timeout {
557 operation: "get_title".to_string(),
558 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
559 })?
560 .map_err(|e| BrowserError::ScriptExecutionFailed {
561 script: "document.title".to_string(),
562 reason: e.to_string(),
563 })
564 .map(Option::unwrap_or_default)
565 }
566
567 /// Return the page's full outer HTML.
568 ///
569 /// # Errors
570 ///
571 /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
572 pub async fn content(&self) -> Result<String> {
573 timeout(self.cdp_timeout, self.page.content())
574 .await
575 .map_err(|_| BrowserError::Timeout {
576 operation: "page.content".to_string(),
577 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
578 })?
579 .map_err(|e| BrowserError::ScriptExecutionFailed {
580 script: "document.documentElement.outerHTML".to_string(),
581 reason: e.to_string(),
582 })
583 }
584
585 /// Evaluate arbitrary JavaScript and return the result as `T`.
586 ///
587 /// # Errors
588 ///
589 /// Returns [`BrowserError::ScriptExecutionFailed`] on eval failure or
590 /// deserialization error.
591 pub async fn eval<T: serde::de::DeserializeOwned>(&self, script: &str) -> Result<T> {
592 let script_owned = script.to_string();
593 timeout(self.cdp_timeout, self.page.evaluate(script))
594 .await
595 .map_err(|_| BrowserError::Timeout {
596 operation: "page.evaluate".to_string(),
597 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
598 })?
599 .map_err(|e| BrowserError::ScriptExecutionFailed {
600 script: script_owned.clone(),
601 reason: e.to_string(),
602 })?
603 .into_value::<T>()
604 .map_err(|e| BrowserError::ScriptExecutionFailed {
605 script: script_owned,
606 reason: e.to_string(),
607 })
608 }
609
610 /// Save all cookies for the current page's origin.
611 ///
612 /// # Errors
613 ///
614 /// Returns [`BrowserError::CdpError`] if the CDP call fails.
615 pub async fn save_cookies(
616 &self,
617 ) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::Cookie>> {
618 use chromiumoxide::cdp::browser_protocol::network::GetCookiesParams;
619
620 let url = self
621 .page
622 .url()
623 .await
624 .map_err(|e| BrowserError::CdpError {
625 operation: "page.url".to_string(),
626 message: e.to_string(),
627 })?
628 .unwrap_or_default();
629
630 timeout(
631 self.cdp_timeout,
632 self.page
633 .execute(GetCookiesParams::builder().urls(vec![url]).build()),
634 )
635 .await
636 .map_err(|_| BrowserError::Timeout {
637 operation: "Network.getCookies".to_string(),
638 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
639 })?
640 .map_err(|e| BrowserError::CdpError {
641 operation: "Network.getCookies".to_string(),
642 message: e.to_string(),
643 })
644 .map(|r| r.cookies.clone())
645 }
646
647 /// Capture a screenshot of the current page as PNG bytes.
648 ///
649 /// The screenshot is full-page by default (viewport clipped to the rendered
650 /// layout area). Save the returned bytes to a `.png` file or process
651 /// them in-memory.
652 ///
653 /// # Errors
654 ///
655 /// Returns [`BrowserError::CdpError`] if the CDP `Page.captureScreenshot`
656 /// command fails, or [`BrowserError::Timeout`] if it exceeds
657 /// `cdp_timeout`.
658 ///
659 /// # Example
660 ///
661 /// ```no_run
662 /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
663 /// use std::{time::Duration, fs};
664 ///
665 /// # async fn run() -> stygian_browser::error::Result<()> {
666 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
667 /// let handle = pool.acquire().await?;
668 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
669 /// page.navigate("https://example.com", WaitUntil::Selector("body".to_string()), Duration::from_secs(30)).await?;
670 /// let png = page.screenshot().await?;
671 /// fs::write("screenshot.png", &png).unwrap();
672 /// # Ok(())
673 /// # }
674 /// ```
675 pub async fn screenshot(&self) -> Result<Vec<u8>> {
676 use chromiumoxide::page::ScreenshotParams;
677
678 let params = ScreenshotParams::builder().full_page(true).build();
679
680 timeout(self.cdp_timeout, self.page.screenshot(params))
681 .await
682 .map_err(|_| BrowserError::Timeout {
683 operation: "Page.captureScreenshot".to_string(),
684 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
685 })?
686 .map_err(|e| BrowserError::CdpError {
687 operation: "Page.captureScreenshot".to_string(),
688 message: e.to_string(),
689 })
690 }
691
692 /// Borrow the underlying chromiumoxide [`Page`].
693 pub const fn inner(&self) -> &Page {
694 &self.page
695 }
696
697 /// Close this page (tab).
698 ///
699 /// Called automatically on drop; explicit call avoids suppressing the error.
700 pub async fn close(self) -> Result<()> {
701 timeout(Duration::from_secs(5), self.page.clone().close())
702 .await
703 .map_err(|_| BrowserError::Timeout {
704 operation: "page.close".to_string(),
705 duration_ms: 5000,
706 })?
707 .map_err(|e| BrowserError::CdpError {
708 operation: "page.close".to_string(),
709 message: e.to_string(),
710 })
711 }
712}
713
714impl Drop for PageHandle {
715 fn drop(&mut self) {
716 warn!("PageHandle dropped without explicit close(); spawning cleanup task");
717 // chromiumoxide Page does not implement close on Drop, so we spawn
718 // a fire-and-forget task. The page ref is already owned; we need to
719 // swap it out. We clone the Page handle (it's Arc-backed internally).
720 let page = self.page.clone();
721 tokio::spawn(async move {
722 let _ = page.close().await;
723 });
724 }
725}
726
727// ─── Tests ────────────────────────────────────────────────────────────────────
728
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn resource_filter_block_media_blocks_image() {
        let filter = ResourceFilter::block_media();
        for blocked in ["Image", "Font", "Stylesheet", "Media"] {
            assert!(filter.should_block(blocked));
        }
        for allowed in ["Script", "XHR"] {
            assert!(!filter.should_block(allowed));
        }
    }

    #[test]
    fn resource_filter_case_insensitive() {
        let filter = ResourceFilter::block_images_and_fonts();
        // Both all-lowercase and all-uppercase spellings must match.
        assert!(filter.should_block("image"));
        assert!(filter.should_block("IMAGE"));
        assert!(!filter.should_block("Stylesheet"));
    }

    #[test]
    fn resource_filter_builder_chain() {
        let empty = ResourceFilter::default();
        let filter = empty.block(ResourceType::Image).block(ResourceType::Font);
        assert!(filter.should_block("Image"));
        assert!(filter.should_block("Font"));
        assert!(!filter.should_block("Stylesheet"));
    }

    #[test]
    fn resource_filter_dedup_block() {
        // Blocking the same type twice must store it once.
        let once = ResourceFilter::default().block(ResourceType::Image);
        let twice = once.block(ResourceType::Image);
        assert_eq!(twice.blocked.len(), 1);
    }

    #[test]
    fn resource_filter_is_empty_when_default() {
        let untouched = ResourceFilter::default();
        let preset = ResourceFilter::block_media();
        assert!(untouched.is_empty());
        assert!(!preset.is_empty());
    }

    #[test]
    fn wait_until_selector_stores_string() {
        let condition = WaitUntil::Selector("#foo".to_string());
        assert!(matches!(condition, WaitUntil::Selector(ref css) if css == "#foo"));
    }

    #[test]
    fn resource_type_cdp_str() {
        let expected = [
            (ResourceType::Image, "Image"),
            (ResourceType::Font, "Font"),
            (ResourceType::Stylesheet, "Stylesheet"),
            (ResourceType::Media, "Media"),
        ];
        for (resource, name) in expected {
            assert_eq!(resource.as_cdp_str(), name);
        }
    }

    /// `PageHandle` must be `Send + Sync` for use across thread boundaries.
    #[test]
    fn page_handle_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<PageHandle>();
    }

    /// The `0` sentinel ("not yet captured") must map to `None`; this is a
    /// pure-logic invariant that needs no live browser.
    #[test]
    fn status_code_sentinel_zero_maps_to_none() {
        use std::sync::atomic::{AtomicU16, Ordering};
        let raw = AtomicU16::new(0).load(Ordering::Acquire);
        let mapped = if raw == 0 { None } else { Some(raw) };
        assert_eq!(mapped, None::<u16>);
    }

    #[test]
    fn status_code_non_zero_maps_to_some() {
        use std::sync::atomic::{AtomicU16, Ordering};
        for expected in [200u16, 301, 404, 503] {
            let raw = AtomicU16::new(expected).load(Ordering::Acquire);
            let mapped = if raw == 0 { None } else { Some(raw) };
            assert_eq!(mapped, Some(expected));
        }
    }
}