stygian_browser/page.rs
1//! Page and browsing context management for isolated, parallel scraping
2//!
3//! Each `BrowserContext` (future) is an incognito-style isolation boundary (separate
4//! cookies, localStorage, cache). Each context can contain many [`PageHandle`]s
5//! (tabs). Both types clean up their CDP resources automatically on drop.
6//!
7//! ## Resource blocking
8//!
9//! Pass a [`ResourceFilter`] to [`PageHandle::set_resource_filter`] to intercept
10//! and block specific request types (images, fonts, CSS) before page load —
11//! significantly reducing page load times for text-only scraping.
12//!
13//! ## Wait strategies
14//!
15//! [`PageHandle`] exposes three wait strategies via [`WaitUntil`]:
16//! - `DomContentLoaded` — fires when the HTML is parsed
17//! - `NetworkIdle` — fires when there are ≤2 in-flight requests for 500 ms
18//! - `Selector(css)` — fires when a CSS selector matches an element
19//!
20//! # Example
21//!
22//! ```no_run
23//! use stygian_browser::{BrowserPool, BrowserConfig};
24//! use stygian_browser::page::{ResourceFilter, WaitUntil};
25//! use std::time::Duration;
26//!
27//! # async fn run() -> stygian_browser::error::Result<()> {
28//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
29//! let handle = pool.acquire().await?;
30//!
31//! let mut page = handle.browser().expect("valid browser").new_page().await?;
32//! page.set_resource_filter(ResourceFilter::block_media()).await?;
33//! page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
34//! let title = page.title().await?;
35//! println!("title: {title}");
36//! handle.release().await;
37//! # Ok(())
38//! # }
39//! ```
40
41use std::collections::HashMap;
42use std::sync::{
43 Arc,
44 atomic::{AtomicU16, Ordering},
45};
46use std::time::Duration;
47
48use chromiumoxide::Page;
49use tokio::time::timeout;
50use tracing::{debug, warn};
51
52use crate::error::{BrowserError, Result};
53
54// ─── ResourceType ─────────────────────────────────────────────────────────────
55
/// Categories of network resources that a [`ResourceFilter`] can block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResourceType {
    /// Images: `<img>`, `<picture>`, CSS background images.
    Image,
    /// Web fonts pulled in through CSS `@font-face`.
    Font,
    /// External CSS stylesheets.
    Stylesheet,
    /// Audio and video files.
    Media,
}

impl ResourceType {
    /// Returns the CDP resource-type name for this variant.
    ///
    /// This is the name that [`ResourceFilter::should_block`] compares
    /// (ASCII case-insensitively) against the resource type reported on each
    /// `Fetch.requestPaused` event.
    pub const fn as_cdp_str(&self) -> &'static str {
        match *self {
            Self::Stylesheet => "Stylesheet",
            Self::Media => "Media",
            Self::Font => "Font",
            Self::Image => "Image",
        }
    }
}
80
81// ─── ResourceFilter ───────────────────────────────────────────────────────────
82
83/// Set of resource types to block from loading.
84///
85/// # Example
86///
87/// ```
88/// use stygian_browser::page::ResourceFilter;
89/// let filter = ResourceFilter::block_media();
90/// assert!(filter.should_block("Image"));
91/// ```
92#[derive(Debug, Clone, Default)]
93pub struct ResourceFilter {
94 blocked: Vec<ResourceType>,
95}
96
97impl ResourceFilter {
98 /// Block all media resources (images, fonts, CSS, audio/video).
99 pub fn block_media() -> Self {
100 Self {
101 blocked: vec![
102 ResourceType::Image,
103 ResourceType::Font,
104 ResourceType::Stylesheet,
105 ResourceType::Media,
106 ],
107 }
108 }
109
110 /// Block only images and fonts (keep styles for layout-sensitive work).
111 pub fn block_images_and_fonts() -> Self {
112 Self {
113 blocked: vec![ResourceType::Image, ResourceType::Font],
114 }
115 }
116
117 /// Add a resource type to the block list.
118 #[must_use]
119 pub fn block(mut self, resource: ResourceType) -> Self {
120 if !self.blocked.contains(&resource) {
121 self.blocked.push(resource);
122 }
123 self
124 }
125
126 /// Returns `true` if the given CDP resource type string should be blocked.
127 pub fn should_block(&self, cdp_type: &str) -> bool {
128 self.blocked
129 .iter()
130 .any(|r| r.as_cdp_str().eq_ignore_ascii_case(cdp_type))
131 }
132
133 /// Returns `true` if no resource types are blocked.
134 pub const fn is_empty(&self) -> bool {
135 self.blocked.is_empty()
136 }
137}
138
139// ─── WaitUntil ────────────────────────────────────────────────────────────────
140
/// Condition to wait for after a navigation.
///
/// Passed to [`PageHandle::navigate`], which sets up whatever CDP event
/// subscriptions the chosen variant needs before it issues the navigation.
///
/// # Example
///
/// ```
/// use stygian_browser::page::WaitUntil;
/// let w = WaitUntil::Selector("#main".to_string());
/// assert!(matches!(w, WaitUntil::Selector(_)));
/// ```
#[derive(Debug, Clone)]
pub enum WaitUntil {
    /// Wait for the `Page.domContentEventFired` CDP event — fires when the HTML
    /// document has been fully parsed and the DOM is ready, before subresources
    /// such as images and stylesheets finish loading.
    DomContentLoaded,
    /// Wait for the `Page.loadEventFired` CDP event **and** then wait until no
    /// more than 2 network requests are in-flight for at least 500 ms
    /// (equivalent to Puppeteer's `networkidle2`).
    NetworkIdle,
    /// Wait until the CSS selector matches an element. The page is polled
    /// every 100 ms until a match is found or the navigation times out.
    Selector(String),
}
163
164// ─── NodeHandle ───────────────────────────────────────────────────────────────
165
/// A handle to a live DOM node backed by a CDP `RemoteObjectId`.
///
/// Obtained via [`PageHandle::query_selector_all`] or
/// [`NodeHandle::children_matching`]. Methods talk to the browser over CDP
/// against the held remote object reference — no HTML serialisation of the
/// whole page occurs. Every call is bounded by the CDP timeout the handle was
/// created with.
///
/// A handle becomes **stale** after page navigation or if the underlying DOM
/// node is removed. Stale calls return [`BrowserError::StaleNode`] so callers
/// can distinguish them from other CDP failures.
///
/// # Example
///
/// ```no_run
/// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
/// use std::time::Duration;
///
/// # async fn run() -> stygian_browser::error::Result<()> {
/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
/// let handle = pool.acquire().await?;
/// let mut page = handle.browser().expect("valid browser").new_page().await?;
/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
///
/// for node in page.query_selector_all("a[href]").await? {
///     let href = node.attr("href").await?;
///     let text = node.text_content().await?;
///     println!("{text}: {href:?}");
/// }
/// # Ok(())
/// # }
/// ```
pub struct NodeHandle {
    /// The chromiumoxide element every method on this handle operates on.
    element: chromiumoxide::element::Element,
    /// Original CSS selector — preserved for stale-node error messages only.
    /// Shared via `Arc<str>` so all handles from a single query reuse the
    /// same allocation rather than cloning a `String` per node.
    selector: Arc<str>,
    /// Upper bound applied to each individual CDP call made through this
    /// handle; exceeding it yields [`BrowserError::Timeout`].
    cdp_timeout: Duration,
}
204
205impl NodeHandle {
206 /// Return a single attribute value, or `None` if the attribute is absent.
207 ///
208 /// Issues one `Runtime.callFunctionOn` CDP call (`el.getAttribute(name)`).
209 ///
210 /// # Errors
211 ///
212 /// Returns [`BrowserError::StaleNode`] when the remote object has been
213 /// invalidated, or [`BrowserError::Timeout`] / [`BrowserError::CdpError`]
214 /// on transport-level failures.
215 pub async fn attr(&self, name: &str) -> Result<Option<String>> {
216 timeout(self.cdp_timeout, self.element.attribute(name))
217 .await
218 .map_err(|_| BrowserError::Timeout {
219 operation: "NodeHandle::attr".to_string(),
220 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
221 })?
222 .map_err(|e| self.cdp_err_or_stale(&e, "attr"))
223 }
224
225 /// Return all attributes as a `HashMap<name, value>` in a **single**
226 /// CDP round-trip.
227 ///
228 /// Uses `DOM.getAttributes` (via the chromiumoxide `attributes()` API)
229 /// which returns a flat `[name, value, name, value, …]` list from the node
230 /// description — no per-attribute calls are needed.
231 ///
232 /// # Errors
233 ///
234 /// Returns [`BrowserError::StaleNode`] when the remote object has been
235 /// invalidated.
236 pub async fn attr_map(&self) -> Result<HashMap<String, String>> {
237 let flat = timeout(self.cdp_timeout, self.element.attributes())
238 .await
239 .map_err(|_| BrowserError::Timeout {
240 operation: "NodeHandle::attr_map".to_string(),
241 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
242 })?
243 .map_err(|e| self.cdp_err_or_stale(&e, "attr_map"))?;
244
245 let mut map = HashMap::with_capacity(flat.len() / 2);
246 for pair in flat.chunks_exact(2) {
247 if let [name, value] = pair {
248 map.insert(name.clone(), value.clone());
249 }
250 }
251 Ok(map)
252 }
253
254 /// Return the element's `textContent` (all text inside, no markup).
255 ///
256 /// Reads the DOM `textContent` property via a single JS eval — this is the
257 /// raw text concatenation of all descendant text nodes, independent of
258 /// layout or visibility (unlike `innerText`).
259 ///
260 /// Returns an empty string when the property is absent or null.
261 ///
262 /// # Errors
263 ///
264 /// Returns [`BrowserError::StaleNode`] when the remote object has been
265 /// invalidated.
266 pub async fn text_content(&self) -> Result<String> {
267 let returns = timeout(
268 self.cdp_timeout,
269 self.element
270 .call_js_fn(r"function() { return this.textContent ?? ''; }", true),
271 )
272 .await
273 .map_err(|_| BrowserError::Timeout {
274 operation: "NodeHandle::text_content".to_string(),
275 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
276 })?
277 .map_err(|e| self.cdp_err_or_stale(&e, "text_content"))?;
278
279 Ok(returns
280 .result
281 .value
282 .as_ref()
283 .and_then(|v| v.as_str())
284 .unwrap_or("")
285 .to_string())
286 }
287
288 /// Return the element's `innerHTML`.
289 ///
290 /// Returns an empty string when the property is absent or null.
291 ///
292 /// # Errors
293 ///
294 /// Returns [`BrowserError::StaleNode`] when the remote object has been
295 /// invalidated.
296 pub async fn inner_html(&self) -> Result<String> {
297 timeout(self.cdp_timeout, self.element.inner_html())
298 .await
299 .map_err(|_| BrowserError::Timeout {
300 operation: "NodeHandle::inner_html".to_string(),
301 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
302 })?
303 .map_err(|e| self.cdp_err_or_stale(&e, "inner_html"))
304 .map(Option::unwrap_or_default)
305 }
306
307 /// Return the element's `outerHTML`.
308 ///
309 /// Returns an empty string when the property is absent or null.
310 ///
311 /// # Errors
312 ///
313 /// Returns [`BrowserError::StaleNode`] when the remote object has been
314 /// invalidated.
315 pub async fn outer_html(&self) -> Result<String> {
316 timeout(self.cdp_timeout, self.element.outer_html())
317 .await
318 .map_err(|_| BrowserError::Timeout {
319 operation: "NodeHandle::outer_html".to_string(),
320 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
321 })?
322 .map_err(|e| self.cdp_err_or_stale(&e, "outer_html"))
323 .map(Option::unwrap_or_default)
324 }
325
326 /// Return the ancestor tag-name chain, root-last.
327 ///
328 /// Executes a single `Runtime.callFunctionOn` JavaScript function that
329 /// walks `parentElement` and collects tag names — no repeated CDP calls.
330 ///
331 /// ```text
332 /// // for <span> inside <p> inside <article> inside <body> inside <html>
333 /// ["p", "article", "body", "html"]
334 /// ```
335 ///
336 /// # Errors
337 ///
338 /// Returns [`BrowserError::StaleNode`] when the remote object has been
339 /// invalidated, or [`BrowserError::ScriptExecutionFailed`] when CDP
340 /// returns no value or the value is not a string array.
341 pub async fn ancestors(&self) -> Result<Vec<String>> {
342 let returns = timeout(
343 self.cdp_timeout,
344 self.element.call_js_fn(
345 r"function() {
346 const a = [];
347 let n = this.parentElement;
348 while (n) { a.push(n.tagName.toLowerCase()); n = n.parentElement; }
349 return a;
350 }",
351 true,
352 ),
353 )
354 .await
355 .map_err(|_| BrowserError::Timeout {
356 operation: "NodeHandle::ancestors".to_string(),
357 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
358 })?
359 .map_err(|e| self.cdp_err_or_stale(&e, "ancestors"))?;
360
361 // With returnByValue=true and an array return, CDP delivers the value
362 // as a JSON array directly — no JSON.stringify/re-parse needed.
363 // A missing or wrong-type value indicates an unexpected CDP failure.
364 let arr = returns
365 .result
366 .value
367 .as_ref()
368 .and_then(|v| v.as_array())
369 .ok_or_else(|| BrowserError::ScriptExecutionFailed {
370 script: "NodeHandle::ancestors".to_string(),
371 reason: "CDP returned no value or a non-array value for ancestors()".to_string(),
372 })?;
373
374 arr.iter()
375 .map(|v| {
376 v.as_str().map(ToString::to_string).ok_or_else(|| {
377 BrowserError::ScriptExecutionFailed {
378 script: "NodeHandle::ancestors".to_string(),
379 reason: format!("ancestor entry is not a string: {v}"),
380 }
381 })
382 })
383 .collect()
384 }
385
386 /// Return child elements matching `selector` as new [`NodeHandle`]s.
387 ///
388 /// Issues a single `Runtime.callFunctionOn` + `DOM.querySelectorAll`
389 /// call scoped to this element — not to the entire document.
390 ///
391 /// Returns an empty `Vec` when no children match (consistent with the JS
392 /// `querySelectorAll` contract).
393 ///
394 /// # Errors
395 ///
396 /// Returns [`BrowserError::StaleNode`] when the remote object has been
397 /// invalidated, or [`BrowserError::CdpError`] on transport failure.
398 pub async fn children_matching(&self, selector: &str) -> Result<Vec<Self>> {
399 let elements = timeout(self.cdp_timeout, self.element.find_elements(selector))
400 .await
401 .map_err(|_| BrowserError::Timeout {
402 operation: "NodeHandle::children_matching".to_string(),
403 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
404 })?
405 .map_err(|e| self.cdp_err_or_stale(&e, "children_matching"))?;
406
407 let selector_arc: Arc<str> = Arc::from(selector);
408 Ok(elements
409 .into_iter()
410 .map(|el| Self {
411 element: el,
412 selector: selector_arc.clone(),
413 cdp_timeout: self.cdp_timeout,
414 })
415 .collect())
416 }
417
418 /// Map a chromiumoxide `CdpError` to either [`BrowserError::StaleNode`]
419 /// (when the remote object reference has been invalidated) or
420 /// [`BrowserError::CdpError`] for all other failures.
421 fn cdp_err_or_stale(
422 &self,
423 err: &chromiumoxide::error::CdpError,
424 operation: &str,
425 ) -> BrowserError {
426 let msg = err.to_string();
427 if msg.contains("Cannot find object with id")
428 || msg.contains("context with specified id")
429 || msg.contains("Cannot find context")
430 {
431 BrowserError::StaleNode {
432 selector: self.selector.to_string(),
433 }
434 } else {
435 BrowserError::CdpError {
436 operation: operation.to_string(),
437 message: msg,
438 }
439 }
440 }
441}
442
443// ─── PageHandle ───────────────────────────────────────────────────────────────
444
/// A handle to an open browser tab.
///
/// On drop the underlying page is closed automatically.
///
/// # Example
///
/// ```no_run
/// use stygian_browser::{BrowserPool, BrowserConfig};
/// use stygian_browser::page::WaitUntil;
/// use std::time::Duration;
///
/// # async fn run() -> stygian_browser::error::Result<()> {
/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
/// let handle = pool.acquire().await?;
/// let mut page = handle.browser().expect("valid browser").new_page().await?;
/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
/// let html = page.content().await?;
/// drop(page); // closes the tab
/// handle.release().await;
/// # Ok(())
/// # }
/// ```
pub struct PageHandle {
    /// The wrapped chromiumoxide page all CDP calls are issued against.
    page: Page,
    /// Upper bound applied to individual CDP calls (navigation uses its own,
    /// caller-supplied timeout instead).
    cdp_timeout: Duration,
    /// HTTP status code of the most recent main-frame navigation, or `0` if not
    /// yet captured. Written atomically by the listener spawned in `navigate()`.
    last_status_code: Arc<AtomicU16>,
    /// Background task processing `Fetch.requestPaused` events. Aborted and
    /// replaced each time `set_resource_filter` is called.
    resource_filter_task: Option<tokio::task::JoinHandle<()>>,
}
477
478impl PageHandle {
479 /// Wrap a raw chromiumoxide [`Page`] in a handle.
480 pub(crate) fn new(page: Page, cdp_timeout: Duration) -> Self {
481 Self {
482 page,
483 cdp_timeout,
484 last_status_code: Arc::new(AtomicU16::new(0)),
485 resource_filter_task: None,
486 }
487 }
488
489 /// Navigate to `url` and wait for `condition` within `nav_timeout`.
490 ///
491 /// # Errors
492 ///
493 /// Returns [`BrowserError::NavigationFailed`] if the navigation times out or
494 /// the CDP call fails.
495 pub async fn navigate(
496 &mut self,
497 url: &str,
498 condition: WaitUntil,
499 nav_timeout: Duration,
500 ) -> Result<()> {
501 self.setup_status_capture().await;
502 timeout(
503 nav_timeout,
504 self.navigate_inner(url, condition, nav_timeout),
505 )
506 .await
507 .map_err(|_| BrowserError::NavigationFailed {
508 url: url.to_string(),
509 reason: format!("navigation timed out after {nav_timeout:?}"),
510 })?
511 }
512
513 /// Reset the last status code and wire up the `Network.responseReceived`
514 /// listener before any navigation starts. Errors are logged and swallowed
515 /// so that a missing network domain never blocks navigation.
516 async fn setup_status_capture(&self) {
517 use chromiumoxide::cdp::browser_protocol::network::{
518 EventResponseReceived, ResourceType as NetworkResourceType,
519 };
520 use futures::StreamExt;
521
522 // Reset so a stale code is not returned if the new navigation fails
523 // before the response headers arrive.
524 self.last_status_code.store(0, Ordering::Release);
525
526 // Subscribe *before* goto() — the listener runs in a detached task and
527 // stores the first Document-type response status atomically.
528 let page_for_listener = self.page.clone();
529 let status_capture = Arc::clone(&self.last_status_code);
530 match page_for_listener
531 .event_listener::<EventResponseReceived>()
532 .await
533 {
534 Ok(mut stream) => {
535 tokio::spawn(async move {
536 while let Some(event) = stream.next().await {
537 if event.r#type == NetworkResourceType::Document {
538 let code = u16::try_from(event.response.status).unwrap_or(0);
539 if code > 0 {
540 status_capture.store(code, Ordering::Release);
541 }
542 break;
543 }
544 }
545 });
546 }
547 Err(e) => warn!("status-code capture unavailable: {e}"),
548 }
549 }
550
551 /// Subscribe to the appropriate CDP events, fire `goto`, then await
552 /// `condition`. All subscriptions precede `goto` to eliminate the race
553 /// described in issue #7.
554 async fn navigate_inner(
555 &self,
556 url: &str,
557 condition: WaitUntil,
558 nav_timeout: Duration,
559 ) -> Result<()> {
560 use chromiumoxide::cdp::browser_protocol::page::{
561 EventDomContentEventFired, EventLoadEventFired,
562 };
563 use futures::StreamExt;
564
565 let url_owned = url.to_string();
566
567 let mut dom_events = match &condition {
568 WaitUntil::DomContentLoaded => Some(
569 self.page
570 .event_listener::<EventDomContentEventFired>()
571 .await
572 .map_err(|e| BrowserError::NavigationFailed {
573 url: url_owned.clone(),
574 reason: e.to_string(),
575 })?,
576 ),
577 _ => None,
578 };
579
580 let mut load_events = match &condition {
581 WaitUntil::NetworkIdle => Some(
582 self.page
583 .event_listener::<EventLoadEventFired>()
584 .await
585 .map_err(|e| BrowserError::NavigationFailed {
586 url: url_owned.clone(),
587 reason: e.to_string(),
588 })?,
589 ),
590 _ => None,
591 };
592
593 let inflight = if matches!(condition, WaitUntil::NetworkIdle) {
594 Some(self.subscribe_inflight_counter().await)
595 } else {
596 None
597 };
598
599 self.page
600 .goto(url)
601 .await
602 .map_err(|e| BrowserError::NavigationFailed {
603 url: url_owned.clone(),
604 reason: e.to_string(),
605 })?;
606
607 match &condition {
608 WaitUntil::DomContentLoaded => {
609 if let Some(ref mut events) = dom_events {
610 let _ = events.next().await;
611 }
612 }
613 WaitUntil::NetworkIdle => {
614 if let Some(ref mut events) = load_events {
615 let _ = events.next().await;
616 }
617 if let Some(ref counter) = inflight {
618 Self::wait_network_idle(counter).await;
619 }
620 }
621 WaitUntil::Selector(css) => {
622 self.wait_for_selector(css, nav_timeout).await?;
623 }
624 }
625 Ok(())
626 }
627
628 /// Spawn three detached tasks that maintain a signed in-flight request
629 /// counter via `Network.requestWillBeSent` (+1) and
630 /// `Network.loadingFinished`/`Network.loadingFailed` (−1 each).
631 /// Returns the shared counter so the caller can poll it.
632 async fn subscribe_inflight_counter(&self) -> Arc<std::sync::atomic::AtomicI32> {
633 use std::sync::atomic::AtomicI32;
634
635 use chromiumoxide::cdp::browser_protocol::network::{
636 EventLoadingFailed, EventLoadingFinished, EventRequestWillBeSent,
637 };
638 use futures::StreamExt;
639
640 let counter: Arc<AtomicI32> = Arc::new(AtomicI32::new(0));
641 let pairs: [(Arc<AtomicI32>, i32); 3] = [
642 (Arc::clone(&counter), 1),
643 (Arc::clone(&counter), -1),
644 (Arc::clone(&counter), -1),
645 ];
646 let [p1, p2, p3] = [self.page.clone(), self.page.clone(), self.page.clone()];
647
648 macro_rules! spawn_tracker {
649 ($page:expr, $event:ty, $c:expr, $delta:expr) => {
650 match $page.event_listener::<$event>().await {
651 Ok(mut s) => {
652 let c = $c;
653 let d = $delta;
654 tokio::spawn(async move {
655 while s.next().await.is_some() {
656 c.fetch_add(d, Ordering::Relaxed);
657 }
658 });
659 }
660 Err(e) => warn!("network-idle tracker unavailable: {e}"),
661 }
662 };
663 }
664
665 let [(c1, d1), (c2, d2), (c3, d3)] = pairs;
666 spawn_tracker!(p1, EventRequestWillBeSent, c1, d1);
667 spawn_tracker!(p2, EventLoadingFinished, c2, d2);
668 spawn_tracker!(p3, EventLoadingFailed, c3, d3);
669
670 counter
671 }
672
673 /// Poll `counter` until ≤ 2 in-flight requests persist for 500 ms
674 /// (equivalent to Playwright's `networkidle2`).
675 async fn wait_network_idle(counter: &Arc<std::sync::atomic::AtomicI32>) {
676 const IDLE_THRESHOLD: i32 = 2;
677 const SETTLE: Duration = Duration::from_millis(500);
678 loop {
679 if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
680 tokio::time::sleep(SETTLE).await;
681 if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
682 break;
683 }
684 } else {
685 tokio::time::sleep(Duration::from_millis(50)).await;
686 }
687 }
688 }
689
690 /// Wait until `document.querySelector(selector)` is non-null (`timeout`).
691 ///
692 /// # Errors
693 ///
694 /// Returns [`BrowserError::NavigationFailed`] if the selector is not found
695 /// within the given timeout.
696 pub async fn wait_for_selector(&self, selector: &str, wait_timeout: Duration) -> Result<()> {
697 let selector_owned = selector.to_string();
698 let poll = async {
699 loop {
700 if self.page.find_element(selector_owned.clone()).await.is_ok() {
701 return Ok(());
702 }
703 tokio::time::sleep(Duration::from_millis(100)).await;
704 }
705 };
706
707 timeout(wait_timeout, poll)
708 .await
709 .map_err(|_| BrowserError::NavigationFailed {
710 url: String::new(),
711 reason: format!("selector '{selector_owned}' not found within {wait_timeout:?}"),
712 })?
713 }
714
715 /// Set a resource filter to block specific network request types.
716 ///
717 /// Enables `Fetch` interception and spawns a background task that continues
718 /// allowed requests and fails blocked ones with `BlockedByClient`. Any
719 /// previously set filter task is cancelled first.
720 ///
721 /// # Errors
722 ///
723 /// Returns a [`BrowserError::CdpError`] if the CDP call fails.
724 pub async fn set_resource_filter(&mut self, filter: ResourceFilter) -> Result<()> {
725 use chromiumoxide::cdp::browser_protocol::fetch::{
726 ContinueRequestParams, EnableParams, EventRequestPaused, FailRequestParams,
727 RequestPattern,
728 };
729 use chromiumoxide::cdp::browser_protocol::network::ErrorReason;
730 use futures::StreamExt as _;
731
732 if filter.is_empty() {
733 return Ok(());
734 }
735
736 // Cancel any previously running filter task.
737 if let Some(task) = self.resource_filter_task.take() {
738 task.abort();
739 }
740
741 let pattern = RequestPattern::builder().url_pattern("*").build();
742 let params = EnableParams::builder()
743 .patterns(vec![pattern])
744 .handle_auth_requests(false)
745 .build();
746
747 timeout(self.cdp_timeout, self.page.execute::<EnableParams>(params))
748 .await
749 .map_err(|_| BrowserError::Timeout {
750 operation: "Fetch.enable".to_string(),
751 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
752 })?
753 .map_err(|e| BrowserError::CdpError {
754 operation: "Fetch.enable".to_string(),
755 message: e.to_string(),
756 })?;
757
758 // Subscribe to requestPaused events and dispatch each one so navigation
759 // is never blocked. Without this handler Chrome holds every intercepted
760 // request indefinitely and the page hangs.
761 let mut events = self
762 .page
763 .event_listener::<EventRequestPaused>()
764 .await
765 .map_err(|e| BrowserError::CdpError {
766 operation: "Fetch.requestPaused subscribe".to_string(),
767 message: e.to_string(),
768 })?;
769
770 let page = self.page.clone();
771 debug!("Resource filter active: {:?}", filter);
772 let task = tokio::spawn(async move {
773 while let Some(event) = events.next().await {
774 let request_id = event.request_id.clone();
775 if filter.should_block(event.resource_type.as_ref()) {
776 let params = FailRequestParams::new(request_id, ErrorReason::BlockedByClient);
777 let _ = page.execute(params).await;
778 } else {
779 let _ = page.execute(ContinueRequestParams::new(request_id)).await;
780 }
781 }
782 });
783
784 self.resource_filter_task = Some(task);
785 Ok(())
786 }
787
788 /// Return the current page URL (post-navigation, post-redirect).
789 ///
790 /// Delegates to the CDP `Target.getTargetInfo` binding already used
791 /// internally by [`save_cookies`](Self::save_cookies); no extra network
792 /// request is made. Returns an empty string if the URL is not yet set
793 /// (e.g. on a blank tab before the first navigation).
794 ///
795 /// # Errors
796 ///
797 /// Returns [`BrowserError::CdpError`] if the underlying CDP call fails, or
798 /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
799 ///
800 /// # Example
801 ///
802 /// ```no_run
803 /// use stygian_browser::{BrowserPool, BrowserConfig};
804 /// use stygian_browser::page::WaitUntil;
805 /// use std::time::Duration;
806 ///
807 /// # async fn run() -> stygian_browser::error::Result<()> {
808 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
809 /// let handle = pool.acquire().await?;
810 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
811 /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
812 /// let url = page.url().await?;
813 /// println!("Final URL after redirects: {url}");
814 /// # Ok(())
815 /// # }
816 /// ```
817 pub async fn url(&self) -> Result<String> {
818 timeout(self.cdp_timeout, self.page.url())
819 .await
820 .map_err(|_| BrowserError::Timeout {
821 operation: "page.url".to_string(),
822 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
823 })?
824 .map_err(|e| BrowserError::CdpError {
825 operation: "page.url".to_string(),
826 message: e.to_string(),
827 })
828 .map(Option::unwrap_or_default)
829 }
830
831 /// Return the HTTP status code of the most recent main-frame navigation.
832 ///
833 /// The status is captured from the `Network.responseReceived` CDP event
834 /// wired up inside [`navigate`](Self::navigate), so it reflects the
835 /// *final* response after any server-side redirects.
836 ///
837 /// Returns `None` if the status was not captured — for example on `file://`
838 /// navigations, when [`navigate`](Self::navigate) has not yet been called,
839 /// or if the network event subscription failed.
840 ///
841 /// # Errors
842 ///
843 /// This method is infallible; the `Result` wrapper is kept for API
844 /// consistency with other `PageHandle` methods.
845 ///
846 /// # Example
847 ///
848 /// ```no_run
849 /// use stygian_browser::{BrowserPool, BrowserConfig};
850 /// use stygian_browser::page::WaitUntil;
851 /// use std::time::Duration;
852 ///
853 /// # async fn run() -> stygian_browser::error::Result<()> {
854 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
855 /// let handle = pool.acquire().await?;
856 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
857 /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
858 /// if let Some(code) = page.status_code()? {
859 /// println!("HTTP {code}");
860 /// }
861 /// # Ok(())
862 /// # }
863 /// ```
864 pub fn status_code(&self) -> Result<Option<u16>> {
865 let code = self.last_status_code.load(Ordering::Acquire);
866 Ok(if code == 0 { None } else { Some(code) })
867 }
868
869 /// Return the page's `<title>` text.
870 ///
871 /// # Errors
872 ///
873 /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
874 pub async fn title(&self) -> Result<String> {
875 timeout(self.cdp_timeout, self.page.get_title())
876 .await
877 .map_err(|_| BrowserError::Timeout {
878 operation: "get_title".to_string(),
879 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
880 })?
881 .map_err(|e| BrowserError::ScriptExecutionFailed {
882 script: "document.title".to_string(),
883 reason: e.to_string(),
884 })
885 .map(Option::unwrap_or_default)
886 }
887
888 /// Return the page's full outer HTML.
889 ///
890 /// # Errors
891 ///
892 /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
893 pub async fn content(&self) -> Result<String> {
894 timeout(self.cdp_timeout, self.page.content())
895 .await
896 .map_err(|_| BrowserError::Timeout {
897 operation: "page.content".to_string(),
898 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
899 })?
900 .map_err(|e| BrowserError::ScriptExecutionFailed {
901 script: "document.documentElement.outerHTML".to_string(),
902 reason: e.to_string(),
903 })
904 }
905
906 /// Query the live DOM for all elements matching `selector` and return
907 /// lightweight [`NodeHandle`]s backed by CDP `RemoteObjectId`s.
908 ///
909 /// No HTML serialisation occurs — the browser's in-memory DOM is queried
910 /// directly over the CDP connection, eliminating the `page.content()` +
911 /// `scraper::Html::parse_document` round-trip.
912 ///
913 /// Returns an empty `Vec` when no elements match (consistent with the JS
914 /// `querySelectorAll` contract — not an error).
915 ///
916 /// # Errors
917 ///
918 /// Returns [`BrowserError::CdpError`] if the CDP find call fails, or
919 /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
920 ///
921 /// # Example
922 ///
923 /// ```no_run
924 /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
925 /// use std::time::Duration;
926 ///
927 /// # async fn run() -> stygian_browser::error::Result<()> {
928 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
929 /// let handle = pool.acquire().await?;
930 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
931 /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
932 ///
933 /// let nodes = page.query_selector_all("[data-ux]").await?;
934 /// for node in &nodes {
935 /// let ux_type = node.attr("data-ux").await?;
936 /// let text = node.text_content().await?;
937 /// println!("{ux_type:?}: {text}");
938 /// }
939 /// # Ok(())
940 /// # }
941 /// ```
942 pub async fn query_selector_all(&self, selector: &str) -> Result<Vec<NodeHandle>> {
943 let elements = timeout(self.cdp_timeout, self.page.find_elements(selector))
944 .await
945 .map_err(|_| BrowserError::Timeout {
946 operation: "PageHandle::query_selector_all".to_string(),
947 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
948 })?
949 .map_err(|e| BrowserError::CdpError {
950 operation: "PageHandle::query_selector_all".to_string(),
951 message: e.to_string(),
952 })?;
953
954 let selector_arc: Arc<str> = Arc::from(selector);
955 Ok(elements
956 .into_iter()
957 .map(|el| NodeHandle {
958 element: el,
959 selector: selector_arc.clone(),
960 cdp_timeout: self.cdp_timeout,
961 })
962 .collect())
963 }
964
965 /// Evaluate arbitrary JavaScript and return the result as `T`.
966 ///
967 /// # Errors
968 ///
969 /// Returns [`BrowserError::ScriptExecutionFailed`] on eval failure or
970 /// deserialization error.
971 pub async fn eval<T: serde::de::DeserializeOwned>(&self, script: &str) -> Result<T> {
972 let script_owned = script.to_string();
973 timeout(self.cdp_timeout, self.page.evaluate(script))
974 .await
975 .map_err(|_| BrowserError::Timeout {
976 operation: "page.evaluate".to_string(),
977 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
978 })?
979 .map_err(|e| BrowserError::ScriptExecutionFailed {
980 script: script_owned.clone(),
981 reason: e.to_string(),
982 })?
983 .into_value::<T>()
984 .map_err(|e| BrowserError::ScriptExecutionFailed {
985 script: script_owned,
986 reason: e.to_string(),
987 })
988 }
989
990 /// Save all cookies for the current page's origin.
991 ///
992 /// # Errors
993 ///
994 /// Returns [`BrowserError::CdpError`] if the CDP call fails.
995 pub async fn save_cookies(
996 &self,
997 ) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::Cookie>> {
998 use chromiumoxide::cdp::browser_protocol::network::GetCookiesParams;
999
1000 let url = self
1001 .page
1002 .url()
1003 .await
1004 .map_err(|e| BrowserError::CdpError {
1005 operation: "page.url".to_string(),
1006 message: e.to_string(),
1007 })?
1008 .unwrap_or_default();
1009
1010 timeout(
1011 self.cdp_timeout,
1012 self.page
1013 .execute(GetCookiesParams::builder().urls(vec![url]).build()),
1014 )
1015 .await
1016 .map_err(|_| BrowserError::Timeout {
1017 operation: "Network.getCookies".to_string(),
1018 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1019 })?
1020 .map_err(|e| BrowserError::CdpError {
1021 operation: "Network.getCookies".to_string(),
1022 message: e.to_string(),
1023 })
1024 .map(|r| r.cookies.clone())
1025 }
1026
1027 /// Inject cookies into the current page.
1028 ///
1029 /// Seeds session tokens or other state without needing a full
1030 /// [`SessionSnapshot`][crate::session::SessionSnapshot] and without
1031 /// requiring a direct `chromiumoxide` dependency in calling code.
1032 ///
1033 /// Individual cookie failures are logged as warnings and do not abort the
1034 /// remaining cookies.
1035 ///
1036 /// # Errors
1037 ///
1038 /// Returns [`BrowserError::Timeout`] if a single `Network.setCookie` CDP
1039 /// call exceeds `cdp_timeout`.
1040 ///
1041 /// # Example
1042 ///
1043 /// ```no_run
1044 /// use stygian_browser::{BrowserPool, BrowserConfig};
1045 /// use stygian_browser::session::SessionCookie;
1046 /// use std::time::Duration;
1047 ///
1048 /// # async fn run() -> stygian_browser::error::Result<()> {
1049 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1050 /// let handle = pool.acquire().await?;
1051 /// let page = handle.browser().expect("valid browser").new_page().await?;
1052 /// let cookies = vec![SessionCookie {
1053 /// name: "session".to_string(),
1054 /// value: "abc123".to_string(),
1055 /// domain: ".example.com".to_string(),
1056 /// path: "/".to_string(),
1057 /// expires: -1.0,
1058 /// http_only: true,
1059 /// secure: true,
1060 /// same_site: "Lax".to_string(),
1061 /// }];
1062 /// page.inject_cookies(&cookies).await?;
1063 /// # Ok(())
1064 /// # }
1065 /// ```
1066 pub async fn inject_cookies(&self, cookies: &[crate::session::SessionCookie]) -> Result<()> {
1067 use chromiumoxide::cdp::browser_protocol::network::SetCookieParams;
1068
1069 for cookie in cookies {
1070 let params = match SetCookieParams::builder()
1071 .name(cookie.name.clone())
1072 .value(cookie.value.clone())
1073 .domain(cookie.domain.clone())
1074 .path(cookie.path.clone())
1075 .http_only(cookie.http_only)
1076 .secure(cookie.secure)
1077 .build()
1078 {
1079 Ok(p) => p,
1080 Err(e) => {
1081 warn!(cookie = %cookie.name, error = %e, "Failed to build cookie params");
1082 continue;
1083 }
1084 };
1085
1086 match timeout(self.cdp_timeout, self.page.execute(params)).await {
1087 Err(_) => {
1088 warn!(
1089 cookie = %cookie.name,
1090 timeout_ms = self.cdp_timeout.as_millis(),
1091 "Timed out injecting cookie"
1092 );
1093 }
1094 Ok(Err(e)) => {
1095 warn!(cookie = %cookie.name, error = %e, "Failed to inject cookie");
1096 }
1097 Ok(Ok(_)) => {}
1098 }
1099 }
1100
1101 debug!(count = cookies.len(), "Cookies injected");
1102 Ok(())
1103 }
1104
1105 /// Capture a screenshot of the current page as PNG bytes.
1106 ///
1107 /// The screenshot is full-page by default (viewport clipped to the rendered
1108 /// layout area). Save the returned bytes to a `.png` file or process
1109 /// them in-memory.
1110 ///
1111 /// # Errors
1112 ///
1113 /// Returns [`BrowserError::CdpError`] if the CDP `Page.captureScreenshot`
1114 /// command fails, or [`BrowserError::Timeout`] if it exceeds
1115 /// `cdp_timeout`.
1116 ///
1117 /// # Example
1118 ///
1119 /// ```no_run
1120 /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1121 /// use std::{time::Duration, fs};
1122 ///
1123 /// # async fn run() -> stygian_browser::error::Result<()> {
1124 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1125 /// let handle = pool.acquire().await?;
1126 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1127 /// page.navigate("https://example.com", WaitUntil::Selector("body".to_string()), Duration::from_secs(30)).await?;
1128 /// let png = page.screenshot().await?;
1129 /// fs::write("screenshot.png", &png).unwrap();
1130 /// # Ok(())
1131 /// # }
1132 /// ```
1133 pub async fn screenshot(&self) -> Result<Vec<u8>> {
1134 use chromiumoxide::page::ScreenshotParams;
1135
1136 let params = ScreenshotParams::builder().full_page(true).build();
1137
1138 timeout(self.cdp_timeout, self.page.screenshot(params))
1139 .await
1140 .map_err(|_| BrowserError::Timeout {
1141 operation: "Page.captureScreenshot".to_string(),
1142 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1143 })?
1144 .map_err(|e| BrowserError::CdpError {
1145 operation: "Page.captureScreenshot".to_string(),
1146 message: e.to_string(),
1147 })
1148 }
1149
/// Borrow the underlying chromiumoxide [`Page`].
///
/// Returns a shared reference to the wrapped page; the handle keeps
/// ownership, so the page is still closed through this handle.
pub const fn inner(&self) -> &Page {
    &self.page
}
1154
1155 /// Close this page (tab).
1156 ///
1157 /// Called automatically on drop; explicit call avoids suppressing the error.
1158 pub async fn close(self) -> Result<()> {
1159 timeout(Duration::from_secs(5), self.page.clone().close())
1160 .await
1161 .map_err(|_| BrowserError::Timeout {
1162 operation: "page.close".to_string(),
1163 duration_ms: 5000,
1164 })?
1165 .map_err(|e| BrowserError::CdpError {
1166 operation: "page.close".to_string(),
1167 message: e.to_string(),
1168 })
1169 }
1170}
1171
1172// ─── Stealth diagnostics ──────────────────────────────────────────────────────
1173
#[cfg(feature = "stealth")]
impl PageHandle {
    /// Run every built-in stealth detection check against the current page.
    ///
    /// Each check returned by [`crate::diagnostic::all_checks`] has its
    /// JavaScript evaluated through [`PageHandle::eval`], and the outcomes
    /// are collected into a [`crate::diagnostic::DiagnosticReport`].
    ///
    /// A check whose script cannot be evaluated — for any reason `eval`
    /// reports, such as a JS exception, a deserialization error or a
    /// timeout — is recorded as a failing check with the error text in its
    /// details. It does **not** abort the remaining checks.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`: evaluation errors are
    /// captured inside the report, not propagated.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # async fn run() -> stygian_browser::error::Result<()> {
    /// use stygian_browser::{BrowserPool, BrowserConfig};
    /// use stygian_browser::page::WaitUntil;
    /// use std::time::Duration;
    ///
    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
    /// let handle = pool.acquire().await?;
    /// let browser = handle.browser().expect("valid browser");
    /// let mut page = browser.new_page().await?;
    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(10)).await?;
    ///
    /// let report = page.verify_stealth().await?;
    /// println!("Stealth: {}/{} checks passed", report.passed_count, report.checks.len());
    /// for failure in report.failures() {
    ///     eprintln!("  FAIL  {}: {}", failure.description, failure.details);
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub async fn verify_stealth(&self) -> Result<crate::diagnostic::DiagnosticReport> {
        use crate::diagnostic::{CheckResult, DiagnosticReport, all_checks};

        let mut outcomes: Vec<CheckResult> = Vec::new();

        for check in all_checks() {
            let evaluated = self.eval::<String>(check.script).await;

            let outcome = match evaluated {
                Err(err) => {
                    // A broken script counts as a failed check instead of
                    // aborting the whole diagnostic run.
                    warn!(
                        check = ?check.id,
                        error = %err,
                        "stealth check script failed during evaluation"
                    );
                    CheckResult {
                        id: check.id,
                        description: check.description.to_string(),
                        passed: false,
                        details: format!("script error: {err}"),
                    }
                }
                Ok(raw_json) => check.parse_output(&raw_json),
            };

            debug!(
                check = ?outcome.id,
                passed = outcome.passed,
                details = %outcome.details,
                "stealth check result"
            );
            outcomes.push(outcome);
        }

        Ok(DiagnosticReport::new(outcomes))
    }
}
1246
1247impl Drop for PageHandle {
1248 fn drop(&mut self) {
1249 warn!("PageHandle dropped without explicit close(); spawning cleanup task");
1250 // chromiumoxide Page does not implement close on Drop, so we spawn
1251 // a fire-and-forget task. The page ref is already owned; we need to
1252 // swap it out. We clone the Page handle (it's Arc-backed internally).
1253 let page = self.page.clone();
1254 tokio::spawn(async move {
1255 let _ = page.close().await;
1256 });
1257 }
1258}
1259
1260// ─── Tests ────────────────────────────────────────────────────────────────────
1261
#[cfg(test)]
mod tests {
    use super::*;

    // ── Shared helpers ────────────────────────────────────────────────────────

    /// Pair up a flat `[name, value, name, value, …]` slice into a map using
    /// `chunks_exact(2)`; an unpaired trailing element is dropped.
    fn pair_attributes(flat: &[String]) -> std::collections::HashMap<String, String> {
        flat.chunks_exact(2)
            .filter_map(|pair| match pair {
                [name, value] => Some((name.clone(), value.clone())),
                _ => None,
            })
            .collect()
    }

    /// Store `raw` in an `AtomicU16`, read it back and apply the sentinel
    /// rule: `0` means "not yet captured" and maps to `None`.
    fn stored_status(raw: u16) -> Option<u16> {
        use std::sync::atomic::{AtomicU16, Ordering};
        let cell = AtomicU16::new(raw);
        match cell.load(Ordering::Acquire) {
            0 => None,
            code => Some(code),
        }
    }

    // ── ResourceFilter / ResourceType / WaitUntil ─────────────────────────────

    #[test]
    fn resource_filter_block_media_blocks_image() {
        let filter = ResourceFilter::block_media();
        for blocked in ["Image", "Font", "Stylesheet", "Media"] {
            assert!(filter.should_block(blocked), "{blocked} should be blocked");
        }
        for allowed in ["Script", "XHR"] {
            assert!(!filter.should_block(allowed), "{allowed} should pass");
        }
    }

    #[test]
    fn resource_filter_case_insensitive() {
        let filter = ResourceFilter::block_images_and_fonts();
        // Lowercase and uppercase spellings must both match.
        for spelling in ["image", "IMAGE"] {
            assert!(filter.should_block(spelling));
        }
        assert!(!filter.should_block("Stylesheet"));
    }

    #[test]
    fn resource_filter_builder_chain() {
        let filter = ResourceFilter::default()
            .block(ResourceType::Image)
            .block(ResourceType::Font);
        for blocked in ["Image", "Font"] {
            assert!(filter.should_block(blocked));
        }
        assert!(!filter.should_block("Stylesheet"));
    }

    #[test]
    fn resource_filter_dedup_block() {
        // Blocking the same type twice must not store it twice.
        let once = ResourceFilter::default().block(ResourceType::Image);
        let twice = once.block(ResourceType::Image);
        assert_eq!(twice.blocked.len(), 1);
    }

    #[test]
    fn resource_filter_is_empty_when_default() {
        let untouched = ResourceFilter::default();
        let media = ResourceFilter::block_media();
        assert!(untouched.is_empty());
        assert!(!media.is_empty());
    }

    #[test]
    fn wait_until_selector_stores_string() {
        let strategy = WaitUntil::Selector("#foo".to_string());
        assert!(matches!(&strategy, WaitUntil::Selector(css) if css == "#foo"));
    }

    #[test]
    fn resource_type_cdp_str() {
        let expectations = [
            (ResourceType::Image, "Image"),
            (ResourceType::Font, "Font"),
            (ResourceType::Stylesheet, "Stylesheet"),
            (ResourceType::Media, "Media"),
        ];
        for (resource_type, cdp_name) in expectations {
            assert_eq!(resource_type.as_cdp_str(), cdp_name);
        }
    }

    /// `PageHandle` must be `Send + Sync` for use across thread boundaries.
    #[test]
    fn page_handle_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<PageHandle>();
    }

    // ── Status-code sentinel ──────────────────────────────────────────────────

    /// The sentinel value `0` ("not yet captured") converts to `None`; this
    /// is pure logic and needs no live browser.
    #[test]
    fn status_code_sentinel_zero_maps_to_none() {
        assert_eq!(stored_status(0), None::<u16>);
    }

    #[test]
    fn status_code_non_zero_maps_to_some() {
        for expected in [200u16, 301, 404, 503] {
            assert_eq!(stored_status(expected), Some(expected));
        }
    }

    // ── NodeHandle pure-logic tests ───────────────────────────────────────────

    /// The `chunks_exact(2)` pairing turns a well-formed flat list into the
    /// expected name → value map.
    #[test]
    fn attr_map_chunking_pairs_correctly() {
        let flat: Vec<String> = ["id", "main", "data-ux", "Section", "class", "container"]
            .iter()
            .map(|s| (*s).to_string())
            .collect();

        let map = pair_attributes(&flat);

        assert_eq!(map.len(), 3);
        for (name, value) in [("id", "main"), ("data-ux", "Section"), ("class", "container")] {
            assert_eq!(map.get(name).map(String::as_str), Some(value));
        }
    }

    /// An odd-length flat list (malformed CDP response) is tolerated: the
    /// unpaired trailing element is silently ignored.
    #[test]
    fn attr_map_chunking_ignores_odd_trailing() {
        let flat = ["orphan".to_string()]; // name without a value
        assert!(pair_attributes(&flat).is_empty());
    }

    /// Empty flat list → empty map.
    #[test]
    fn attr_map_chunking_empty_input() {
        let flat: Vec<String> = Vec::new();
        assert!(pair_attributes(&flat).is_empty());
    }

    /// `ancestors` JSON parsing: a valid array deserializes element by
    /// element, in order.
    #[test]
    fn ancestors_json_parse_round_trip() -> std::result::Result<(), serde_json::Error> {
        let parsed: Vec<String> = serde_json::from_str(r#"["p","article","body","html"]"#)?;
        assert_eq!(parsed, ["p", "article", "body", "html"]);
        Ok(())
    }

    /// `ancestors` JSON parsing: an empty array (no parent) is valid.
    #[test]
    fn ancestors_json_parse_empty() -> std::result::Result<(), serde_json::Error> {
        let parsed: Vec<String> = serde_json::from_str("[]")?;
        assert!(parsed.is_empty());
        Ok(())
    }
}