stygian_browser/page.rs
1//! Page and browsing context management for isolated, parallel scraping
2//!
3//! Each `BrowserContext` (future) is an incognito-style isolation boundary (separate
4//! cookies, localStorage, cache). Each context can contain many [`PageHandle`]s
5//! (tabs). Both types clean up their CDP resources automatically on drop.
6//!
7//! ## Resource blocking
8//!
9//! Pass a [`ResourceFilter`] to [`PageHandle::set_resource_filter`] to intercept
10//! and block specific request types (images, fonts, CSS) before page load —
11//! significantly reducing page load times for text-only scraping.
12//!
13//! ## Wait strategies
14//!
15//! [`PageHandle`] exposes three wait strategies via [`WaitUntil`]:
16//! - `DomContentLoaded` — fires when the HTML is parsed
//! - `NetworkIdle` — fires after the page `load` event, once there are ≤2 in-flight requests for 500 ms
18//! - `Selector(css)` — fires when a CSS selector matches an element
19//!
20//! # Example
21//!
22//! ```no_run
23//! use stygian_browser::{BrowserPool, BrowserConfig};
24//! use stygian_browser::page::{ResourceFilter, WaitUntil};
25//! use std::time::Duration;
26//!
27//! # async fn run() -> stygian_browser::error::Result<()> {
28//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
29//! let handle = pool.acquire().await?;
30//!
31//! let mut page = handle.browser().expect("valid browser").new_page().await?;
32//! page.set_resource_filter(ResourceFilter::block_media()).await?;
33//! page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
34//! let title = page.title().await?;
35//! println!("title: {title}");
36//! handle.release().await;
37//! # Ok(())
38//! # }
39//! ```
40
41use std::collections::HashMap;
42use std::sync::{
43 Arc,
44 atomic::{AtomicU16, Ordering},
45};
46use std::time::Duration;
47
48use chromiumoxide::Page;
49use tokio::time::timeout;
50use tracing::{debug, warn};
51
52use crate::error::{BrowserError, Result};
53
54// ─── ResourceType ─────────────────────────────────────────────────────────────
55
/// Kinds of sub-resource that a [`ResourceFilter`] can block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResourceType {
    /// Images: `<img>`, `<picture>`, background images.
    Image,
    /// Web fonts loaded via CSS `@font-face`.
    Font,
    /// External CSS stylesheets.
    Stylesheet,
    /// Media files (audio/video).
    Media,
}

impl ResourceType {
    /// Returns the CDP resource-type name for this variant.
    ///
    /// [`ResourceFilter::should_block`] compares this name (ASCII
    /// case-insensitively) against the resource type carried by the
    /// `Fetch.requestPaused` events handled in
    /// `PageHandle::set_resource_filter`.
    pub const fn as_cdp_str(&self) -> &'static str {
        match *self {
            Self::Media => "Media",
            Self::Stylesheet => "Stylesheet",
            Self::Font => "Font",
            Self::Image => "Image",
        }
    }
}
80
81// ─── ResourceFilter ───────────────────────────────────────────────────────────
82
83/// Set of resource types to block from loading.
84///
85/// # Example
86///
87/// ```
88/// use stygian_browser::page::ResourceFilter;
89/// let filter = ResourceFilter::block_media();
90/// assert!(filter.should_block("Image"));
91/// ```
92#[derive(Debug, Clone, Default)]
93pub struct ResourceFilter {
94 blocked: Vec<ResourceType>,
95}
96
97impl ResourceFilter {
98 /// Block all media resources (images, fonts, CSS, audio/video).
99 pub fn block_media() -> Self {
100 Self {
101 blocked: vec![
102 ResourceType::Image,
103 ResourceType::Font,
104 ResourceType::Stylesheet,
105 ResourceType::Media,
106 ],
107 }
108 }
109
110 /// Block only images and fonts (keep styles for layout-sensitive work).
111 pub fn block_images_and_fonts() -> Self {
112 Self {
113 blocked: vec![ResourceType::Image, ResourceType::Font],
114 }
115 }
116
117 /// Add a resource type to the block list.
118 #[must_use]
119 pub fn block(mut self, resource: ResourceType) -> Self {
120 if !self.blocked.contains(&resource) {
121 self.blocked.push(resource);
122 }
123 self
124 }
125
126 /// Returns `true` if the given CDP resource type string should be blocked.
127 pub fn should_block(&self, cdp_type: &str) -> bool {
128 self.blocked
129 .iter()
130 .any(|r| r.as_cdp_str().eq_ignore_ascii_case(cdp_type))
131 }
132
133 /// Returns `true` if no resource types are blocked.
134 pub const fn is_empty(&self) -> bool {
135 self.blocked.is_empty()
136 }
137}
138
139// ─── WaitUntil ────────────────────────────────────────────────────────────────
140
/// What a navigation should wait for before it is considered complete.
///
/// # Example
///
/// ```
/// use stygian_browser::page::WaitUntil;
/// let w = WaitUntil::Selector("#main".to_string());
/// assert!(matches!(w, WaitUntil::Selector(_)));
/// ```
#[derive(Debug, Clone)]
pub enum WaitUntil {
    /// Wait for the next `Page.domContentEventFired` CDP event, i.e. the
    /// HTML document has been parsed and the DOM is ready, before
    /// subresources such as images and stylesheets finish loading.
    DomContentLoaded,
    /// Wait for the next `Page.loadEventFired` CDP event **and** then until
    /// no more than 2 network requests are in flight across a 500 ms settle
    /// window (equivalent to Playwright's `networkidle2`).
    NetworkIdle,
    /// Wait until an element matching the given CSS selector can be found
    /// in the page.
    Selector(String),
}
163
164// ─── NodeHandle ───────────────────────────────────────────────────────────────
165
166/// A handle to a live DOM node backed by a CDP `RemoteObjectId`.
167///
168/// Obtained via [`PageHandle::query_selector_all`]. Each method issues one or
169/// more CDP `Runtime.callFunctionOn` calls against the held V8 remote object
170/// reference — no HTML serialisation occurs.
171///
172/// A handle becomes **stale** after page navigation or if the underlying DOM
173/// node is removed. Stale calls return [`BrowserError::StaleNode`] so callers
174/// can distinguish them from other CDP failures.
175///
176/// # Example
177///
178/// ```no_run
179/// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
180/// use std::time::Duration;
181///
182/// # async fn run() -> stygian_browser::error::Result<()> {
183/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
184/// let handle = pool.acquire().await?;
185/// let mut page = handle.browser().expect("valid browser").new_page().await?;
186/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
187///
188/// for node in page.query_selector_all("a[href]").await? {
189/// let href = node.attr("href").await?;
190/// let text = node.text_content().await?;
191/// println!("{text}: {href:?}");
192/// }
193/// # Ok(())
194/// # }
195/// ```
196pub struct NodeHandle {
197 element: chromiumoxide::element::Element,
198 /// Original CSS selector — preserved for stale-node error messages only.
199 selector: String,
200 cdp_timeout: Duration,
201}
202
203impl NodeHandle {
204 /// Return a single attribute value, or `None` if the attribute is absent.
205 ///
206 /// Issues one `Runtime.callFunctionOn` CDP call (`el.getAttribute(name)`).
207 ///
208 /// # Errors
209 ///
210 /// Returns [`BrowserError::StaleNode`] when the remote object has been
211 /// invalidated, or [`BrowserError::Timeout`] / [`BrowserError::CdpError`]
212 /// on transport-level failures.
213 pub async fn attr(&self, name: &str) -> Result<Option<String>> {
214 timeout(self.cdp_timeout, self.element.attribute(name))
215 .await
216 .map_err(|_| BrowserError::Timeout {
217 operation: "NodeHandle::attr".to_string(),
218 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
219 })?
220 .map_err(|e| self.cdp_err_or_stale(&e, "attr"))
221 }
222
223 /// Return all attributes as a `HashMap<name, value>` in a **single**
224 /// CDP round-trip.
225 ///
226 /// Uses `DOM.getAttributes` (via the chromiumoxide `attributes()` API)
227 /// which returns a flat `[name, value, name, value, …]` list from the node
228 /// description — no per-attribute calls are needed.
229 ///
230 /// # Errors
231 ///
232 /// Returns [`BrowserError::StaleNode`] when the remote object has been
233 /// invalidated.
234 pub async fn attr_map(&self) -> Result<HashMap<String, String>> {
235 let flat = timeout(self.cdp_timeout, self.element.attributes())
236 .await
237 .map_err(|_| BrowserError::Timeout {
238 operation: "NodeHandle::attr_map".to_string(),
239 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
240 })?
241 .map_err(|e| self.cdp_err_or_stale(&e, "attr_map"))?;
242
243 let mut map = HashMap::with_capacity(flat.len() / 2);
244 for pair in flat.chunks_exact(2) {
245 if let [name, value] = pair {
246 map.insert(name.clone(), value.clone());
247 }
248 }
249 Ok(map)
250 }
251
252 /// Return the element's `textContent` (all text inside, no markup).
253 ///
254 /// Returns an empty string when the property is absent or null.
255 ///
256 /// # Errors
257 ///
258 /// Returns [`BrowserError::StaleNode`] when the remote object has been
259 /// invalidated.
260 pub async fn text_content(&self) -> Result<String> {
261 timeout(self.cdp_timeout, self.element.inner_text())
262 .await
263 .map_err(|_| BrowserError::Timeout {
264 operation: "NodeHandle::text_content".to_string(),
265 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
266 })?
267 .map_err(|e| self.cdp_err_or_stale(&e, "text_content"))
268 .map(Option::unwrap_or_default)
269 }
270
271 /// Return the element's `innerHTML`.
272 ///
273 /// Returns an empty string when the property is absent or null.
274 ///
275 /// # Errors
276 ///
277 /// Returns [`BrowserError::StaleNode`] when the remote object has been
278 /// invalidated.
279 pub async fn inner_html(&self) -> Result<String> {
280 timeout(self.cdp_timeout, self.element.inner_html())
281 .await
282 .map_err(|_| BrowserError::Timeout {
283 operation: "NodeHandle::inner_html".to_string(),
284 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
285 })?
286 .map_err(|e| self.cdp_err_or_stale(&e, "inner_html"))
287 .map(Option::unwrap_or_default)
288 }
289
290 /// Return the element's `outerHTML`.
291 ///
292 /// Returns an empty string when the property is absent or null.
293 ///
294 /// # Errors
295 ///
296 /// Returns [`BrowserError::StaleNode`] when the remote object has been
297 /// invalidated.
298 pub async fn outer_html(&self) -> Result<String> {
299 timeout(self.cdp_timeout, self.element.outer_html())
300 .await
301 .map_err(|_| BrowserError::Timeout {
302 operation: "NodeHandle::outer_html".to_string(),
303 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
304 })?
305 .map_err(|e| self.cdp_err_or_stale(&e, "outer_html"))
306 .map(Option::unwrap_or_default)
307 }
308
309 /// Return the ancestor tag-name chain, root-last.
310 ///
311 /// Executes a single `Runtime.callFunctionOn` JavaScript function that
312 /// walks `parentElement` and collects tag names — no repeated CDP calls.
313 ///
314 /// ```text
315 /// // for <span> inside <p> inside <article> inside <body> inside <html>
316 /// ["p", "article", "body", "html"]
317 /// ```
318 ///
319 /// # Errors
320 ///
321 /// Returns [`BrowserError::StaleNode`] when the remote object has been
322 /// invalidated, or [`BrowserError::ScriptExecutionFailed`] when the
323 /// JSON returned by the script cannot be parsed.
324 pub async fn ancestors(&self) -> Result<Vec<String>> {
325 let returns = timeout(
326 self.cdp_timeout,
327 self.element.call_js_fn(
328 r"function() {
329 const a = [];
330 let n = this.parentElement;
331 while (n) { a.push(n.tagName.toLowerCase()); n = n.parentElement; }
332 return JSON.stringify(a);
333 }",
334 false,
335 ),
336 )
337 .await
338 .map_err(|_| BrowserError::Timeout {
339 operation: "NodeHandle::ancestors".to_string(),
340 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
341 })?
342 .map_err(|e| self.cdp_err_or_stale(&e, "ancestors"))?;
343
344 let json_str = returns
345 .result
346 .value
347 .as_ref()
348 .and_then(|v| v.as_str())
349 .unwrap_or("[]");
350
351 serde_json::from_str::<Vec<String>>(json_str).map_err(|e| {
352 BrowserError::ScriptExecutionFailed {
353 script: "NodeHandle::ancestors".to_string(),
354 reason: e.to_string(),
355 }
356 })
357 }
358
359 /// Return child elements matching `selector` as new [`NodeHandle`]s.
360 ///
361 /// Issues a single `Runtime.callFunctionOn` + `DOM.querySelectorAll`
362 /// call scoped to this element — not to the entire document.
363 ///
364 /// Returns an empty `Vec` when no children match (consistent with the JS
365 /// `querySelectorAll` contract).
366 ///
367 /// # Errors
368 ///
369 /// Returns [`BrowserError::StaleNode`] when the remote object has been
370 /// invalidated, or [`BrowserError::CdpError`] on transport failure.
371 pub async fn children_matching(&self, selector: &str) -> Result<Vec<Self>> {
372 let elements = timeout(self.cdp_timeout, self.element.find_elements(selector))
373 .await
374 .map_err(|_| BrowserError::Timeout {
375 operation: "NodeHandle::children_matching".to_string(),
376 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
377 })?
378 .map_err(|e| self.cdp_err_or_stale(&e, "children_matching"))?;
379
380 Ok(elements
381 .into_iter()
382 .map(|el| Self {
383 element: el,
384 selector: selector.to_string(),
385 cdp_timeout: self.cdp_timeout,
386 })
387 .collect())
388 }
389
390 /// Map a chromiumoxide `CdpError` to either [`BrowserError::StaleNode`]
391 /// (when the remote object reference has been invalidated) or
392 /// [`BrowserError::CdpError`] for all other failures.
393 fn cdp_err_or_stale(
394 &self,
395 err: &chromiumoxide::error::CdpError,
396 operation: &str,
397 ) -> BrowserError {
398 let msg = err.to_string();
399 if msg.contains("Cannot find object with id")
400 || msg.contains("context with specified id")
401 || msg.contains("Cannot find context")
402 {
403 BrowserError::StaleNode {
404 selector: self.selector.clone(),
405 }
406 } else {
407 BrowserError::CdpError {
408 operation: operation.to_string(),
409 message: msg,
410 }
411 }
412 }
413}
414
415// ─── PageHandle ───────────────────────────────────────────────────────────────
416
417/// A handle to an open browser tab.
418///
419/// On drop the underlying page is closed automatically.
420///
421/// # Example
422///
423/// ```no_run
424/// use stygian_browser::{BrowserPool, BrowserConfig};
425/// use stygian_browser::page::WaitUntil;
426/// use std::time::Duration;
427///
428/// # async fn run() -> stygian_browser::error::Result<()> {
429/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
430/// let handle = pool.acquire().await?;
431/// let mut page = handle.browser().expect("valid browser").new_page().await?;
432/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
433/// let html = page.content().await?;
434/// drop(page); // closes the tab
435/// handle.release().await;
436/// # Ok(())
437/// # }
438/// ```
439pub struct PageHandle {
440 page: Page,
441 cdp_timeout: Duration,
442 /// HTTP status code of the most recent main-frame navigation, or `0` if not
443 /// yet captured. Written atomically by the listener spawned in `navigate()`.
444 last_status_code: Arc<AtomicU16>,
445 /// Background task processing `Fetch.requestPaused` events. Aborted and
446 /// replaced each time `set_resource_filter` is called.
447 resource_filter_task: Option<tokio::task::JoinHandle<()>>,
448}
449
450impl PageHandle {
451 /// Wrap a raw chromiumoxide [`Page`] in a handle.
452 pub(crate) fn new(page: Page, cdp_timeout: Duration) -> Self {
453 Self {
454 page,
455 cdp_timeout,
456 last_status_code: Arc::new(AtomicU16::new(0)),
457 resource_filter_task: None,
458 }
459 }
460
461 /// Navigate to `url` and wait for `condition` within `nav_timeout`.
462 ///
463 /// # Errors
464 ///
465 /// Returns [`BrowserError::NavigationFailed`] if the navigation times out or
466 /// the CDP call fails.
467 pub async fn navigate(
468 &mut self,
469 url: &str,
470 condition: WaitUntil,
471 nav_timeout: Duration,
472 ) -> Result<()> {
473 self.setup_status_capture().await;
474 timeout(
475 nav_timeout,
476 self.navigate_inner(url, condition, nav_timeout),
477 )
478 .await
479 .map_err(|_| BrowserError::NavigationFailed {
480 url: url.to_string(),
481 reason: format!("navigation timed out after {nav_timeout:?}"),
482 })?
483 }
484
485 /// Reset the last status code and wire up the `Network.responseReceived`
486 /// listener before any navigation starts. Errors are logged and swallowed
487 /// so that a missing network domain never blocks navigation.
488 async fn setup_status_capture(&self) {
489 use chromiumoxide::cdp::browser_protocol::network::{
490 EventResponseReceived, ResourceType as NetworkResourceType,
491 };
492 use futures::StreamExt;
493
494 // Reset so a stale code is not returned if the new navigation fails
495 // before the response headers arrive.
496 self.last_status_code.store(0, Ordering::Release);
497
498 // Subscribe *before* goto() — the listener runs in a detached task and
499 // stores the first Document-type response status atomically.
500 let page_for_listener = self.page.clone();
501 let status_capture = Arc::clone(&self.last_status_code);
502 match page_for_listener
503 .event_listener::<EventResponseReceived>()
504 .await
505 {
506 Ok(mut stream) => {
507 tokio::spawn(async move {
508 while let Some(event) = stream.next().await {
509 if event.r#type == NetworkResourceType::Document {
510 let code = u16::try_from(event.response.status).unwrap_or(0);
511 if code > 0 {
512 status_capture.store(code, Ordering::Release);
513 }
514 break;
515 }
516 }
517 });
518 }
519 Err(e) => warn!("status-code capture unavailable: {e}"),
520 }
521 }
522
523 /// Subscribe to the appropriate CDP events, fire `goto`, then await
524 /// `condition`. All subscriptions precede `goto` to eliminate the race
525 /// described in issue #7.
526 async fn navigate_inner(
527 &self,
528 url: &str,
529 condition: WaitUntil,
530 nav_timeout: Duration,
531 ) -> Result<()> {
532 use chromiumoxide::cdp::browser_protocol::page::{
533 EventDomContentEventFired, EventLoadEventFired,
534 };
535 use futures::StreamExt;
536
537 let url_owned = url.to_string();
538
539 let mut dom_events = match &condition {
540 WaitUntil::DomContentLoaded => Some(
541 self.page
542 .event_listener::<EventDomContentEventFired>()
543 .await
544 .map_err(|e| BrowserError::NavigationFailed {
545 url: url_owned.clone(),
546 reason: e.to_string(),
547 })?,
548 ),
549 _ => None,
550 };
551
552 let mut load_events = match &condition {
553 WaitUntil::NetworkIdle => Some(
554 self.page
555 .event_listener::<EventLoadEventFired>()
556 .await
557 .map_err(|e| BrowserError::NavigationFailed {
558 url: url_owned.clone(),
559 reason: e.to_string(),
560 })?,
561 ),
562 _ => None,
563 };
564
565 let inflight = if matches!(condition, WaitUntil::NetworkIdle) {
566 Some(self.subscribe_inflight_counter().await)
567 } else {
568 None
569 };
570
571 self.page
572 .goto(url)
573 .await
574 .map_err(|e| BrowserError::NavigationFailed {
575 url: url_owned.clone(),
576 reason: e.to_string(),
577 })?;
578
579 match &condition {
580 WaitUntil::DomContentLoaded => {
581 if let Some(ref mut events) = dom_events {
582 let _ = events.next().await;
583 }
584 }
585 WaitUntil::NetworkIdle => {
586 if let Some(ref mut events) = load_events {
587 let _ = events.next().await;
588 }
589 if let Some(ref counter) = inflight {
590 Self::wait_network_idle(counter).await;
591 }
592 }
593 WaitUntil::Selector(css) => {
594 self.wait_for_selector(css, nav_timeout).await?;
595 }
596 }
597 Ok(())
598 }
599
600 /// Spawn three detached tasks that maintain a signed in-flight request
601 /// counter via `Network.requestWillBeSent` (+1) and
602 /// `Network.loadingFinished`/`Network.loadingFailed` (−1 each).
603 /// Returns the shared counter so the caller can poll it.
604 async fn subscribe_inflight_counter(&self) -> Arc<std::sync::atomic::AtomicI32> {
605 use std::sync::atomic::AtomicI32;
606
607 use chromiumoxide::cdp::browser_protocol::network::{
608 EventLoadingFailed, EventLoadingFinished, EventRequestWillBeSent,
609 };
610 use futures::StreamExt;
611
612 let counter: Arc<AtomicI32> = Arc::new(AtomicI32::new(0));
613 let pairs: [(Arc<AtomicI32>, i32); 3] = [
614 (Arc::clone(&counter), 1),
615 (Arc::clone(&counter), -1),
616 (Arc::clone(&counter), -1),
617 ];
618 let [p1, p2, p3] = [self.page.clone(), self.page.clone(), self.page.clone()];
619
620 macro_rules! spawn_tracker {
621 ($page:expr, $event:ty, $c:expr, $delta:expr) => {
622 match $page.event_listener::<$event>().await {
623 Ok(mut s) => {
624 let c = $c;
625 let d = $delta;
626 tokio::spawn(async move {
627 while s.next().await.is_some() {
628 c.fetch_add(d, Ordering::Relaxed);
629 }
630 });
631 }
632 Err(e) => warn!("network-idle tracker unavailable: {e}"),
633 }
634 };
635 }
636
637 let [(c1, d1), (c2, d2), (c3, d3)] = pairs;
638 spawn_tracker!(p1, EventRequestWillBeSent, c1, d1);
639 spawn_tracker!(p2, EventLoadingFinished, c2, d2);
640 spawn_tracker!(p3, EventLoadingFailed, c3, d3);
641
642 counter
643 }
644
645 /// Poll `counter` until ≤ 2 in-flight requests persist for 500 ms
646 /// (equivalent to Playwright's `networkidle2`).
647 async fn wait_network_idle(counter: &Arc<std::sync::atomic::AtomicI32>) {
648 const IDLE_THRESHOLD: i32 = 2;
649 const SETTLE: Duration = Duration::from_millis(500);
650 loop {
651 if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
652 tokio::time::sleep(SETTLE).await;
653 if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
654 break;
655 }
656 } else {
657 tokio::time::sleep(Duration::from_millis(50)).await;
658 }
659 }
660 }
661
662 /// Wait until `document.querySelector(selector)` is non-null (`timeout`).
663 ///
664 /// # Errors
665 ///
666 /// Returns [`BrowserError::NavigationFailed`] if the selector is not found
667 /// within the given timeout.
668 pub async fn wait_for_selector(&self, selector: &str, wait_timeout: Duration) -> Result<()> {
669 let selector_owned = selector.to_string();
670 let poll = async {
671 loop {
672 if self.page.find_element(selector_owned.clone()).await.is_ok() {
673 return Ok(());
674 }
675 tokio::time::sleep(Duration::from_millis(100)).await;
676 }
677 };
678
679 timeout(wait_timeout, poll)
680 .await
681 .map_err(|_| BrowserError::NavigationFailed {
682 url: String::new(),
683 reason: format!("selector '{selector_owned}' not found within {wait_timeout:?}"),
684 })?
685 }
686
687 /// Set a resource filter to block specific network request types.
688 ///
689 /// Enables `Fetch` interception and spawns a background task that continues
690 /// allowed requests and fails blocked ones with `BlockedByClient`. Any
691 /// previously set filter task is cancelled first.
692 ///
693 /// # Errors
694 ///
695 /// Returns a [`BrowserError::CdpError`] if the CDP call fails.
696 pub async fn set_resource_filter(&mut self, filter: ResourceFilter) -> Result<()> {
697 use chromiumoxide::cdp::browser_protocol::fetch::{
698 ContinueRequestParams, EnableParams, EventRequestPaused, FailRequestParams,
699 RequestPattern,
700 };
701 use chromiumoxide::cdp::browser_protocol::network::ErrorReason;
702 use futures::StreamExt as _;
703
704 if filter.is_empty() {
705 return Ok(());
706 }
707
708 // Cancel any previously running filter task.
709 if let Some(task) = self.resource_filter_task.take() {
710 task.abort();
711 }
712
713 let pattern = RequestPattern::builder().url_pattern("*").build();
714 let params = EnableParams::builder()
715 .patterns(vec![pattern])
716 .handle_auth_requests(false)
717 .build();
718
719 timeout(self.cdp_timeout, self.page.execute::<EnableParams>(params))
720 .await
721 .map_err(|_| BrowserError::Timeout {
722 operation: "Fetch.enable".to_string(),
723 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
724 })?
725 .map_err(|e| BrowserError::CdpError {
726 operation: "Fetch.enable".to_string(),
727 message: e.to_string(),
728 })?;
729
730 // Subscribe to requestPaused events and dispatch each one so navigation
731 // is never blocked. Without this handler Chrome holds every intercepted
732 // request indefinitely and the page hangs.
733 let mut events = self
734 .page
735 .event_listener::<EventRequestPaused>()
736 .await
737 .map_err(|e| BrowserError::CdpError {
738 operation: "Fetch.requestPaused subscribe".to_string(),
739 message: e.to_string(),
740 })?;
741
742 let page = self.page.clone();
743 debug!("Resource filter active: {:?}", filter);
744 let task = tokio::spawn(async move {
745 while let Some(event) = events.next().await {
746 let request_id = event.request_id.clone();
747 if filter.should_block(event.resource_type.as_ref()) {
748 let params = FailRequestParams::new(request_id, ErrorReason::BlockedByClient);
749 let _ = page.execute(params).await;
750 } else {
751 let _ = page.execute(ContinueRequestParams::new(request_id)).await;
752 }
753 }
754 });
755
756 self.resource_filter_task = Some(task);
757 Ok(())
758 }
759
760 /// Return the current page URL (post-navigation, post-redirect).
761 ///
762 /// Delegates to the CDP `Target.getTargetInfo` binding already used
763 /// internally by [`save_cookies`](Self::save_cookies); no extra network
764 /// request is made. Returns an empty string if the URL is not yet set
765 /// (e.g. on a blank tab before the first navigation).
766 ///
767 /// # Errors
768 ///
769 /// Returns [`BrowserError::CdpError`] if the underlying CDP call fails, or
770 /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
771 ///
772 /// # Example
773 ///
774 /// ```no_run
775 /// use stygian_browser::{BrowserPool, BrowserConfig};
776 /// use stygian_browser::page::WaitUntil;
777 /// use std::time::Duration;
778 ///
779 /// # async fn run() -> stygian_browser::error::Result<()> {
780 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
781 /// let handle = pool.acquire().await?;
782 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
783 /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
784 /// let url = page.url().await?;
785 /// println!("Final URL after redirects: {url}");
786 /// # Ok(())
787 /// # }
788 /// ```
789 pub async fn url(&self) -> Result<String> {
790 timeout(self.cdp_timeout, self.page.url())
791 .await
792 .map_err(|_| BrowserError::Timeout {
793 operation: "page.url".to_string(),
794 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
795 })?
796 .map_err(|e| BrowserError::CdpError {
797 operation: "page.url".to_string(),
798 message: e.to_string(),
799 })
800 .map(Option::unwrap_or_default)
801 }
802
803 /// Return the HTTP status code of the most recent main-frame navigation.
804 ///
805 /// The status is captured from the `Network.responseReceived` CDP event
806 /// wired up inside [`navigate`](Self::navigate), so it reflects the
807 /// *final* response after any server-side redirects.
808 ///
809 /// Returns `None` if the status was not captured — for example on `file://`
810 /// navigations, when [`navigate`](Self::navigate) has not yet been called,
811 /// or if the network event subscription failed.
812 ///
813 /// # Errors
814 ///
815 /// This method is infallible; the `Result` wrapper is kept for API
816 /// consistency with other `PageHandle` methods.
817 ///
818 /// # Example
819 ///
820 /// ```no_run
821 /// use stygian_browser::{BrowserPool, BrowserConfig};
822 /// use stygian_browser::page::WaitUntil;
823 /// use std::time::Duration;
824 ///
825 /// # async fn run() -> stygian_browser::error::Result<()> {
826 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
827 /// let handle = pool.acquire().await?;
828 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
829 /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
830 /// if let Some(code) = page.status_code()? {
831 /// println!("HTTP {code}");
832 /// }
833 /// # Ok(())
834 /// # }
835 /// ```
836 pub fn status_code(&self) -> Result<Option<u16>> {
837 let code = self.last_status_code.load(Ordering::Acquire);
838 Ok(if code == 0 { None } else { Some(code) })
839 }
840
841 /// Return the page's `<title>` text.
842 ///
843 /// # Errors
844 ///
845 /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
846 pub async fn title(&self) -> Result<String> {
847 timeout(self.cdp_timeout, self.page.get_title())
848 .await
849 .map_err(|_| BrowserError::Timeout {
850 operation: "get_title".to_string(),
851 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
852 })?
853 .map_err(|e| BrowserError::ScriptExecutionFailed {
854 script: "document.title".to_string(),
855 reason: e.to_string(),
856 })
857 .map(Option::unwrap_or_default)
858 }
859
860 /// Return the page's full outer HTML.
861 ///
862 /// # Errors
863 ///
864 /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
865 pub async fn content(&self) -> Result<String> {
866 timeout(self.cdp_timeout, self.page.content())
867 .await
868 .map_err(|_| BrowserError::Timeout {
869 operation: "page.content".to_string(),
870 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
871 })?
872 .map_err(|e| BrowserError::ScriptExecutionFailed {
873 script: "document.documentElement.outerHTML".to_string(),
874 reason: e.to_string(),
875 })
876 }
877
878 /// Query the live DOM for all elements matching `selector` and return
879 /// lightweight [`NodeHandle`]s backed by CDP `RemoteObjectId`s.
880 ///
881 /// No HTML serialisation occurs — the browser's in-memory DOM is queried
882 /// directly over the CDP connection, eliminating the `page.content()` +
883 /// `scraper::Html::parse_document` round-trip.
884 ///
885 /// Returns an empty `Vec` when no elements match (consistent with the JS
886 /// `querySelectorAll` contract — not an error).
887 ///
888 /// # Errors
889 ///
890 /// Returns [`BrowserError::CdpError`] if the CDP find call fails, or
891 /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
892 ///
893 /// # Example
894 ///
895 /// ```no_run
896 /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
897 /// use std::time::Duration;
898 ///
899 /// # async fn run() -> stygian_browser::error::Result<()> {
900 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
901 /// let handle = pool.acquire().await?;
902 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
903 /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
904 ///
905 /// let nodes = page.query_selector_all("[data-ux]").await?;
906 /// for node in &nodes {
907 /// let ux_type = node.attr("data-ux").await?;
908 /// let text = node.text_content().await?;
909 /// println!("{ux_type:?}: {text}");
910 /// }
911 /// # Ok(())
912 /// # }
913 /// ```
914 pub async fn query_selector_all(&self, selector: &str) -> Result<Vec<NodeHandle>> {
915 let elements = timeout(self.cdp_timeout, self.page.find_elements(selector))
916 .await
917 .map_err(|_| BrowserError::Timeout {
918 operation: "PageHandle::query_selector_all".to_string(),
919 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
920 })?
921 .map_err(|e| BrowserError::CdpError {
922 operation: "PageHandle::query_selector_all".to_string(),
923 message: e.to_string(),
924 })?;
925
926 Ok(elements
927 .into_iter()
928 .map(|el| NodeHandle {
929 element: el,
930 selector: selector.to_string(),
931 cdp_timeout: self.cdp_timeout,
932 })
933 .collect())
934 }
935
936 /// Evaluate arbitrary JavaScript and return the result as `T`.
937 ///
938 /// # Errors
939 ///
940 /// Returns [`BrowserError::ScriptExecutionFailed`] on eval failure or
941 /// deserialization error.
942 pub async fn eval<T: serde::de::DeserializeOwned>(&self, script: &str) -> Result<T> {
943 let script_owned = script.to_string();
944 timeout(self.cdp_timeout, self.page.evaluate(script))
945 .await
946 .map_err(|_| BrowserError::Timeout {
947 operation: "page.evaluate".to_string(),
948 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
949 })?
950 .map_err(|e| BrowserError::ScriptExecutionFailed {
951 script: script_owned.clone(),
952 reason: e.to_string(),
953 })?
954 .into_value::<T>()
955 .map_err(|e| BrowserError::ScriptExecutionFailed {
956 script: script_owned,
957 reason: e.to_string(),
958 })
959 }
960
961 /// Save all cookies for the current page's origin.
962 ///
963 /// # Errors
964 ///
965 /// Returns [`BrowserError::CdpError`] if the CDP call fails.
966 pub async fn save_cookies(
967 &self,
968 ) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::Cookie>> {
969 use chromiumoxide::cdp::browser_protocol::network::GetCookiesParams;
970
971 let url = self
972 .page
973 .url()
974 .await
975 .map_err(|e| BrowserError::CdpError {
976 operation: "page.url".to_string(),
977 message: e.to_string(),
978 })?
979 .unwrap_or_default();
980
981 timeout(
982 self.cdp_timeout,
983 self.page
984 .execute(GetCookiesParams::builder().urls(vec![url]).build()),
985 )
986 .await
987 .map_err(|_| BrowserError::Timeout {
988 operation: "Network.getCookies".to_string(),
989 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
990 })?
991 .map_err(|e| BrowserError::CdpError {
992 operation: "Network.getCookies".to_string(),
993 message: e.to_string(),
994 })
995 .map(|r| r.cookies.clone())
996 }
997
998 /// Inject cookies into the current page.
999 ///
1000 /// Seeds session tokens or other state without needing a full
1001 /// [`SessionSnapshot`][crate::session::SessionSnapshot] and without
1002 /// requiring a direct `chromiumoxide` dependency in calling code.
1003 ///
1004 /// Individual cookie failures are logged as warnings and do not abort the
1005 /// remaining cookies.
1006 ///
1007 /// # Errors
1008 ///
1009 /// Returns [`BrowserError::Timeout`] if a single `Network.setCookie` CDP
1010 /// call exceeds `cdp_timeout`.
1011 ///
1012 /// # Example
1013 ///
1014 /// ```no_run
1015 /// use stygian_browser::{BrowserPool, BrowserConfig};
1016 /// use stygian_browser::session::SessionCookie;
1017 /// use std::time::Duration;
1018 ///
1019 /// # async fn run() -> stygian_browser::error::Result<()> {
1020 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1021 /// let handle = pool.acquire().await?;
1022 /// let page = handle.browser().expect("valid browser").new_page().await?;
1023 /// let cookies = vec![SessionCookie {
1024 /// name: "session".to_string(),
1025 /// value: "abc123".to_string(),
1026 /// domain: ".example.com".to_string(),
1027 /// path: "/".to_string(),
1028 /// expires: -1.0,
1029 /// http_only: true,
1030 /// secure: true,
1031 /// same_site: "Lax".to_string(),
1032 /// }];
1033 /// page.inject_cookies(&cookies).await?;
1034 /// # Ok(())
1035 /// # }
1036 /// ```
1037 pub async fn inject_cookies(&self, cookies: &[crate::session::SessionCookie]) -> Result<()> {
1038 use chromiumoxide::cdp::browser_protocol::network::SetCookieParams;
1039
1040 for cookie in cookies {
1041 let params = match SetCookieParams::builder()
1042 .name(cookie.name.clone())
1043 .value(cookie.value.clone())
1044 .domain(cookie.domain.clone())
1045 .path(cookie.path.clone())
1046 .http_only(cookie.http_only)
1047 .secure(cookie.secure)
1048 .build()
1049 {
1050 Ok(p) => p,
1051 Err(e) => {
1052 warn!(cookie = %cookie.name, error = %e, "Failed to build cookie params");
1053 continue;
1054 }
1055 };
1056
1057 match timeout(self.cdp_timeout, self.page.execute(params)).await {
1058 Err(_) => {
1059 warn!(
1060 cookie = %cookie.name,
1061 timeout_ms = self.cdp_timeout.as_millis(),
1062 "Timed out injecting cookie"
1063 );
1064 }
1065 Ok(Err(e)) => {
1066 warn!(cookie = %cookie.name, error = %e, "Failed to inject cookie");
1067 }
1068 Ok(Ok(_)) => {}
1069 }
1070 }
1071
1072 debug!(count = cookies.len(), "Cookies injected");
1073 Ok(())
1074 }
1075
1076 /// Capture a screenshot of the current page as PNG bytes.
1077 ///
1078 /// The screenshot is full-page by default (viewport clipped to the rendered
1079 /// layout area). Save the returned bytes to a `.png` file or process
1080 /// them in-memory.
1081 ///
1082 /// # Errors
1083 ///
1084 /// Returns [`BrowserError::CdpError`] if the CDP `Page.captureScreenshot`
1085 /// command fails, or [`BrowserError::Timeout`] if it exceeds
1086 /// `cdp_timeout`.
1087 ///
1088 /// # Example
1089 ///
1090 /// ```no_run
1091 /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1092 /// use std::{time::Duration, fs};
1093 ///
1094 /// # async fn run() -> stygian_browser::error::Result<()> {
1095 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1096 /// let handle = pool.acquire().await?;
1097 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1098 /// page.navigate("https://example.com", WaitUntil::Selector("body".to_string()), Duration::from_secs(30)).await?;
1099 /// let png = page.screenshot().await?;
1100 /// fs::write("screenshot.png", &png).unwrap();
1101 /// # Ok(())
1102 /// # }
1103 /// ```
1104 pub async fn screenshot(&self) -> Result<Vec<u8>> {
1105 use chromiumoxide::page::ScreenshotParams;
1106
1107 let params = ScreenshotParams::builder().full_page(true).build();
1108
1109 timeout(self.cdp_timeout, self.page.screenshot(params))
1110 .await
1111 .map_err(|_| BrowserError::Timeout {
1112 operation: "Page.captureScreenshot".to_string(),
1113 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1114 })?
1115 .map_err(|e| BrowserError::CdpError {
1116 operation: "Page.captureScreenshot".to_string(),
1117 message: e.to_string(),
1118 })
1119 }
1120
1121 /// Borrow the underlying chromiumoxide [`Page`].
1122 pub const fn inner(&self) -> &Page {
1123 &self.page
1124 }
1125
1126 /// Close this page (tab).
1127 ///
1128 /// Called automatically on drop; explicit call avoids suppressing the error.
1129 pub async fn close(self) -> Result<()> {
1130 timeout(Duration::from_secs(5), self.page.clone().close())
1131 .await
1132 .map_err(|_| BrowserError::Timeout {
1133 operation: "page.close".to_string(),
1134 duration_ms: 5000,
1135 })?
1136 .map_err(|e| BrowserError::CdpError {
1137 operation: "page.close".to_string(),
1138 message: e.to_string(),
1139 })
1140 }
1141}
1142
1143// ─── Stealth diagnostics ──────────────────────────────────────────────────────
1144
#[cfg(feature = "stealth")]
impl PageHandle {
    /// Execute every built-in stealth detection check on the current page.
    ///
    /// Each entry of [`crate::diagnostic::all_checks`] has its script
    /// evaluated through [`PageHandle::eval`]; the string it yields is handed
    /// to the check's `parse_output`. All results are collected, in order,
    /// into a [`crate::diagnostic::DiagnosticReport`].
    ///
    /// A check whose script cannot be evaluated is logged with `warn!` and
    /// recorded as a failed check whose details start with `script error:`.
    /// It does **not** abort the remaining checks.
    ///
    /// # Errors
    ///
    /// Every evaluation error, whatever its cause, is folded into the report
    /// as a failing check, so the implementation shown here always returns
    /// `Ok`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # async fn run() -> stygian_browser::error::Result<()> {
    /// use stygian_browser::{BrowserPool, BrowserConfig};
    /// use stygian_browser::page::WaitUntil;
    /// use std::time::Duration;
    ///
    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
    /// let handle = pool.acquire().await?;
    /// let browser = handle.browser().expect("valid browser");
    /// let mut page = browser.new_page().await?;
    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(10)).await?;
    ///
    /// let report = page.verify_stealth().await?;
    /// println!("Stealth: {}/{} checks passed", report.passed_count, report.checks.len());
    /// for failure in report.failures() {
    ///     eprintln!("  FAIL  {}: {}", failure.description, failure.details);
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub async fn verify_stealth(&self) -> Result<crate::diagnostic::DiagnosticReport> {
        use crate::diagnostic::{CheckResult, DiagnosticReport, all_checks};

        let mut outcomes: Vec<CheckResult> = Vec::new();

        for check in all_checks() {
            let evaluated = self.eval::<String>(check.script).await;

            let outcome = match evaluated {
                // An evaluation failure becomes a failed check rather than
                // an early return, so one bad script cannot hide the rest.
                Err(err) => {
                    warn!(
                        check = ?check.id,
                        error = %err,
                        "stealth check script failed during evaluation"
                    );
                    CheckResult {
                        id: check.id,
                        description: check.description.to_string(),
                        passed: false,
                        details: format!("script error: {err}"),
                    }
                }
                Ok(raw) => check.parse_output(&raw),
            };

            debug!(
                check = ?outcome.id,
                passed = outcome.passed,
                details = %outcome.details,
                "stealth check result"
            );
            outcomes.push(outcome);
        }

        Ok(DiagnosticReport::new(outcomes))
    }
}
1217
1218impl Drop for PageHandle {
1219 fn drop(&mut self) {
1220 warn!("PageHandle dropped without explicit close(); spawning cleanup task");
1221 // chromiumoxide Page does not implement close on Drop, so we spawn
1222 // a fire-and-forget task. The page ref is already owned; we need to
1223 // swap it out. We clone the Page handle (it's Arc-backed internally).
1224 let page = self.page.clone();
1225 tokio::spawn(async move {
1226 let _ = page.close().await;
1227 });
1228 }
1229}
1230
1231// ─── Tests ────────────────────────────────────────────────────────────────────
1232
#[cfg(test)]
mod tests {
    use super::*;

    /// Folds a flat `[name, value, name, value, …]` list into a map using
    /// `chunks_exact(2)`, so an unpaired trailing element is ignored.
    fn pair_attributes(flat: &[String]) -> std::collections::HashMap<String, String> {
        flat.chunks_exact(2)
            .filter_map(|pair| match pair {
                [name, value] => Some((name.clone(), value.clone())),
                _ => None,
            })
            .collect()
    }

    /// `block_media` blocks images, fonts, stylesheets and media, and lets
    /// scripts and XHR through.
    #[test]
    fn resource_filter_block_media_blocks_image() {
        let filter = ResourceFilter::block_media();
        for blocked in ["Image", "Font", "Stylesheet", "Media"] {
            assert!(filter.should_block(blocked));
        }
        for allowed in ["Script", "XHR"] {
            assert!(!filter.should_block(allowed));
        }
    }

    /// Resource type names are matched regardless of letter case.
    #[test]
    fn resource_filter_case_insensitive() {
        let filter = ResourceFilter::block_images_and_fonts();
        let lowercase = "image";
        let uppercase = "IMAGE";
        assert!(filter.should_block(lowercase));
        assert!(filter.should_block(uppercase));
        assert!(!filter.should_block("Stylesheet"));
    }

    /// Chained `block` calls accumulate the blocked types.
    #[test]
    fn resource_filter_builder_chain() {
        let filter = ResourceFilter::default()
            .block(ResourceType::Image)
            .block(ResourceType::Font);
        for blocked in ["Image", "Font"] {
            assert!(filter.should_block(blocked));
        }
        assert!(!filter.should_block("Stylesheet"));
    }

    /// Blocking the same type twice stores it only once.
    #[test]
    fn resource_filter_dedup_block() {
        let once = ResourceFilter::default().block(ResourceType::Image);
        let twice = once.block(ResourceType::Image);
        assert_eq!(twice.blocked.len(), 1);
    }

    /// The default filter is empty; `block_media` is not.
    #[test]
    fn resource_filter_is_empty_when_default() {
        let untouched = ResourceFilter::default();
        let media = ResourceFilter::block_media();
        assert!(untouched.is_empty());
        assert!(!media.is_empty());
    }

    /// `WaitUntil::Selector` carries the selector string it was built with.
    #[test]
    fn wait_until_selector_stores_string() {
        let wait = WaitUntil::Selector("#foo".to_string());
        assert!(matches!(wait, WaitUntil::Selector(ref css) if css == "#foo"));
    }

    /// Each `ResourceType` maps to its CDP resource-type string.
    #[test]
    fn resource_type_cdp_str() {
        let expectations = [
            (ResourceType::Image, "Image"),
            (ResourceType::Font, "Font"),
            (ResourceType::Stylesheet, "Stylesheet"),
            (ResourceType::Media, "Media"),
        ];
        for (kind, expected) in expectations {
            assert_eq!(kind.as_cdp_str(), expected);
        }
    }

    /// `PageHandle` must be `Send + Sync` for use across thread boundaries.
    #[test]
    fn page_handle_is_send_sync() {
        fn assert_send_and_sync<T: Send + Sync>() {}
        assert_send_and_sync::<PageHandle>();
    }

    /// The status-code sentinel (0 = "not yet captured") and the conversion to
    /// `Option<u16>` are pure-logic invariants testable without a live browser.
    #[test]
    fn status_code_sentinel_zero_maps_to_none() {
        use std::sync::atomic::{AtomicU16, Ordering};
        let stored = AtomicU16::new(0).load(Ordering::Acquire);
        let mapped = Some(stored).filter(|&code| code != 0);
        assert_eq!(mapped, None::<u16>);
    }

    /// Any non-zero stored status code maps to `Some(code)`.
    #[test]
    fn status_code_non_zero_maps_to_some() {
        use std::sync::atomic::{AtomicU16, Ordering};
        for expected in [200u16, 301, 404, 503] {
            let stored = AtomicU16::new(expected).load(Ordering::Acquire);
            let mapped = Some(stored).filter(|&code| code != 0);
            assert_eq!(mapped, Some(expected));
        }
    }

    // ── NodeHandle pure-logic tests ───────────────────────────────────────────

    /// `attr_map` relies on `chunks_exact(2)` — verify the pairing logic is
    /// correct without a live browser by exercising it directly.
    #[test]
    fn attr_map_chunking_pairs_correctly() {
        let flat: Vec<String> = ["id", "main", "data-ux", "Section", "class", "container"]
            .iter()
            .map(|s| s.to_string())
            .collect();

        let map = pair_attributes(&flat);

        assert_eq!(map.len(), 3);
        for (name, value) in [("id", "main"), ("data-ux", "Section"), ("class", "container")] {
            assert_eq!(map.get(name).map(String::as_str), Some(value));
        }
    }

    /// Odd-length flat attribute lists (malformed CDP response) are handled
    /// gracefully — the trailing element is silently ignored.
    #[test]
    fn attr_map_chunking_ignores_odd_trailing() {
        // A name with no matching value.
        let flat = ["orphan".to_string()];
        assert!(pair_attributes(&flat).is_empty());
    }

    /// Empty flat list → empty map.
    #[test]
    fn attr_map_chunking_empty_input() {
        let flat: Vec<String> = Vec::new();
        assert!(pair_attributes(&flat).is_empty());
    }

    /// `ancestors` JSON parsing: valid input round-trips correctly.
    #[test]
    fn ancestors_json_parse_round_trip() -> std::result::Result<(), serde_json::Error> {
        let parsed = serde_json::from_str::<Vec<String>>(r#"["p","article","body","html"]"#)?;
        assert_eq!(parsed, ["p", "article", "body", "html"]);
        Ok(())
    }

    /// `ancestors` JSON parsing: empty array (no parent) is fine.
    #[test]
    fn ancestors_json_parse_empty() -> std::result::Result<(), serde_json::Error> {
        let parsed = serde_json::from_str::<Vec<String>>("[]")?;
        assert!(parsed.is_empty());
        Ok(())
    }
}