ferrous_browser/page.rs
1use serde::de::DeserializeOwned;
2use serde::{Deserialize, Serialize};
3use serde_json::{json, Value};
4use std::sync::Arc;
5use tokio::time::{timeout, Duration};
6use tracing::Instrument;
7
8use crate::cdp::CDPClient;
9use crate::error::{BrowserError, Result};
10use crate::har::HarCapture;
11
12// ─── P2: WaitUntil enum ──────────────────────────────────────────────────────
13
/// Controls when [`Page::goto`] considers navigation complete.
///
/// The type is a plain `Copy` tag. It derives `PartialEq`, `Eq` and `Hash` so
/// callers can compare strategies with `==` or use them as map/set keys.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum WaitUntil {
    /// Wait for `Page.domContentEventFired` — the DOM is parsed but
    /// sub-resources (images, stylesheets) may still be loading.
    DomContentLoaded,
    /// Wait for `Page.loadEventFired` — all resources have loaded.
    /// This is the default.
    #[default]
    Load,
    /// Wait until the page's CDP session has been quiet (no events observed)
    /// for 500 ms. Useful for SPAs that fetch data after the load event.
    NetworkIdle,
}
28
29// ─── P2B: Cookie ─────────────────────────────────────────────────────────────
30
31/// Represents a browser cookie for session persistence.
32///
33/// # Example
34///
35/// ```no_run
36/// # use ferrous_browser::{Browser, Cookie, WaitUntil};
37/// # #[tokio::main]
38/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
39/// let browser = Browser::launch().await?;
40/// let page = browser.new_page().await?;
41/// let cookies = vec![Cookie {
42/// name: "session".to_string(),
43/// value: "abc123".to_string(),
44/// ..Default::default()
45/// }];
46/// page.set_cookies(&cookies).await?;
47/// let retrieved = page.cookies().await?;
48/// # Ok(())
49/// # }
50/// ```
51#[derive(Debug, Clone, Serialize, Deserialize, Default)]
52pub struct Cookie {
53 /// Cookie name
54 pub name: String,
55 /// Cookie value
56 pub value: String,
57 /// Cookie domain (default: page domain)
58 #[serde(skip_serializing_if = "Option::is_none")]
59 pub domain: Option<String>,
60 /// Cookie path (default: "/")
61 #[serde(skip_serializing_if = "Option::is_none")]
62 pub path: Option<String>,
63 /// Seconds since epoch when cookie expires (default: session cookie)
64 #[serde(skip_serializing_if = "Option::is_none")]
65 pub expires: Option<f64>,
66 /// HTTPS only flag
67 #[serde(default)]
68 pub secure: bool,
69 /// HTTP only flag (not accessible via JavaScript)
70 #[serde(default, rename = "httpOnly")]
71 pub http_only: bool,
72 /// SameSite attribute ("Strict", "Lax", "None")
73 #[serde(skip_serializing_if = "Option::is_none", rename = "sameSite")]
74 pub same_site: Option<String>,
75}
76
77// ─── P3: Locator ─────────────────────────────────────────────────────────────
78
79/// A lazy handle to a DOM element identified by a CSS selector.
80///
81/// Locators are created with [`Page::locator`] and make the common
82/// "find-then-act" pattern ergonomic and composable.
83///
84/// # Example
85///
86/// ```no_run
87/// # use ferrous_browser::{Browser, WaitUntil};
88/// # #[tokio::main]
89/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
90/// let browser = Browser::launch().await?;
91/// let page = browser.new_page().await?;
92/// page.goto("https://example.com", WaitUntil::Load).await?;
93///
94/// // Locator API
95/// page.locator("button#submit").click().await?;
96/// page.locator("input[name=q]").type_text("hello").await?;
97/// page.locator(".result").wait_for().await?;
98/// # Ok(())
99/// # }
100/// ```
101#[derive(Clone)]
102pub struct Locator {
103 selector: String,
104 page: Page,
105}
106
107impl Locator {
108 fn new(selector: impl Into<String>, page: Page) -> Self {
109 Self {
110 selector: selector.into(),
111 page,
112 }
113 }
114
115 /// Click the element identified by this locator.
116 pub async fn click(&self) -> Result<()> {
117 self.page.click_selector(&self.selector).await
118 }
119
120 /// Type text into the element identified by this locator.
121 pub async fn type_text(&self, text: &str) -> Result<()> {
122 self.page.type_text_selector(&self.selector, text).await
123 }
124
125 /// Wait until the element is present in the DOM (30 s default timeout).
126 pub async fn wait_for(&self) -> Result<()> {
127 self.page.wait_for_selector(&self.selector).await
128 }
129
130 /// Wait until the element is present with a custom timeout.
131 pub async fn wait_for_timeout(&self, dur: Duration) -> Result<()> {
132 self.page
133 .wait_for_selector_with_timeout(&self.selector, dur)
134 .await
135 }
136
137 /// Get the inner text of the element.
138 pub async fn inner_text(&self) -> Result<String> {
139 let expr = format!(
140 "document.querySelector('{}')?.innerText ?? ''",
141 escape_selector(&self.selector)
142 );
143 let result = self
144 .page
145 .send_command(
146 "Runtime.evaluate".to_string(),
147 Some(json!({ "expression": expr, "returnByValue": true })),
148 )
149 .await?;
150 result
151 .get("result")
152 .and_then(|r| r.get("value"))
153 .and_then(|v| v.as_str())
154 .map(|s| s.to_string())
155 .ok_or_else(|| {
156 BrowserError::invalid_response(
157 format!("inner_text('{}')", self.selector),
158 "unexpected result shape",
159 )
160 })
161 }
162
163 /// Get an attribute value of the element.
164 pub async fn get_attribute(&self, name: &str) -> Result<Option<String>> {
165 let expr = format!(
166 "document.querySelector('{}')?.getAttribute('{}') ?? null",
167 escape_selector(&self.selector),
168 name,
169 );
170 let result = self
171 .page
172 .send_command(
173 "Runtime.evaluate".to_string(),
174 Some(json!({ "expression": expr, "returnByValue": true })),
175 )
176 .await?;
177 let val = result.get("result").and_then(|r| r.get("value"));
178 match val {
179 Some(Value::String(s)) => Ok(Some(s.clone())),
180 Some(Value::Null) | None => Ok(None),
181 _ => Ok(val.map(|v| v.to_string())),
182 }
183 }
184}
185
186// ─── Page ────────────────────────────────────────────────────────────────────
187
188/// A handle to a single page/tab in the browser.
189///
190/// Page provides methods for interacting with a specific page or tab,
191/// including navigation, content retrieval, screenshot capture, and
192/// element interaction.
193///
194/// # Example
195///
196/// ```no_run
197/// use ferrous_browser::{Browser, WaitUntil};
198///
199/// # #[tokio::main]
200/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
201/// let browser = Browser::launch().await?;
202/// let page = browser.new_page().await?;
203///
204/// page.goto("https://example.com", WaitUntil::Load).await?;
205/// let html = page.content().await?;
206/// let screenshot = page.screenshot().await?;
207/// # Ok(())
208/// # }
209/// ```
210#[derive(Clone)]
211pub struct Page {
212 /// Target/page ID
213 pub target_id: String,
214 /// Session ID for routing CDP commands
215 pub session_id: String,
216 /// Reference to CDP client
217 cdp: Arc<CDPClient>,
218 /// Lazily-enabled Page domain: the first `goto` (or anything else that
219 /// needs Page events) drives the one-time `Page.enable` round-trip; any
220 /// concurrent callers wait on it; subsequent callers see it already
221 /// resolved and pay nothing. Clones of a `Page` share this state via Arc.
222 page_enabled: Arc<tokio::sync::OnceCell<()>>,
223}
224
225impl Page {
226 /// Create a new page handle
227 #[doc(hidden)]
228 pub fn new(target_id: String, session_id: String, cdp: Arc<CDPClient>) -> Self {
229 Page {
230 target_id,
231 session_id,
232 cdp,
233 page_enabled: Arc::new(tokio::sync::OnceCell::new()),
234 }
235 }
236
237 /// Ensure the Page domain is enabled on this session. Cheap on every
238 /// call after the first: once `get_or_init` resolves, subsequent calls
239 /// return synchronously from the OnceCell without locking or awaiting.
240 ///
241 /// `Page.enable` only needs to fire once per session (the events it
242 /// unlocks are sticky), so cache completion in a OnceCell. Concurrent
243 /// first-callers cooperate; later callers pay nothing.
244 async fn ensure_page_enabled(&self) {
245 let cdp = self.cdp.clone();
246 let sid = self.session_id.clone();
247 self.page_enabled
248 .get_or_init(|| async move {
249 // Swallow the error: a failed Page.enable surfaces later as a
250 // clearer navigation timeout, and turning a transient Chrome
251 // hiccup at session init into a permanent failure for this
252 // Page is worse than a deferred error.
253 let _ = cdp
254 .send_command_with_session(&sid, "Page.enable".to_string(), None)
255 .await;
256 })
257 .await;
258 }
259
260 // ─── P3: Locator entry point ──────────────────────────────────────────
261
262 /// Create a [`Locator`] for the given CSS selector.
263 ///
264 /// # Example
265 ///
266 /// ```no_run
267 /// # use ferrous_browser::{Browser, WaitUntil};
268 /// # #[tokio::main]
269 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
270 /// let browser = Browser::launch().await?;
271 /// let page = browser.new_page().await?;
272 /// page.goto("https://example.com", WaitUntil::Load).await?;
273 ///
274 /// page.locator("button#submit").click().await?;
275 /// page.locator("input[name=q]").type_text("rust").await?;
276 /// page.locator(".result").wait_for().await?;
277 /// # Ok(())
278 /// # }
279 /// ```
280 pub fn locator(&self, selector: &str) -> Locator {
281 Locator::new(selector, self.clone())
282 }
283
284 // ─── P2: goto with WaitUntil ─────────────────────────────────────────
285
286 /// Navigate to a URL and wait for the specified condition.
287 ///
288 /// # Arguments
289 ///
290 /// * `url` — The URL to navigate to
291 /// * `wait_until` — When to consider navigation complete
292 ///
293 /// # Example
294 ///
295 /// ```no_run
296 /// # use ferrous_browser::{Browser, WaitUntil};
297 /// # #[tokio::main]
298 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
299 /// let browser = Browser::launch().await?;
300 /// let page = browser.new_page().await?;
301 /// page.goto("https://example.com", WaitUntil::Load).await?;
302 /// page.goto("https://example.com", WaitUntil::DomContentLoaded).await?;
303 /// page.goto("https://example.com", WaitUntil::NetworkIdle).await?;
304 /// # Ok(())
305 /// # }
306 /// ```
307 #[tracing::instrument(level = "info", skip(self), fields(url = %url, wait_until = ?wait_until, session_id = %self.session_id))]
308 pub async fn goto(&self, url: &str, wait_until: WaitUntil) -> Result<()> {
309 const TIMEOUT_SECS: u64 = 30;
310 let url_owned = url.to_string();
311 // Capture session_id so the async block can own it
312 let session_id = self.session_id.clone();
313
314 let event_method = match wait_until {
315 WaitUntil::DomContentLoaded => "Page.domContentEventFired",
316 WaitUntil::Load | WaitUntil::NetworkIdle => "Page.loadEventFired",
317 };
318
319 // ── Subscribe BEFORE sending any command (race-condition fix) ─────────
320 // Filter by BOTH method name AND session_id so concurrent pages never
321 // receive each other's load events (multi-page isolation fix).
322 let mut event_rx = self.cdp.subscribe_events();
323 // ─────────────────────────────────────────────────────────────────────
324
325 // First goto on this page also enables Page domain events. All
326 // concurrent gotos on the same Page cooperate via OnceCell rather
327 // than each sending their own Page.enable.
328 self.ensure_page_enabled().await;
329
330 let response = self
331 .send_command("Page.navigate".to_string(), Some(json!({ "url": url })))
332 .await?;
333
334 if let Some(error_text) = response.get("errorText").and_then(|v| v.as_str()) {
335 return Err(BrowserError::navigation_failed(&url_owned, error_text));
336 }
337
338 let wait_result = timeout(Duration::from_secs(TIMEOUT_SECS), async {
339 match wait_until {
340 WaitUntil::NetworkIdle => {
341 let mut last_activity = tokio::time::Instant::now();
342 loop {
343 tokio::select! {
344 recv = event_rx.recv() => {
345 match recv {
346 Ok(msg)
347 if msg.session_id.as_deref() == Some(&session_id) =>
348 {
349 last_activity = tokio::time::Instant::now();
350 }
351 Ok(_) => {} // different session
352 Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => {
353 last_activity = tokio::time::Instant::now();
354 }
355 Err(_) => {}
356 }
357 }
358 _ = tokio::time::sleep(Duration::from_millis(50)) => {
359 if last_activity.elapsed() >= Duration::from_millis(500) {
360 return Ok::<(), BrowserError>(());
361 }
362 }
363 }
364 }
365 }
366 _ => loop {
367 match event_rx.recv().await {
368 Ok(msg)
369 if msg.method.as_deref() == Some(event_method)
370 && msg.session_id.as_deref() == Some(&session_id) =>
371 {
372 return Ok(());
373 }
374 Ok(_) => {} // wrong session or wrong event
375 Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => {
376 return Ok(()); // assume fired
377 }
378 Err(_) => tokio::time::sleep(Duration::from_millis(50)).await,
379 }
380 },
381 }
382 }.instrument(tracing::info_span!("await_navigation", event = event_method)))
383 .await;
384
385 wait_result.map_err(|_| {
386 BrowserError::timeout(format!("navigating to '{}'", url_owned), TIMEOUT_SECS)
387 })?
388 }
389
390 // ─── evaluate ─────────────────────────────────────────────────────────
391
392 /// Evaluate a JavaScript expression and return a remote object handle.
393 ///
394 /// This is useful when you need a reference to a JavaScript object without
395 /// serializing it back to Rust. The returned handle is valid only for this
396 /// session and should be disposed of when no longer needed.
397 ///
398 /// # Example
399 ///
400 /// ```no_run
401 /// # use ferrous_browser::{Browser, WaitUntil};
402 /// # #[tokio::main]
403 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
404 /// let browser = Browser::launch_chrome(None).await?;
405 /// let page = browser.new_page().await?;
406 /// page.goto("https://example.com", WaitUntil::Load).await?;
407 /// // Get a remote reference to an object
408 /// let handle = page.evaluate_handle("document.body").await?;
409 /// println!("Remote object handle: {}", handle);
410 /// # Ok(())
411 /// # }
412 /// ```
413 pub async fn evaluate_handle(&self, expression: &str) -> Result<String> {
414 let result = self
415 .send_command(
416 "Runtime.evaluate".to_string(),
417 Some(json!({
418 "expression": expression,
419 "returnByValue": false
420 })),
421 )
422 .await?;
423
424 if let Some(exc) = result.get("exceptionDetails") {
425 let msg = exc
426 .get("exception")
427 .and_then(|e| e.get("description"))
428 .and_then(|d| d.as_str())
429 .unwrap_or("unknown JS exception");
430 return Err(BrowserError::command_failed("Runtime.evaluate", msg));
431 }
432
433 result
434 .get("result")
435 .and_then(|v| v.get("objectId"))
436 .and_then(|v| v.as_str())
437 .map(|s| s.to_string())
438 .ok_or_else(|| {
439 BrowserError::invalid_response(
440 "evaluate_handle()",
441 "missing result.objectId — may have evaluated to a primitive",
442 )
443 })
444 }
445
446 /// Evaluate a JavaScript expression in the page context and deserialize the
447 /// result as `T`.
448 ///
449 /// # Example
450 ///
451 /// ```no_run
452 /// # use ferrous_browser::{Browser, WaitUntil};
453 /// # #[tokio::main]
454 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
455 /// let browser = Browser::launch_chrome(None).await?;
456 /// let page = browser.new_page().await?;
457 /// page.goto("https://example.com", WaitUntil::Load).await?;
458 /// let title: String = page.evaluate("document.title").await?;
459 /// let count: u64 = page.evaluate("document.querySelectorAll('a').length").await?;
460 /// # Ok(())
461 /// # }
462 /// ```
463 #[tracing::instrument(level = "info", skip(self), fields(expression_len = expression.len()))]
464 pub async fn evaluate<T: DeserializeOwned>(&self, expression: &str) -> Result<T> {
465 let result = self
466 .send_command(
467 "Runtime.evaluate".to_string(),
468 Some(json!({
469 "expression": expression,
470 "returnByValue": true,
471 "awaitPromise": true,
472 })),
473 )
474 .await?;
475
476 if let Some(exc) = result.get("exceptionDetails") {
477 let msg = exc
478 .get("exception")
479 .and_then(|e| e.get("description"))
480 .and_then(|d| d.as_str())
481 .unwrap_or("unknown JS exception");
482 return Err(BrowserError::command_failed("Runtime.evaluate", msg));
483 }
484
485 let value = result
486 .get("result")
487 .and_then(|r| r.get("value"))
488 .cloned()
489 .unwrap_or(Value::Null);
490
491 serde_json::from_value(value)
492 .map_err(|e| BrowserError::invalid_response("evaluate()", e.to_string()))
493 }
494
495 // ─── Wait helpers ─────────────────────────────────────────────────────
496
497 /// Wait for an element matching `selector` to appear in the DOM.
498 ///
499 /// Uses a 30-second timeout.
500 pub async fn wait_for_selector(&self, selector: &str) -> Result<()> {
501 self.wait_for_selector_with_timeout(selector, Duration::from_secs(30))
502 .await
503 }
504
505 /// Wait for an element matching `selector` with a custom timeout.
506 ///
507 /// Implementation note: we push the *entire* wait into Chrome with a
508 /// MutationObserver-backed Promise and use `Runtime.evaluate`'s
509 /// `awaitPromise: true` so Chrome holds the response until the element
510 /// appears (or the timer fires). Net result is one CDP round-trip per
511 /// call and a reaction latency bounded by the DOM mutation that
512 /// inserted the element, not by a polling interval.
513 pub async fn wait_for_selector_with_timeout(
514 &self,
515 selector: &str,
516 dur: Duration,
517 ) -> Result<()> {
518 let timeout_ms = dur.as_millis() as u64;
519 // The selector is interpolated into a JS string literal, so escape
520 // anything that would break out of it. serde_json::to_string gives
521 // us a properly-quoted JS string for free.
522 let selector_lit = serde_json::to_string(selector).expect("selector is valid utf-8");
523
524 let expr = format!(
525 r#"new Promise((resolve) => {{
526 const sel = {selector_lit};
527 if (document.querySelector(sel)) {{ resolve(true); return; }}
528 const observer = new MutationObserver(() => {{
529 if (document.querySelector(sel)) {{
530 observer.disconnect();
531 clearTimeout(timer);
532 resolve(true);
533 }}
534 }});
535 const timer = setTimeout(() => {{
536 observer.disconnect();
537 resolve(false);
538 }}, {timeout_ms});
539 observer.observe(document, {{
540 childList: true, subtree: true, attributes: true
541 }});
542 }})"#
543 );
544
545 let result = self
546 .send_command(
547 "Runtime.evaluate".to_string(),
548 Some(json!({
549 "expression": expr,
550 "returnByValue": true,
551 "awaitPromise": true,
552 })),
553 )
554 .await?;
555
556 let appeared = result
557 .get("result")
558 .and_then(|r| r.get("value"))
559 .and_then(|v| v.as_bool())
560 .unwrap_or(false);
561
562 if appeared {
563 Ok(())
564 } else {
565 Err(BrowserError::timeout(
566 format!("waiting for selector '{selector}'"),
567 dur.as_secs(),
568 ))
569 }
570 }
571
572 // ─── Interaction helpers (internal, also used by Locator) ─────────────
573
574 /// Click an element matching the selector (internal implementation).
575 pub(crate) async fn click_selector(&self, selector: &str) -> Result<()> {
576 let expr = format!(
577 "document.querySelector('{}').click()",
578 escape_selector(selector),
579 );
580 self.send_command(
581 "Runtime.evaluate".to_string(),
582 Some(json!({ "expression": expr })),
583 )
584 .await?;
585 Ok(())
586 }
587
588 /// Type text into an element (internal implementation).
589 pub(crate) async fn type_text_selector(&self, selector: &str, text: &str) -> Result<()> {
590 let focus_expr = format!(
591 "document.querySelector('{}').focus()",
592 escape_selector(selector)
593 );
594 self.send_command(
595 "Runtime.evaluate".to_string(),
596 Some(json!({ "expression": focus_expr })),
597 )
598 .await?;
599
600 for ch in text.chars() {
601 self.send_command(
602 "Input.dispatchKeyEvent".to_string(),
603 Some(json!({
604 "type": "char",
605 "text": ch.to_string(),
606 })),
607 )
608 .await?;
609 }
610 Ok(())
611 }
612
613 // ─── Public raw-selector methods (legacy / power-user API) ────────────
614
615 /// Click an element matching the CSS selector.
616 ///
617 /// Prefer [`Page::locator`] for new code.
618 pub async fn click(&self, selector: &str) -> Result<()> {
619 self.click_selector(selector).await
620 }
621
622 /// Type text into an input element matching the CSS selector.
623 ///
624 /// Prefer [`Page::locator`] for new code.
625 pub async fn type_text(&self, selector: &str, text: &str) -> Result<()> {
626 self.type_text_selector(selector, text).await
627 }
628
629 // ─── Content / screenshot ────────────────────────────────────────────
630
631 /// Get the full HTML content of the page.
632 ///
633 /// # Example
634 ///
635 /// ```no_run
636 /// # use ferrous_browser::{Browser, WaitUntil};
637 /// # #[tokio::main]
638 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
639 /// let browser = Browser::launch().await?;
640 /// let page = browser.new_page().await?;
641 /// page.goto("https://example.com", WaitUntil::Load).await?;
642 /// let html = page.content().await?;
643 /// println!("HTML: {}", html);
644 /// # Ok(())
645 /// # }
646 /// ```
647 #[tracing::instrument(level = "info", skip(self))]
648 pub async fn content(&self) -> Result<String> {
649 let result = self
650 .send_command(
651 "Runtime.evaluate".to_string(),
652 Some(json!({ "expression": "document.documentElement.outerHTML" })),
653 )
654 .await?;
655
656 result
657 .get("result")
658 .and_then(|v| v.get("value"))
659 .and_then(|v| v.as_str())
660 .map(|s| s.to_string())
661 .ok_or_else(|| {
662 BrowserError::invalid_response("content()", "missing result.value string")
663 })
664 }
665
666 /// Take a screenshot of the page and return PNG bytes.
667 ///
668 /// # Example
669 ///
670 /// ```no_run
671 /// # use ferrous_browser::{Browser, WaitUntil};
672 /// # #[tokio::main]
673 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
674 /// let browser = Browser::launch().await?;
675 /// let page = browser.new_page().await?;
676 /// page.goto("https://example.com", WaitUntil::Load).await?;
677 /// let png = page.screenshot().await?;
678 /// std::fs::write("screenshot.png", png)?;
679 /// # Ok(())
680 /// # }
681 /// ```
682 #[tracing::instrument(level = "info", skip(self))]
683 pub async fn screenshot(&self) -> Result<Vec<u8>> {
684 let result = self
685 .send_command("Page.captureScreenshot".to_string(), None)
686 .await?;
687
688 let base64_data = result
689 .get("data")
690 .and_then(|v| v.as_str())
691 .ok_or_else(|| BrowserError::invalid_response("screenshot()", "missing data field"))?;
692
693 tracing::info_span!("base64_decode", b64_len = base64_data.len())
694 .in_scope(|| base64_decode(base64_data))
695 }
696
697 // ─── Network interception ────────────────────────────────────────────
698
699 /// Intercept network requests matching a pattern.
700 ///
701 /// Enables request interception and calls the callback for matching
702 /// requests. The callback receives `(url, resource_type)` and returns
703 /// `true` to abort the request.
704 pub async fn intercept_requests<F>(&self, callback: F) -> Result<()>
705 where
706 F: Fn(&str, &str) -> bool + Send + 'static,
707 {
708 let _ = self.send_command("Network.enable".to_string(), None).await;
709 let _ = self
710 .send_command(
711 "Network.setRequestInterception".to_string(),
712 Some(json!({ "patterns": [{ "urlPattern": "*" }] })),
713 )
714 .await;
715
716 // ── P1: Subscribe BEFORE the enable command fires events ─────────────
717 let mut event_rx = self.cdp.subscribe_events();
718 // ────────────────────────────────────────────────────────────────────
719
720 let cdp = self.cdp.clone();
721 let session_id = self.session_id.clone();
722 tokio::spawn(async move {
723 while let Ok(msg) = event_rx.recv().await {
724 // Only handle Network.requestIntercepted for this page's session
725 if msg.method.as_deref() != Some("Network.requestIntercepted") {
726 continue;
727 }
728 if msg.session_id.as_deref() != Some(&session_id) {
729 continue;
730 }
731 if let Some(params) = msg.params {
732 let url = params
733 .get("request")
734 .and_then(|r| r.get("url"))
735 .and_then(|u| u.as_str())
736 .unwrap_or("");
737 let resource_type = params
738 .get("request")
739 .and_then(|r| r.get("resourceType"))
740 .and_then(|r| r.as_str())
741 .unwrap_or("");
742 let request_id = params
743 .get("requestId")
744 .and_then(|r| r.as_str())
745 .unwrap_or("");
746
747 let should_abort = callback(url, resource_type);
748
749 let cdp_method = if should_abort {
750 "Network.abortRequest"
751 } else {
752 "Network.continueInterceptedRequest"
753 };
754
755 let _ = cdp
756 .send_command_with_session(
757 &session_id,
758 cdp_method.to_string(),
759 Some(json!({ "requestId": request_id })),
760 )
761 .await;
762 }
763 }
764 });
765
766 Ok(())
767 }
768
769 // ─── Session persistence ────────────────────────────────────────────────
770
771 /// Get all cookies from the page.
772 ///
773 /// Retrieves all cookies visible to the current page, including
774 /// expired cookies if they are still in the cookie jar.
775 ///
776 /// # Example
777 ///
778 /// ```no_run
779 /// # use ferrous_browser::{Browser, WaitUntil};
780 /// # #[tokio::main]
781 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
782 /// let browser = Browser::launch().await?;
783 /// let page = browser.new_page().await?;
784 /// page.goto("https://example.com", WaitUntil::Load).await?;
785 /// let cookies = page.cookies().await?;
786 /// for cookie in cookies {
787 /// println!("{}={}", cookie.name, cookie.value);
788 /// }
789 /// # Ok(())
790 /// # }
791 /// ```
792 pub async fn cookies(&self) -> Result<Vec<Cookie>> {
793 let result = self
794 .send_command("Network.getCookies".to_string(), None)
795 .await?;
796
797 let cookies_array = result
798 .get("cookies")
799 .and_then(|v| v.as_array())
800 .ok_or_else(|| BrowserError::invalid_response("cookies()", "missing cookies array"))?;
801
802 let mut cookies = Vec::new();
803 for cookie_val in cookies_array {
804 if let Ok(cookie) = serde_json::from_value::<Cookie>(cookie_val.clone()) {
805 cookies.push(cookie);
806 }
807 }
808
809 Ok(cookies)
810 }
811
812 /// Set cookies for the page (session persistence).
813 ///
814 /// Sets one or more cookies that will be visible to JavaScript and HTTP requests.
815 /// Typically called before navigation to pre-populate cookies for authentication.
816 ///
817 /// # Example
818 ///
819 /// ```no_run
820 /// # use ferrous_browser::{Browser, Cookie, WaitUntil};
821 /// # #[tokio::main]
822 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
823 /// let browser = Browser::launch().await?;
824 /// let page = browser.new_page().await?;
825 /// let cookies = vec![Cookie {
826 /// name: "session_id".to_string(),
827 /// value: "abc123xyz".to_string(),
828 /// domain: Some("example.com".to_string()),
829 /// ..Default::default()
830 /// }];
831 /// page.set_cookies(&cookies).await?;
832 /// page.goto("https://example.com", WaitUntil::Load).await?;
833 /// # Ok(())
834 /// # }
835 /// ```
836 pub async fn set_cookies(&self, cookies: &[Cookie]) -> Result<()> {
837 // Convert cookies to JSON array with proper formatting for CDP
838 let cookie_params: Vec<Value> = cookies
839 .iter()
840 .map(|c| {
841 let mut obj = json!({
842 "name": c.name,
843 "value": c.value,
844 });
845 if let Some(domain) = &c.domain {
846 obj["domain"] = json!(domain);
847 }
848 if let Some(path) = &c.path {
849 obj["path"] = json!(path);
850 }
851 if let Some(expires) = c.expires {
852 obj["expires"] = json!(expires);
853 }
854 if c.secure {
855 obj["secure"] = json!(true);
856 }
857 if c.http_only {
858 obj["httpOnly"] = json!(true);
859 }
860 if let Some(same_site) = &c.same_site {
861 obj["sameSite"] = json!(same_site);
862 }
863 obj
864 })
865 .collect();
866
867 self.send_command(
868 "Network.setCookies".to_string(),
869 Some(json!({ "cookies": cookie_params })),
870 )
871 .await?;
872
873 Ok(())
874 }
875
876 // ─── PDF Export ──────────────────────────────────────────────────────────
877
878 /// Export the page as PDF and return the bytes.
879 ///
880 /// Converts the current page to PDF format. By default, includes all pages
881 /// and uses A4 paper size in portrait mode.
882 ///
883 /// # Example
884 ///
885 /// ```no_run
886 /// # use ferrous_browser::{Browser, WaitUntil};
887 /// # #[tokio::main]
888 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
889 /// let browser = Browser::launch().await?;
890 /// let page = browser.new_page().await?;
891 /// page.goto("https://example.com", WaitUntil::Load).await?;
892 /// let pdf = page.pdf().await?;
893 /// std::fs::write("page.pdf", pdf)?;
894 /// # Ok(())
895 /// # }
896 /// ```
897 pub async fn pdf(&self) -> Result<Vec<u8>> {
898 self.pdf_with_options(None).await
899 }
900
901 /// Export the page as PDF with custom options.
902 ///
903 /// Allows control over paper size, margins, scale, landscape mode, and more.
904 ///
905 /// # Example
906 ///
907 /// ```no_run
908 /// # use ferrous_browser::{Browser, WaitUntil};
909 /// # use serde_json::json;
910 /// # #[tokio::main]
911 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
912 /// let browser = Browser::launch().await?;
913 /// let page = browser.new_page().await?;
914 /// page.goto("https://example.com", WaitUntil::Load).await?;
915 /// let options = json!({
916 /// "landscape": true,
917 /// "scale": 1.5,
918 /// "paperWidth": 11.0,
919 /// "paperHeight": 8.5,
920 /// });
921 /// let pdf = page.pdf_with_options(Some(&options)).await?;
922 /// # Ok(())
923 /// # }
924 /// ```
925 pub async fn pdf_with_options(&self, options: Option<&Value>) -> Result<Vec<u8>> {
926 let mut params = json!({
927 "landscape": false,
928 "displayHeaderFooter": false,
929 "scale": 1.0,
930 "paperWidth": 8.5,
931 "paperHeight": 11.0,
932 "marginTop": 0.4,
933 "marginBottom": 0.4,
934 "marginLeft": 0.4,
935 "marginRight": 0.4,
936 "preferCSSPageSize": true,
937 "transferMode": "ReturnAsBase64",
938 });
939
940 // Merge with provided options
941 if let Some(opts) = options {
942 if let Some(obj) = params.as_object_mut() {
943 if let Some(opts_obj) = opts.as_object() {
944 for (key, value) in opts_obj.iter() {
945 obj.insert(key.clone(), value.clone());
946 }
947 }
948 }
949 }
950
951 let result = self
952 .send_command("Page.printToPDF".to_string(), Some(params))
953 .await?;
954
955 let base64_data = result
956 .get("data")
957 .and_then(|v| v.as_str())
958 .ok_or_else(|| BrowserError::invalid_response("pdf()", "missing data field"))?;
959
960 base64_decode(base64_data)
961 }
962
963 // ─── HAR / Trace capture ─────────────────────────────────────────────
964
965 /// Start capturing HTTP Archive (HAR) data for this page.
966 ///
967 /// Enables the Network domain and begins collecting request/response
968 /// entries. Use [`HarCapture::stop`] to get the complete HAR archive,
969 /// or [`HarCapture::export`] for a snapshot while continuing to capture.
970 ///
971 /// # Example
972 ///
973 /// ```no_run
974 /// # use ferrous_browser::{Browser, WaitUntil};
975 /// # #[tokio::main]
976 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
977 /// let browser = Browser::launch_chrome(None).await?;
978 /// let page = browser.new_page().await?;
979 ///
980 /// let mut har = page.start_har_capture().await?;
981 /// page.goto("https://example.com", WaitUntil::Load).await?;
982 ///
983 /// let archive = har.stop().await;
984 /// let json = serde_json::to_string_pretty(&archive)?;
985 /// # Ok(())
986 /// # }
987 /// ```
988 pub async fn start_har_capture(&self) -> Result<HarCapture> {
989 let capture = HarCapture::new(self.cdp.clone(), self.session_id.clone());
990 capture.start().await?;
991 Ok(capture)
992 }
993
994 // ─── Internal ─────────────────────────────────────────────────────────
995
996 /// Send a command to this page's session
997 pub(crate) async fn send_command(
998 &self,
999 method: String,
1000 params: Option<Value>,
1001 ) -> Result<Value> {
1002 self.cdp
1003 .send_command_with_session(&self.session_id, method, params)
1004 .await
1005 }
1006}
1007
1008// ─── Utilities ────────────────────────────────────────────────────────────────
1009
/// Escape a string for embedding inside a single-quoted JavaScript string
/// literal.
///
/// Escapes, in one pass:
/// * backslashes — otherwise a CSS escape such as `.a\:b` is consumed by the
///   JS parser, and a trailing backslash would swallow the closing quote;
/// * single quotes — they would terminate the literal;
/// * line terminators (`\n`, `\r`, U+2028, U+2029) — raw line breaks are not
///   allowed inside a quoted JS literal.
///
/// Input without any of these characters is returned unchanged.
fn escape_selector(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\'' => escaped.push_str("\\'"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\u{2028}' => escaped.push_str("\\u2028"),
            '\u{2029}' => escaped.push_str("\\u2029"),
            other => escaped.push(other),
        }
    }
    escaped
}
1014
1015/// Decode base64 string to bytes
1016fn base64_decode(s: &str) -> Result<Vec<u8>> {
1017 use base64::Engine;
1018 let engine = base64::engine::general_purpose::STANDARD;
1019 engine.decode(s).map_err(|e| {
1020 BrowserError::invalid_response("screenshot()", format!("base64 decode failed: {e}"))
1021 })
1022}
1023
1024// ─── Tests ────────────────────────────────────────────────────────────────────
1025
#[cfg(test)]
mod tests {
    use super::*;

    /// `Load` is the default wait strategy.
    #[test]
    fn test_wait_until_default() {
        assert!(matches!(WaitUntil::default(), WaitUntil::Load));
    }

    /// A selector without special characters passes through untouched.
    #[test]
    fn test_escape_selector_plain() {
        let plain = "button#id";
        assert_eq!(escape_selector(plain), plain);
    }

    /// Single quotes are backslash-escaped.
    #[test]
    fn test_escape_selector_quotes() {
        let escaped = escape_selector("input[name='q']");
        assert_eq!(escaped, r"input[name=\'q\']");
    }
}
1045}