scrapling_browser/config.rs
1//! Configuration types for browser automation sessions.
2//!
3//! This module contains every knob you can turn when launching a browser through
4//! scrapling-browser. The primary entry point is [`BrowserConfig`], which controls
5//! how the browser is launched, how pages are navigated, and what requests are
6//! blocked. For anti-detection scenarios, [`StealthConfig`] wraps a `BrowserConfig`
7//! and adds stealth-specific options such as canvas noise and WebRTC blocking.
8//!
9//! Most fields on `BrowserConfig` have sensible defaults (see the [`Default`] impl),
10//! so you typically only override the two or three settings you care about:
11//!
12//! ```rust
13//! use scrapling_browser::BrowserConfig;
14//!
15//! let config = BrowserConfig {
16//! headless: true,
17//! disable_resources: true,
18//! block_ads: true,
19//! ..Default::default()
20//! };
21//! ```
22//!
23//! When you need to tweak behaviour on a per-request basis without rebuilding the
24//! entire config, create a [`FetchParams`] with only the fields you want to override.
25//! The session's `fetch` method merges those overrides with the base config into a
26//! [`ResolvedFetchParams`] struct that carries the final, concrete values used for
27//! that single navigation.
28//!
29//! # Key types
30//!
31//! | Type | Purpose |
32//! |------|---------|
33//! | [`BrowserConfig`] | Session-level browser settings (headless, proxy, timeouts, etc.) |
34//! | [`StealthConfig`] | Anti-detection wrapper around `BrowserConfig` |
35//! | [`FetchParams`] | Optional per-request overrides |
36//! | [`ResolvedFetchParams`] | Fully resolved values after merging `FetchParams` + `BrowserConfig` |
37//! | [`ProxyConfig`] | Static proxy server credentials |
38//! | [`CookieParam`] | A cookie to inject before navigation |
39//! | [`WaitState`] | Required DOM state of a wait selector |
40//! | [`PageCallback`] | Async closure invoked on a page at setup or post-navigation |
41//! | [`StealthContextOptions`] | Viewport / device emulation values for stealth mode |
42
43use std::collections::HashMap;
44use std::collections::HashSet;
45use std::path::Path;
46
47use crate::ad_domains::AD_DOMAINS;
48use crate::error::{BrowserError, Result};
49
50/// Browser session configuration -- the central struct that controls how the
51/// Playwright browser is launched and how pages are navigated.
52///
53/// This mirrors the Python `PlaywrightConfig` from the original scrapling library.
54/// Every field has a default value (see [`Default`]), so you only need to set the
55/// fields relevant to your use case. Call [`validate`](BrowserConfig::validate)
56/// before passing the config to a session; sessions call it automatically during
57/// construction.
58pub struct BrowserConfig {
59 /// Maximum number of concurrent browser pages in the pool.
60 /// Must be between 1 and 50 inclusive. Higher values allow more parallel fetches
61 /// but consume more memory. Defaults to `1`.
62 pub max_pages: u32,
63
64 /// Whether to launch the browser in headless mode.
65 /// Set to `false` when debugging to see the browser window. Defaults to `true`.
66 pub headless: bool,
67
68 /// Block heavyweight resource types (images, fonts, stylesheets) when `true`.
69 /// This significantly speeds up page loads when you only need the HTML/DOM.
70 /// The exact list of blocked types is defined in [`constants::EXTRA_RESOURCES`].
71 /// Defaults to `false`.
72 pub disable_resources: bool,
73
74 /// Wait for the network-idle event after navigation.
75 /// Useful for SPAs that fetch data after the initial document load, but slows
76 /// down fetches on pages with persistent connections (e.g. WebSocket heartbeats).
77 /// Defaults to `false`.
78 pub network_idle: bool,
79
80 /// Wait for the `DOMContentLoaded` event after navigation.
81 /// This is faster than `network_idle` and sufficient for most server-rendered
82 /// pages. Defaults to `true`.
83 pub load_dom: bool,
84
85 /// Optional CSS selector to wait for before returning the page content.
86 /// Use this when the data you need is rendered asynchronously by JavaScript
87 /// and you know a specific element that signals the content is ready.
88 pub wait_selector: Option<String>,
89
90 /// Required state of the wait selector before proceeding.
91 /// For example, `WaitState::Visible` waits until the element is both present
92 /// and visible on screen. Defaults to [`WaitState::Attached`].
93 pub wait_selector_state: WaitState,
94
95 /// Cookies to inject into the browser context before navigation.
96 /// Useful for authenticated scraping -- set session cookies here to skip login flows.
97 pub cookies: Vec<CookieParam>,
98
99 /// Prepend a Google search navigation to warm the browser session.
100 /// Some bot-detection systems check the browser's navigation history; visiting
101 /// Google first can make the session appear more natural. Defaults to `true`.
102 pub google_search: bool,
103
104 /// Extra delay in milliseconds to sleep after page load stabilisation.
105 /// Use this as a last resort when `wait_selector` and `network_idle` are not
106 /// enough. Defaults to `0` (no extra delay).
107 pub wait_ms: u64,
108
109 /// IANA timezone identifier to emulate in the browser context (e.g. `"America/New_York"`).
110 /// Setting this makes the browser's `Intl` APIs and `Date` objects report the
111 /// chosen timezone, which can help avoid location-based bot detection.
112 pub timezone_id: Option<String>,
113
114 /// Static proxy server configuration.
115 /// Mutually exclusive with `proxy_rotator` -- set one or the other, not both.
116 pub proxy: Option<ProxyConfig>,
117
118 /// Rotating proxy provider that supplies a fresh proxy per request.
119 /// Mutually exclusive with `proxy` -- set one or the other, not both.
120 /// Useful when you need a different IP for each fetch to avoid rate limits.
121 pub proxy_rotator: Option<scrapling_fetch::ProxyRotator>,
122
123 /// Additional HTTP headers sent with every request.
124 /// These are applied via Playwright's `set_extra_http_headers` and will override
125 /// headers of the same name that the browser would normally send.
126 pub extra_headers: HashMap<String, String>,
127
128 /// Navigation and action timeout in milliseconds.
129 /// Applies to `page.goto()`, selector waits, and other timed operations.
130 /// Defaults to `30_000.0` (30 seconds).
131 pub timeout_ms: f64,
132
133 /// Path to a JavaScript file evaluated in every new page context.
134 /// The script runs before any page code, making it ideal for overriding
135 /// `navigator` properties or injecting polyfills. The file must exist on disk.
136 pub init_script: Option<String>,
137
138 /// Path to a persistent user-data directory for the browser profile.
139 /// When set, the browser stores cookies, local storage, and cache across
140 /// sessions, which can help maintain login state between runs.
141 pub user_data_dir: Option<String>,
142
143 /// Locale string (e.g. `"en-US"`) to emulate in the browser context.
144 /// Affects `navigator.language`, `Accept-Language` headers, and date/number
145 /// formatting in JavaScript.
146 pub locale: Option<String>,
147
148 /// Launch with the system-installed Chrome instead of bundled Chromium.
149 /// The system Chrome may have a different fingerprint than Chromium and may
150 /// pass more bot-detection checks. Defaults to `false`.
151 pub real_chrome: bool,
152
153 /// WebSocket URL for connecting to an existing Chrome DevTools Protocol endpoint.
154 /// Must start with `ws://` or `wss://`. When set, the session attaches to a
155 /// running browser instead of launching a new one.
156 pub cdp_url: Option<String>,
157
158 /// Custom User-Agent string to set on the browser context.
159 /// When `None`, the browser uses its built-in default user agent.
160 pub useragent: Option<String>,
161
162 /// Extra command-line flags passed to the browser process.
163 /// These are appended after the default and stealth flags. Harmful
164 /// automation-revealing flags are automatically filtered out.
165 pub extra_flags: Vec<String>,
166
167 /// Set of domain names whose requests will be blocked.
168 /// Blocking is suffix-based: adding `"ads.example.com"` also blocks
169 /// `"sub.ads.example.com"`. See [`intercept::is_domain_blocked`] for details.
170 pub blocked_domains: HashSet<String>,
171
172 /// Merge the built-in ad-domain blocklist into `blocked_domains` when `true`.
173 /// The blocklist contains roughly 3,500 known ad and tracker domains sourced
174 /// from Peter Lowe's list. Defaults to `false`.
175 pub block_ads: bool,
176
177 /// Number of retry attempts for each fetch operation.
178 /// Must be between 1 and 10 inclusive. On failure, the session waits
179 /// `retry_delay_secs` between attempts. Defaults to `3`.
180 pub retries: u32,
181
182 /// Delay in seconds between retry attempts.
183 /// Applies when a fetch fails and there are retries remaining.
184 /// Defaults to `1.0`.
185 pub retry_delay_secs: f64,
186
187 /// URL pattern to capture matching XHR/fetch responses.
188 /// When set, the session intercepts network responses whose URL matches this
189 /// pattern and includes them in the response. Useful for extracting API data
190 /// that the page fetches via AJAX.
191 pub capture_xhr: Option<String>,
192
193 /// Path to a custom browser executable.
194 /// Use this to point at a specific Chrome/Chromium binary instead of the one
195 /// bundled with Playwright. The file must exist on disk.
196 pub executable_path: Option<String>,
197
198 /// Enable DNS-over-HTTPS via Cloudflare's resolver.
199 /// Adds the `--dns-over-https-templates` Chromium flag pointing at Cloudflare's
200 /// `1.1.1.1` DNS endpoint, encrypting DNS queries from the browser process.
201 /// Defaults to `false`.
202 pub dns_over_https: bool,
203
204 /// Arbitrary key-value configuration forwarded to the selector engine.
205 /// This map is passed through to scrapling's selector/parsing layer and can
206 /// control how CSS selectors and smart matching behave.
207 pub selector_config: HashMap<String, serde_json::Value>,
208
209 /// Async callback invoked on each page immediately after creation.
210 /// Use this to perform custom setup like adding request interceptors, injecting
211 /// scripts, or configuring page-level settings before navigation begins.
212 pub page_setup: Option<PageCallback>,
213
214 /// Async callback invoked on each page after navigation completes.
215 /// Use this to perform post-navigation actions like clicking buttons, filling
216 /// forms, or scrolling to trigger lazy-loaded content before the HTML is captured.
217 pub page_action: Option<PageCallback>,
218}
219
220/// Async callback that receives a Playwright page reference.
221///
222/// This type alias defines the signature for [`BrowserConfig::page_setup`] and
223/// [`BrowserConfig::page_action`] callbacks. The closure receives a cloned
224/// `playwright_rs::Page` and must return a pinned, `Send` future that resolves
225/// to `Result<()>`. Because the closure itself must be `Send + Sync`, it can be
226/// shared safely across threads.
227pub type PageCallback = Box<
228 dyn Fn(
229 playwright_rs::Page,
230 )
231 -> std::pin::Pin<Box<dyn std::future::Future<Output = crate::error::Result<()>> + Send>>
232 + Send
233 + Sync,
234>;
235
236impl Default for BrowserConfig {
237 fn default() -> Self {
238 Self {
239 max_pages: 1,
240 headless: true,
241 disable_resources: false,
242 network_idle: false,
243 load_dom: true,
244 wait_selector: None,
245 wait_selector_state: WaitState::Attached,
246 cookies: Vec::new(),
247 google_search: true,
248 wait_ms: 0,
249 timezone_id: None,
250 proxy: None,
251 proxy_rotator: None,
252 extra_headers: HashMap::new(),
253 timeout_ms: 30_000.0,
254 init_script: None,
255 user_data_dir: None,
256 locale: None,
257 real_chrome: false,
258 cdp_url: None,
259 useragent: None,
260 extra_flags: Vec::new(),
261 blocked_domains: HashSet::new(),
262 block_ads: false,
263 retries: 3,
264 retry_delay_secs: 1.0,
265 capture_xhr: None,
266 executable_path: None,
267 dns_over_https: false,
268 selector_config: HashMap::new(),
269 page_setup: None,
270 page_action: None,
271 }
272 }
273}
274
275impl std::fmt::Debug for BrowserConfig {
276 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
277 f.debug_struct("BrowserConfig")
278 .field("headless", &self.headless)
279 .field("timeout_ms", &self.timeout_ms)
280 .field("retries", &self.retries)
281 .field("max_pages", &self.max_pages)
282 .finish_non_exhaustive()
283 }
284}
285
286impl BrowserConfig {
287 /// Validate configuration invariants and populate derived fields.
288 ///
289 /// This method checks that numeric fields are within acceptable ranges, that
290 /// mutually exclusive options are not both set, that file paths exist on disk,
291 /// and that the CDP URL (if any) uses a WebSocket scheme. When `block_ads` is
292 /// `true`, it also merges the built-in ad-domain list into `blocked_domains`.
293 ///
294 /// You do not usually need to call this yourself -- [`DynamicSession::new`] and
295 /// [`StealthySession::new`] call it automatically during construction.
296 pub fn validate(&mut self) -> Result<()> {
297 if !(1..=50).contains(&self.max_pages) {
298 return Err(BrowserError::Config("max_pages must be 1..50".into()));
299 }
300 if !(1..=10).contains(&self.retries) {
301 return Err(BrowserError::Config("retries must be 1..10".into()));
302 }
303 if self.proxy.is_some() && self.proxy_rotator.is_some() {
304 return Err(BrowserError::Config(
305 "cannot use proxy and proxy_rotator together".into(),
306 ));
307 }
308 if let Some(ref cdp) = self.cdp_url {
309 if !cdp.starts_with("ws://") && !cdp.starts_with("wss://") {
310 return Err(BrowserError::Config(
311 "cdp_url must start with ws:// or wss://".into(),
312 ));
313 }
314 }
315 if let Some(ref path) = self.init_script {
316 if !Path::new(path).is_file() {
317 return Err(BrowserError::Config(format!(
318 "init_script not found: {path}"
319 )));
320 }
321 }
322 if let Some(ref path) = self.executable_path {
323 if !Path::new(path).is_file() {
324 return Err(BrowserError::Config(format!(
325 "executable_path not found: {path}"
326 )));
327 }
328 }
329 if self.block_ads {
330 for domain in AD_DOMAINS {
331 self.blocked_domains.insert((*domain).to_owned());
332 }
333 }
334 Ok(())
335 }
336
337 /// Returns `true` if a rotating proxy provider is configured.
338 /// When a rotator is present the session creates a fresh browser context per
339 /// request so each navigation can use a different proxy address.
340 pub fn has_proxy_rotator(&self) -> bool {
341 self.proxy_rotator.is_some()
342 }
343
344 /// Returns `true` if the session will connect via Chrome DevTools Protocol.
345 /// CDP mode attaches to a running browser rather than launching a new process,
346 /// which is useful for connecting to remote or containerised browsers.
347 pub fn is_cdp(&self) -> bool {
348 self.cdp_url.is_some()
349 }
350}
351
352/// Stealth browser configuration -- extends [`BrowserConfig`] with anti-detection options.
353///
354/// Use this instead of a bare `BrowserConfig` when scraping sites that employ bot
355/// detection (e.g. Cloudflare, DataDome, PerimeterX). The stealth layer adds
356/// Chromium CLI flags to block WebRTC leaks, disable WebGL, inject canvas noise,
357/// and optionally solve Cloudflare Turnstile challenges automatically.
358#[derive(Debug)]
359pub struct StealthConfig {
360 /// Underlying browser configuration shared with non-stealth sessions.
361 /// All standard settings (timeout, proxy, headers, etc.) live here.
362 pub base: BrowserConfig,
363
364 /// Allow WebGL rendering (disable to reduce fingerprint surface).
365 /// Some fingerprinting services read WebGL renderer strings to identify the
366 /// GPU and driver version. Set to `false` to disable WebGL entirely.
367 /// Defaults to `true`.
368 pub allow_webgl: bool,
369
370 /// Inject noise into canvas image data to thwart canvas fingerprinting.
371 /// When enabled, small random perturbations are applied to pixel data returned
372 /// by `toDataURL()` and `getImageData()`, making the canvas fingerprint
373 /// non-deterministic. Defaults to `false`.
374 pub hide_canvas: bool,
375
376 /// Disable non-proxied UDP to prevent WebRTC IP leaks.
377 /// Without this flag, WebRTC can reveal the machine's real IP address even
378 /// when a proxy is configured. Enable this whenever you use a proxy.
379 /// Defaults to `false`.
380 pub block_webrtc: bool,
381
382 /// Automatically detect and attempt to solve Cloudflare Turnstile challenges.
383 /// When enabled the session inspects the page after navigation and, if a
384 /// Cloudflare challenge page is detected, attempts to click through it.
385 /// The timeout is automatically raised to at least 60 seconds. Defaults to `false`.
386 pub solve_cloudflare: bool,
387}
388
389impl Default for StealthConfig {
390 fn default() -> Self {
391 Self {
392 base: BrowserConfig::default(),
393 allow_webgl: true,
394 hide_canvas: false,
395 block_webrtc: false,
396 solve_cloudflare: false,
397 }
398 }
399}
400
401impl StealthConfig {
402 /// Validate the stealth configuration and its underlying `BrowserConfig`.
403 /// If `solve_cloudflare` is enabled and the timeout is below 60 seconds, the
404 /// timeout is automatically raised to 60 seconds to give the solver enough time.
405 pub fn validate(&mut self) -> Result<()> {
406 self.base.validate()?;
407 if self.solve_cloudflare && self.base.timeout_ms < 60_000.0 {
408 self.base.timeout_ms = 60_000.0;
409 }
410 Ok(())
411 }
412
413 /// Build additional Chromium command-line flags required by stealth options.
414 /// These flags are appended to the default and stealth args in
415 /// [`engine::build_launch_options`] and control WebRTC, canvas, and WebGL behaviour.
416 pub fn extra_stealth_args(&self) -> Vec<String> {
417 let mut args = Vec::new();
418 if self.block_webrtc {
419 args.push("--webrtc-ip-handling-policy=disable_non_proxied_udp".into());
420 }
421 if self.hide_canvas {
422 args.push("--fingerprinting-canvas-image-data-noise".into());
423 }
424 if !self.allow_webgl {
425 args.push("--disable-webgl".into());
426 args.push("--disable-webgl2".into());
427 }
428 args
429 }
430
431 /// Return default stealth context options (viewport, device emulation, permissions).
432 /// These mimic a typical desktop Chrome session at 1920x1080 with a 2x device pixel
433 /// ratio, dark colour scheme, and pre-granted geolocation/notification permissions.
434 pub fn context_options(&self) -> StealthContextOptions {
435 StealthContextOptions {
436 color_scheme: "dark".into(),
437 device_scale_factor: 2.0,
438 screen_width: 1920,
439 screen_height: 1080,
440 viewport_width: 1920,
441 viewport_height: 1080,
442 is_mobile: false,
443 has_touch: false,
444 ignore_https_errors: true,
445 permissions: vec!["geolocation".into(), "notifications".into()],
446 }
447 }
448}
449
/// Values used to set up a Playwright browser context in stealth mode.
///
/// Together they describe a plausible desktop browsing environment, so that an
/// automated session is harder to tell apart from a real user.
#[derive(Debug, Clone)]
pub struct StealthContextOptions {
    /// Colour scheme preference, e.g. `"dark"` or `"light"`.
    /// Bot detectors sometimes compare this against the OS setting.
    pub color_scheme: String,

    /// Emulated device pixel ratio.
    /// `2.0` corresponds to a Retina/HiDPI display, common on modern laptops,
    /// and avoids presenting a low-DPR fingerprint.
    pub device_scale_factor: f64,

    /// Width of the emulated screen, in pixels.
    /// Reported to JavaScript as `screen.width`.
    pub screen_width: u32,

    /// Height of the emulated screen, in pixels.
    /// Reported to JavaScript as `screen.height`.
    pub screen_height: u32,

    /// Width of the viewport, in pixels.
    /// This is the visible content area and drives CSS media queries and
    /// responsive layouts.
    pub viewport_width: u32,

    /// Height of the viewport, in pixels.
    /// This is the visible content area and drives CSS media queries and
    /// responsive layouts.
    pub viewport_height: u32,

    /// When `true`, emulate a mobile device.
    /// Affects `navigator.userAgentData.mobile` and related hints. Desktop
    /// stealth sessions should normally leave this `false`.
    pub is_mobile: bool,

    /// When `true`, emulate touch support.
    /// Affects `navigator.maxTouchPoints` and enables touch events. Desktop
    /// stealth sessions should normally leave this `false`.
    pub has_touch: bool,

    /// When `true`, invalid TLS certificates are accepted.
    /// Helps with internal or staging sites that use self-signed certificates.
    /// Stealth mode sets this to `true` by default.
    pub ignore_https_errors: bool,

    /// Browser permissions granted without asking.
    /// Granting e.g. `"geolocation"` and `"notifications"` up front avoids
    /// permission prompts that could stall automation.
    pub permissions: Vec<String>,
}
506
/// Static proxy server connection parameters.
///
/// Use this when you have a single proxy endpoint that all requests should route
/// through. For rotating proxies (a different IP per request), use
/// [`scrapling_fetch::ProxyRotator`] via [`BrowserConfig::proxy_rotator`] instead.
///
/// The [`Debug`](std::fmt::Debug) output never contains the password: it prints
/// `Some("<redacted>")` when one is set, so the value can be logged safely.
/// Credentials embedded directly in `server` are not detected or masked.
#[derive(Clone)]
pub struct ProxyConfig {
    /// Proxy server URL (e.g. `"http://proxy.example.com:8080"`).
    /// Supports HTTP, HTTPS, and SOCKS5 schemes depending on the proxy provider.
    pub server: String,

    /// Optional proxy authentication username.
    /// Required only when the proxy server demands credentials.
    pub username: Option<String>,

    /// Optional proxy authentication password.
    /// Required only when the proxy server demands credentials.
    pub password: Option<String>,
}

impl std::fmt::Debug for ProxyConfig {
    /// Format like the derived `Debug`, but with the password value replaced by
    /// a placeholder so secrets do not end up in logs or panic messages.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep the `Some`/`None` shape so it is still visible whether a
        // password was configured.
        let password = self.password.as_ref().map(|_| "<redacted>");
        f.debug_struct("ProxyConfig")
            .field("server", &self.server)
            .field("username", &self.username)
            .field("password", &password)
            .finish()
    }
}
526
/// Required DOM state of a selector before the wait is satisfied.
///
/// Used with [`BrowserConfig::wait_selector_state`] and
/// [`FetchParams::wait_selector_state`] to control what "ready" means for the
/// element you are waiting on. For example, `Visible` is stricter than `Attached`
/// because the element must also be rendered with a non-empty box.
///
/// [`WaitState::default()`](Default::default) is [`WaitState::Attached`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum WaitState {
    /// The element is present in the DOM (may or may not be visible).
    /// This is the least restrictive state and the default.
    #[default]
    Attached,

    /// The element is present in the DOM *and* visible on screen.
    /// "Visible" means the element has a non-empty bounding box and is not
    /// hidden via `display: none` or `visibility: hidden`.
    ///
    /// NOTE(review): Playwright's documented visibility rule treats elements with
    /// `opacity: 0` as visible -- confirm how the engine maps this variant before
    /// relying on opacity-based hiding.
    Visible,

    /// The element is *not* visible.
    /// Useful when you need to wait for an element to be hidden (e.g. a loading
    /// spinner disappearing).
    ///
    /// NOTE(review): if this maps to Playwright's `hidden` state, an element that
    /// has been removed from the DOM satisfies it as well -- confirm.
    Hidden,

    /// The element has been removed from the DOM entirely.
    /// Useful when you need to wait for a transient element (e.g. a modal overlay)
    /// to go away before capturing the page content.
    Detached,
}
554
/// One cookie to add to the browser context ahead of navigation.
///
/// Cookies are installed through Playwright's `context.add_cookies()` before the
/// first `page.goto()`. Typical uses are session cookies, authentication
/// tokens, and consent flags that suppress cookie banners.
#[derive(Debug, Clone)]
pub struct CookieParam {
    /// Name of the cookie, e.g. `"session_id"`.
    pub name: String,

    /// Value of the cookie, e.g. `"abc123"`.
    pub value: String,

    /// Domain scope of the cookie, e.g. `".example.com"`.
    /// With `None`, the cookie is tied to the domain of the URL.
    pub domain: Option<String>,

    /// Path scope of the cookie, e.g. `"/api"`.
    /// `None` is treated as `"/"`.
    pub path: Option<String>,

    /// Complete URL from which domain and path are inferred when they are left out.
    /// Supply it when Playwright should derive domain and path on its own.
    pub url: Option<String>,
}
580
581/// Per-fetch parameter overrides -- a subset of [`BrowserConfig`] that can be changed
582/// on a per-request basis.
583///
584/// Every field is `Option` -- when `None`, the value falls back to the session's
585/// `BrowserConfig`. Pass a `FetchParams` to [`DynamicSession::fetch`] or
586/// [`StealthySession::fetch`] to override specific settings for a single navigation
587/// without modifying the session-wide configuration.
588#[derive(Debug, Clone, Default)]
589pub struct FetchParams {
590 /// Override the Google-search warm-up flag for this request.
591 pub google_search: Option<bool>,
592 /// Override the navigation timeout in milliseconds.
593 pub timeout_ms: Option<f64>,
594 /// Override the post-load sleep delay in milliseconds.
595 pub wait_ms: Option<u64>,
596 /// Override the extra HTTP headers for this request.
597 pub extra_headers: Option<HashMap<String, String>>,
598 /// Override the resource-blocking flag for this request.
599 pub disable_resources: Option<bool>,
600 /// Override the network-idle wait flag for this request.
601 pub network_idle: Option<bool>,
602 /// Override the DOM-content-loaded wait flag for this request.
603 pub load_dom: Option<bool>,
604 /// CSS selector to wait for before returning, overriding the config default.
605 pub wait_selector: Option<String>,
606 /// Required state of the wait selector, overriding the config default.
607 pub wait_selector_state: Option<WaitState>,
608 /// Override the set of blocked domains for this request.
609 pub blocked_domains: Option<HashSet<String>>,
610 /// Enable Cloudflare challenge solving for this request.
611 pub solve_cloudflare: Option<bool>,
612 /// Override selector-engine configuration for this request.
613 pub selector_config: Option<HashMap<String, serde_json::Value>>,
614}
615
616impl FetchParams {
617 /// Merge these optional overrides with the base `BrowserConfig` to produce resolved values.
618 ///
619 /// For each field, if the `FetchParams` value is `Some`, it wins; otherwise the
620 /// corresponding `BrowserConfig` value is used. The result is a [`ResolvedFetchParams`]
621 /// with no `Option` fields, ready for immediate use during navigation.
622 pub fn merge_with_config(&self, config: &BrowserConfig) -> ResolvedFetchParams {
623 ResolvedFetchParams {
624 google_search: self.google_search.unwrap_or(config.google_search),
625 timeout_ms: self.timeout_ms.unwrap_or(config.timeout_ms),
626 wait_ms: self.wait_ms.unwrap_or(config.wait_ms),
627 extra_headers: self
628 .extra_headers
629 .clone()
630 .unwrap_or_else(|| config.extra_headers.clone()),
631 disable_resources: self.disable_resources.unwrap_or(config.disable_resources),
632 network_idle: self.network_idle.unwrap_or(config.network_idle),
633 load_dom: self.load_dom.unwrap_or(config.load_dom),
634 wait_selector: self
635 .wait_selector
636 .clone()
637 .or_else(|| config.wait_selector.clone()),
638 wait_selector_state: self
639 .wait_selector_state
640 .unwrap_or(config.wait_selector_state),
641 blocked_domains: self
642 .blocked_domains
643 .clone()
644 .unwrap_or_else(|| config.blocked_domains.clone()),
645 solve_cloudflare: self.solve_cloudflare.unwrap_or(false),
646 }
647 }
648}
649
650/// Fully resolved fetch parameters produced by merging [`FetchParams`] with [`BrowserConfig`].
651///
652/// Unlike `FetchParams` (which is all `Option`s), every field here has a concrete
653/// value. This struct is constructed internally by [`FetchParams::merge_with_config`]
654/// and consumed by the session's navigation logic. You will not normally create one
655/// yourself.
656#[derive(Debug, Clone)]
657pub struct ResolvedFetchParams {
658 /// Whether to prepend a Google-search warm-up navigation.
659 pub google_search: bool,
660 /// Navigation timeout in milliseconds.
661 pub timeout_ms: f64,
662 /// Post-load sleep delay in milliseconds.
663 pub wait_ms: u64,
664 /// Extra HTTP headers to send with the request.
665 pub extra_headers: HashMap<String, String>,
666 /// Block heavyweight resource types when `true`.
667 pub disable_resources: bool,
668 /// Wait for the network-idle event after navigation.
669 pub network_idle: bool,
670 /// Wait for `DOMContentLoaded` after navigation.
671 pub load_dom: bool,
672 /// CSS selector to wait for before returning page content.
673 pub wait_selector: Option<String>,
674 /// Required state of the wait selector.
675 pub wait_selector_state: WaitState,
676 /// Domains whose requests should be blocked.
677 pub blocked_domains: HashSet<String>,
678 /// Attempt to solve Cloudflare challenges when `true`.
679 pub solve_cloudflare: bool,
680}