Skip to main content

scrapling_browser/
config.rs

1//! Configuration types for browser automation sessions.
2//!
3//! This module contains every knob you can turn when launching a browser through
4//! scrapling-browser. The primary entry point is [`BrowserConfig`], which controls
5//! how the browser is launched, how pages are navigated, and what requests are
6//! blocked. For anti-detection scenarios, [`StealthConfig`] wraps a `BrowserConfig`
7//! and adds stealth-specific options such as canvas noise and WebRTC blocking.
8//!
9//! Most fields on `BrowserConfig` have sensible defaults (see the [`Default`] impl),
10//! so you typically only override the two or three settings you care about:
11//!
12//! ```rust
13//! use scrapling_browser::BrowserConfig;
14//!
15//! let config = BrowserConfig {
16//!     headless: true,
17//!     disable_resources: true,
18//!     block_ads: true,
19//!     ..Default::default()
20//! };
21//! ```
22//!
23//! When you need to tweak behaviour on a per-request basis without rebuilding the
24//! entire config, create a [`FetchParams`] with only the fields you want to override.
25//! The session's `fetch` method merges those overrides with the base config into a
26//! [`ResolvedFetchParams`] struct that carries the final, concrete values used for
27//! that single navigation.
28//!
29//! # Key types
30//!
31//! | Type | Purpose |
32//! |------|---------|
33//! | [`BrowserConfig`] | Session-level browser settings (headless, proxy, timeouts, etc.) |
34//! | [`StealthConfig`] | Anti-detection wrapper around `BrowserConfig` |
35//! | [`FetchParams`] | Optional per-request overrides |
36//! | [`ResolvedFetchParams`] | Fully resolved values after merging `FetchParams` + `BrowserConfig` |
37//! | [`ProxyConfig`] | Static proxy server credentials |
38//! | [`CookieParam`] | A cookie to inject before navigation |
39//! | [`WaitState`] | Required DOM state of a wait selector |
40//! | [`PageCallback`] | Async closure invoked on a page at setup or post-navigation |
41//! | [`StealthContextOptions`] | Viewport / device emulation values for stealth mode |
42
43use std::collections::HashMap;
44use std::collections::HashSet;
45use std::path::Path;
46
47use crate::ad_domains::AD_DOMAINS;
48use crate::error::{BrowserError, Result};
49
50/// Browser session configuration -- the central struct that controls how the
51/// Playwright browser is launched and how pages are navigated.
52///
53/// This mirrors the Python `PlaywrightConfig` from the original scrapling library.
54/// Every field has a default value (see [`Default`]), so you only need to set the
55/// fields relevant to your use case. Call [`validate`](BrowserConfig::validate)
56/// before passing the config to a session; sessions call it automatically during
57/// construction.
58pub struct BrowserConfig {
59    /// Maximum number of concurrent browser pages in the pool.
60    /// Must be between 1 and 50 inclusive. Higher values allow more parallel fetches
61    /// but consume more memory. Defaults to `1`.
62    pub max_pages: u32,
63
64    /// Whether to launch the browser in headless mode.
65    /// Set to `false` when debugging to see the browser window. Defaults to `true`.
66    pub headless: bool,
67
68    /// Block heavyweight resource types (images, fonts, stylesheets) when `true`.
69    /// This significantly speeds up page loads when you only need the HTML/DOM.
70    /// The exact list of blocked types is defined in [`constants::EXTRA_RESOURCES`].
71    /// Defaults to `false`.
72    pub disable_resources: bool,
73
74    /// Wait for the network-idle event after navigation.
75    /// Useful for SPAs that fetch data after the initial document load, but slows
76    /// down fetches on pages with persistent connections (e.g. WebSocket heartbeats).
77    /// Defaults to `false`.
78    pub network_idle: bool,
79
80    /// Wait for the `DOMContentLoaded` event after navigation.
81    /// This is faster than `network_idle` and sufficient for most server-rendered
82    /// pages. Defaults to `true`.
83    pub load_dom: bool,
84
85    /// Optional CSS selector to wait for before returning the page content.
86    /// Use this when the data you need is rendered asynchronously by JavaScript
87    /// and you know a specific element that signals the content is ready.
88    pub wait_selector: Option<String>,
89
90    /// Required state of the wait selector before proceeding.
91    /// For example, `WaitState::Visible` waits until the element is both present
92    /// and visible on screen. Defaults to [`WaitState::Attached`].
93    pub wait_selector_state: WaitState,
94
95    /// Cookies to inject into the browser context before navigation.
96    /// Useful for authenticated scraping -- set session cookies here to skip login flows.
97    pub cookies: Vec<CookieParam>,
98
99    /// Prepend a Google search navigation to warm the browser session.
100    /// Some bot-detection systems check the browser's navigation history; visiting
101    /// Google first can make the session appear more natural. Defaults to `true`.
102    pub google_search: bool,
103
104    /// Extra delay in milliseconds to sleep after page load stabilisation.
105    /// Use this as a last resort when `wait_selector` and `network_idle` are not
106    /// enough. Defaults to `0` (no extra delay).
107    pub wait_ms: u64,
108
109    /// IANA timezone identifier to emulate in the browser context (e.g. `"America/New_York"`).
110    /// Setting this makes the browser's `Intl` APIs and `Date` objects report the
111    /// chosen timezone, which can help avoid location-based bot detection.
112    pub timezone_id: Option<String>,
113
114    /// Static proxy server configuration.
115    /// Mutually exclusive with `proxy_rotator` -- set one or the other, not both.
116    pub proxy: Option<ProxyConfig>,
117
118    /// Rotating proxy provider that supplies a fresh proxy per request.
119    /// Mutually exclusive with `proxy` -- set one or the other, not both.
120    /// Useful when you need a different IP for each fetch to avoid rate limits.
121    pub proxy_rotator: Option<scrapling_fetch::ProxyRotator>,
122
123    /// Additional HTTP headers sent with every request.
124    /// These are applied via Playwright's `set_extra_http_headers` and will override
125    /// headers of the same name that the browser would normally send.
126    pub extra_headers: HashMap<String, String>,
127
128    /// Navigation and action timeout in milliseconds.
129    /// Applies to `page.goto()`, selector waits, and other timed operations.
130    /// Defaults to `30_000.0` (30 seconds).
131    pub timeout_ms: f64,
132
133    /// Path to a JavaScript file evaluated in every new page context.
134    /// The script runs before any page code, making it ideal for overriding
135    /// `navigator` properties or injecting polyfills. The file must exist on disk.
136    pub init_script: Option<String>,
137
138    /// Path to a persistent user-data directory for the browser profile.
139    /// When set, the browser stores cookies, local storage, and cache across
140    /// sessions, which can help maintain login state between runs.
141    pub user_data_dir: Option<String>,
142
143    /// Locale string (e.g. `"en-US"`) to emulate in the browser context.
144    /// Affects `navigator.language`, `Accept-Language` headers, and date/number
145    /// formatting in JavaScript.
146    pub locale: Option<String>,
147
148    /// Launch with the system-installed Chrome instead of bundled Chromium.
149    /// The system Chrome may have a different fingerprint than Chromium and may
150    /// pass more bot-detection checks. Defaults to `false`.
151    pub real_chrome: bool,
152
153    /// WebSocket URL for connecting to an existing Chrome DevTools Protocol endpoint.
154    /// Must start with `ws://` or `wss://`. When set, the session attaches to a
155    /// running browser instead of launching a new one.
156    pub cdp_url: Option<String>,
157
158    /// Custom User-Agent string to set on the browser context.
159    /// When `None`, the browser uses its built-in default user agent.
160    pub useragent: Option<String>,
161
162    /// Extra command-line flags passed to the browser process.
163    /// These are appended after the default and stealth flags. Harmful
164    /// automation-revealing flags are automatically filtered out.
165    pub extra_flags: Vec<String>,
166
167    /// Set of domain names whose requests will be blocked.
168    /// Blocking is suffix-based: adding `"ads.example.com"` also blocks
169    /// `"sub.ads.example.com"`. See [`intercept::is_domain_blocked`] for details.
170    pub blocked_domains: HashSet<String>,
171
172    /// Merge the built-in ad-domain blocklist into `blocked_domains` when `true`.
173    /// The blocklist contains roughly 3,500 known ad and tracker domains sourced
174    /// from Peter Lowe's list. Defaults to `false`.
175    pub block_ads: bool,
176
177    /// Number of retry attempts for each fetch operation.
178    /// Must be between 1 and 10 inclusive. On failure, the session waits
179    /// `retry_delay_secs` between attempts. Defaults to `3`.
180    pub retries: u32,
181
182    /// Delay in seconds between retry attempts.
183    /// Applies when a fetch fails and there are retries remaining.
184    /// Defaults to `1.0`.
185    pub retry_delay_secs: f64,
186
187    /// URL pattern to capture matching XHR/fetch responses.
188    /// When set, the session intercepts network responses whose URL matches this
189    /// pattern and includes them in the response. Useful for extracting API data
190    /// that the page fetches via AJAX.
191    pub capture_xhr: Option<String>,
192
193    /// Path to a custom browser executable.
194    /// Use this to point at a specific Chrome/Chromium binary instead of the one
195    /// bundled with Playwright. The file must exist on disk.
196    pub executable_path: Option<String>,
197
198    /// Enable DNS-over-HTTPS via Cloudflare's resolver.
199    /// Adds the `--dns-over-https-templates` Chromium flag pointing at Cloudflare's
200    /// `1.1.1.1` DNS endpoint, encrypting DNS queries from the browser process.
201    /// Defaults to `false`.
202    pub dns_over_https: bool,
203
204    /// Arbitrary key-value configuration forwarded to the selector engine.
205    /// This map is passed through to scrapling's selector/parsing layer and can
206    /// control how CSS selectors and smart matching behave.
207    pub selector_config: HashMap<String, serde_json::Value>,
208
209    /// Async callback invoked on each page immediately after creation.
210    /// Use this to perform custom setup like adding request interceptors, injecting
211    /// scripts, or configuring page-level settings before navigation begins.
212    pub page_setup: Option<PageCallback>,
213
214    /// Async callback invoked on each page after navigation completes.
215    /// Use this to perform post-navigation actions like clicking buttons, filling
216    /// forms, or scrolling to trigger lazy-loaded content before the HTML is captured.
217    pub page_action: Option<PageCallback>,
218}
219
220/// Async callback that receives a Playwright page reference.
221///
222/// This type alias defines the signature for [`BrowserConfig::page_setup`] and
223/// [`BrowserConfig::page_action`] callbacks. The closure receives a cloned
224/// `playwright_rs::Page` and must return a pinned, `Send` future that resolves
225/// to `Result<()>`. Because the closure itself must be `Send + Sync`, it can be
226/// shared safely across threads.
227pub type PageCallback = Box<
228    dyn Fn(
229            playwright_rs::Page,
230        )
231            -> std::pin::Pin<Box<dyn std::future::Future<Output = crate::error::Result<()>> + Send>>
232        + Send
233        + Sync,
234>;
235
236impl Default for BrowserConfig {
237    fn default() -> Self {
238        Self {
239            max_pages: 1,
240            headless: true,
241            disable_resources: false,
242            network_idle: false,
243            load_dom: true,
244            wait_selector: None,
245            wait_selector_state: WaitState::Attached,
246            cookies: Vec::new(),
247            google_search: true,
248            wait_ms: 0,
249            timezone_id: None,
250            proxy: None,
251            proxy_rotator: None,
252            extra_headers: HashMap::new(),
253            timeout_ms: 30_000.0,
254            init_script: None,
255            user_data_dir: None,
256            locale: None,
257            real_chrome: false,
258            cdp_url: None,
259            useragent: None,
260            extra_flags: Vec::new(),
261            blocked_domains: HashSet::new(),
262            block_ads: false,
263            retries: 3,
264            retry_delay_secs: 1.0,
265            capture_xhr: None,
266            executable_path: None,
267            dns_over_https: false,
268            selector_config: HashMap::new(),
269            page_setup: None,
270            page_action: None,
271        }
272    }
273}
274
275impl std::fmt::Debug for BrowserConfig {
276    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
277        f.debug_struct("BrowserConfig")
278            .field("headless", &self.headless)
279            .field("timeout_ms", &self.timeout_ms)
280            .field("retries", &self.retries)
281            .field("max_pages", &self.max_pages)
282            .finish_non_exhaustive()
283    }
284}
285
286impl BrowserConfig {
287    /// Validate configuration invariants and populate derived fields.
288    ///
289    /// This method checks that numeric fields are within acceptable ranges, that
290    /// mutually exclusive options are not both set, that file paths exist on disk,
291    /// and that the CDP URL (if any) uses a WebSocket scheme. When `block_ads` is
292    /// `true`, it also merges the built-in ad-domain list into `blocked_domains`.
293    ///
294    /// You do not usually need to call this yourself -- [`DynamicSession::new`] and
295    /// [`StealthySession::new`] call it automatically during construction.
296    pub fn validate(&mut self) -> Result<()> {
297        if !(1..=50).contains(&self.max_pages) {
298            return Err(BrowserError::Config("max_pages must be 1..50".into()));
299        }
300        if !(1..=10).contains(&self.retries) {
301            return Err(BrowserError::Config("retries must be 1..10".into()));
302        }
303        if self.proxy.is_some() && self.proxy_rotator.is_some() {
304            return Err(BrowserError::Config(
305                "cannot use proxy and proxy_rotator together".into(),
306            ));
307        }
308        if let Some(ref cdp) = self.cdp_url {
309            if !cdp.starts_with("ws://") && !cdp.starts_with("wss://") {
310                return Err(BrowserError::Config(
311                    "cdp_url must start with ws:// or wss://".into(),
312                ));
313            }
314        }
315        if let Some(ref path) = self.init_script {
316            if !Path::new(path).is_file() {
317                return Err(BrowserError::Config(format!(
318                    "init_script not found: {path}"
319                )));
320            }
321        }
322        if let Some(ref path) = self.executable_path {
323            if !Path::new(path).is_file() {
324                return Err(BrowserError::Config(format!(
325                    "executable_path not found: {path}"
326                )));
327            }
328        }
329        if self.block_ads {
330            for domain in AD_DOMAINS {
331                self.blocked_domains.insert((*domain).to_owned());
332            }
333        }
334        Ok(())
335    }
336
337    /// Returns `true` if a rotating proxy provider is configured.
338    /// When a rotator is present the session creates a fresh browser context per
339    /// request so each navigation can use a different proxy address.
340    pub fn has_proxy_rotator(&self) -> bool {
341        self.proxy_rotator.is_some()
342    }
343
344    /// Returns `true` if the session will connect via Chrome DevTools Protocol.
345    /// CDP mode attaches to a running browser rather than launching a new process,
346    /// which is useful for connecting to remote or containerised browsers.
347    pub fn is_cdp(&self) -> bool {
348        self.cdp_url.is_some()
349    }
350}
351
352/// Stealth browser configuration -- extends [`BrowserConfig`] with anti-detection options.
353///
354/// Use this instead of a bare `BrowserConfig` when scraping sites that employ bot
355/// detection (e.g. Cloudflare, DataDome, PerimeterX). The stealth layer adds
356/// Chromium CLI flags to block WebRTC leaks, disable WebGL, inject canvas noise,
357/// and optionally solve Cloudflare Turnstile challenges automatically.
358#[derive(Debug)]
359pub struct StealthConfig {
360    /// Underlying browser configuration shared with non-stealth sessions.
361    /// All standard settings (timeout, proxy, headers, etc.) live here.
362    pub base: BrowserConfig,
363
364    /// Allow WebGL rendering (disable to reduce fingerprint surface).
365    /// Some fingerprinting services read WebGL renderer strings to identify the
366    /// GPU and driver version. Set to `false` to disable WebGL entirely.
367    /// Defaults to `true`.
368    pub allow_webgl: bool,
369
370    /// Inject noise into canvas image data to thwart canvas fingerprinting.
371    /// When enabled, small random perturbations are applied to pixel data returned
372    /// by `toDataURL()` and `getImageData()`, making the canvas fingerprint
373    /// non-deterministic. Defaults to `false`.
374    pub hide_canvas: bool,
375
376    /// Disable non-proxied UDP to prevent WebRTC IP leaks.
377    /// Without this flag, WebRTC can reveal the machine's real IP address even
378    /// when a proxy is configured. Enable this whenever you use a proxy.
379    /// Defaults to `false`.
380    pub block_webrtc: bool,
381
382    /// Automatically detect and attempt to solve Cloudflare Turnstile challenges.
383    /// When enabled the session inspects the page after navigation and, if a
384    /// Cloudflare challenge page is detected, attempts to click through it.
385    /// The timeout is automatically raised to at least 60 seconds. Defaults to `false`.
386    pub solve_cloudflare: bool,
387}
388
389impl Default for StealthConfig {
390    fn default() -> Self {
391        Self {
392            base: BrowserConfig::default(),
393            allow_webgl: true,
394            hide_canvas: false,
395            block_webrtc: false,
396            solve_cloudflare: false,
397        }
398    }
399}
400
401impl StealthConfig {
402    /// Validate the stealth configuration and its underlying `BrowserConfig`.
403    /// If `solve_cloudflare` is enabled and the timeout is below 60 seconds, the
404    /// timeout is automatically raised to 60 seconds to give the solver enough time.
405    pub fn validate(&mut self) -> Result<()> {
406        self.base.validate()?;
407        if self.solve_cloudflare && self.base.timeout_ms < 60_000.0 {
408            self.base.timeout_ms = 60_000.0;
409        }
410        Ok(())
411    }
412
413    /// Build additional Chromium command-line flags required by stealth options.
414    /// These flags are appended to the default and stealth args in
415    /// [`engine::build_launch_options`] and control WebRTC, canvas, and WebGL behaviour.
416    pub fn extra_stealth_args(&self) -> Vec<String> {
417        let mut args = Vec::new();
418        if self.block_webrtc {
419            args.push("--webrtc-ip-handling-policy=disable_non_proxied_udp".into());
420        }
421        if self.hide_canvas {
422            args.push("--fingerprinting-canvas-image-data-noise".into());
423        }
424        if !self.allow_webgl {
425            args.push("--disable-webgl".into());
426            args.push("--disable-webgl2".into());
427        }
428        args
429    }
430
431    /// Return default stealth context options (viewport, device emulation, permissions).
432    /// These mimic a typical desktop Chrome session at 1920x1080 with a 2x device pixel
433    /// ratio, dark colour scheme, and pre-granted geolocation/notification permissions.
434    pub fn context_options(&self) -> StealthContextOptions {
435        StealthContextOptions {
436            color_scheme: "dark".into(),
437            device_scale_factor: 2.0,
438            screen_width: 1920,
439            screen_height: 1080,
440            viewport_width: 1920,
441            viewport_height: 1080,
442            is_mobile: false,
443            has_touch: false,
444            ignore_https_errors: true,
445            permissions: vec!["geolocation".into(), "notifications".into()],
446        }
447    }
448}
449
/// Browser-context settings used for stealth emulation.
///
/// The values are applied when a Playwright browser context is created in stealth
/// mode. Together they describe a plausible desktop browsing environment, so the
/// automated session is harder to tell apart from a real user.
#[derive(Debug, Clone)]
pub struct StealthContextOptions {
    /// Preferred colour scheme (e.g. `"dark"` or `"light"`).
    /// Bot detectors sometimes check whether it matches the OS setting.
    pub color_scheme: String,

    /// Device pixel ratio to emulate.
    /// `2.0` simulates a Retina/HiDPI display, which is typical for modern
    /// laptops and helps avoid a low-DPR fingerprint.
    pub device_scale_factor: f64,

    /// Width of the emulated screen, in pixels.
    /// Together with `screen_height`, it sets the `screen.width` value reported
    /// in JavaScript.
    pub screen_width: u32,

    /// Height of the emulated screen, in pixels.
    /// Together with `screen_width`, it sets the `screen.height` value reported
    /// in JavaScript.
    pub screen_height: u32,

    /// Width of the viewport, in pixels.
    /// This is the width of the visible content area; it affects CSS media
    /// queries and responsive layouts.
    pub viewport_width: u32,

    /// Height of the viewport, in pixels.
    /// This is the height of the visible content area; it affects CSS media
    /// queries and responsive layouts.
    pub viewport_height: u32,

    /// Emulate a mobile device when `true`.
    /// Sets `navigator.userAgentData.mobile` and related hints. Normally `false`
    /// for stealth desktop sessions.
    pub is_mobile: bool,

    /// Emulate touch support when `true`.
    /// Sets `navigator.maxTouchPoints` and enables touch events. Normally `false`
    /// for stealth desktop sessions.
    pub has_touch: bool,

    /// Accept invalid TLS certificates when `true`.
    /// Handy for scraping internal or staging sites that use self-signed
    /// certificates. Defaults to `true` in stealth mode.
    pub ignore_https_errors: bool,

    /// Browser permissions granted automatically.
    /// Granting permissions such as `"geolocation"` and `"notifications"` up front
    /// avoids permission prompt dialogs that could stall automation.
    pub permissions: Vec<String>,
}
506
/// Static proxy server connection parameters.
///
/// Use this when you have a single proxy endpoint that all requests should route
/// through. For rotating proxies (a different IP per request), use
/// [`scrapling_fetch::ProxyRotator`] via [`BrowserConfig::proxy_rotator`] instead.
///
/// The [`Debug`](std::fmt::Debug) implementation is written by hand so that the
/// password never appears in logs or panic messages; `server` and `username` are
/// still printed.
#[derive(Clone)]
pub struct ProxyConfig {
    /// Proxy server URL (e.g. `"http://proxy.example.com:8080"`).
    /// Supports HTTP, HTTPS, and SOCKS5 schemes depending on the proxy provider.
    pub server: String,

    /// Optional proxy authentication username.
    /// Required only when the proxy server demands credentials.
    pub username: Option<String>,

    /// Optional proxy authentication password.
    /// Required only when the proxy server demands credentials.
    pub password: Option<String>,
}

impl std::fmt::Debug for ProxyConfig {
    /// Formats the config with the password replaced by a placeholder.
    ///
    /// A set password is shown as `Some("<redacted>")` and an absent one as
    /// `None`, so the output still tells whether credentials were supplied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let masked_password = self.password.as_ref().map(|_| "<redacted>");
        f.debug_struct("ProxyConfig")
            .field("server", &self.server)
            .field("username", &self.username)
            .field("password", &masked_password)
            .finish()
    }
}
526
/// DOM state a selector has to reach for a wait to be satisfied.
///
/// Paired with [`BrowserConfig::wait_selector_state`] and
/// [`FetchParams::wait_selector_state`] to define what "ready" means for the
/// awaited element. `Visible`, for example, is stricter than `Attached`, since the
/// element must additionally have non-zero dimensions and not be hidden by CSS.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WaitState {
    /// The element exists in the DOM; it may or may not be visible.
    /// The least restrictive state, and the default.
    Attached,

    /// The element exists in the DOM *and* is visible on screen.
    /// "Visible" means a non-zero bounding box and not hidden through
    /// `display: none`, `visibility: hidden`, or `opacity: 0`.
    Visible,

    /// The element exists in the DOM but is *not* visible.
    /// Handy for waiting until an element becomes hidden (e.g. a loading spinner
    /// disappearing).
    Hidden,

    /// The element is gone from the DOM entirely.
    /// Handy for waiting until a transient element (e.g. a modal overlay) goes
    /// away before the page content is captured.
    Detached,
}
554
/// One cookie to place in the browser context ahead of navigation.
///
/// Cookies are added through Playwright's `context.add_cookies()` before the first
/// `page.goto()`. Typical uses are session cookies, authentication tokens, and
/// consent flags that skip cookie banners.
#[derive(Debug, Clone)]
pub struct CookieParam {
    /// Name of the cookie (e.g. `"session_id"`).
    pub name: String,

    /// Value of the cookie (e.g. `"abc123"`).
    pub value: String,

    /// Domain the cookie applies to (e.g. `".example.com"`).
    /// With `None`, the cookie is tied to the URL's domain.
    pub domain: Option<String>,

    /// URL path the cookie applies to (e.g. `"/api"`).
    /// `None` means `"/"`.
    pub path: Option<String>,

    /// Full URL from which domain and path are inferred when they are left out.
    /// Supply it when Playwright should derive domain and path automatically.
    pub url: Option<String>,
}
580
581/// Per-fetch parameter overrides -- a subset of [`BrowserConfig`] that can be changed
582/// on a per-request basis.
583///
584/// Every field is `Option` -- when `None`, the value falls back to the session's
585/// `BrowserConfig`. Pass a `FetchParams` to [`DynamicSession::fetch`] or
586/// [`StealthySession::fetch`] to override specific settings for a single navigation
587/// without modifying the session-wide configuration.
588#[derive(Debug, Clone, Default)]
589pub struct FetchParams {
590    /// Override the Google-search warm-up flag for this request.
591    pub google_search: Option<bool>,
592    /// Override the navigation timeout in milliseconds.
593    pub timeout_ms: Option<f64>,
594    /// Override the post-load sleep delay in milliseconds.
595    pub wait_ms: Option<u64>,
596    /// Override the extra HTTP headers for this request.
597    pub extra_headers: Option<HashMap<String, String>>,
598    /// Override the resource-blocking flag for this request.
599    pub disable_resources: Option<bool>,
600    /// Override the network-idle wait flag for this request.
601    pub network_idle: Option<bool>,
602    /// Override the DOM-content-loaded wait flag for this request.
603    pub load_dom: Option<bool>,
604    /// CSS selector to wait for before returning, overriding the config default.
605    pub wait_selector: Option<String>,
606    /// Required state of the wait selector, overriding the config default.
607    pub wait_selector_state: Option<WaitState>,
608    /// Override the set of blocked domains for this request.
609    pub blocked_domains: Option<HashSet<String>>,
610    /// Enable Cloudflare challenge solving for this request.
611    pub solve_cloudflare: Option<bool>,
612    /// Override selector-engine configuration for this request.
613    pub selector_config: Option<HashMap<String, serde_json::Value>>,
614}
615
616impl FetchParams {
617    /// Merge these optional overrides with the base `BrowserConfig` to produce resolved values.
618    ///
619    /// For each field, if the `FetchParams` value is `Some`, it wins; otherwise the
620    /// corresponding `BrowserConfig` value is used. The result is a [`ResolvedFetchParams`]
621    /// with no `Option` fields, ready for immediate use during navigation.
622    pub fn merge_with_config(&self, config: &BrowserConfig) -> ResolvedFetchParams {
623        ResolvedFetchParams {
624            google_search: self.google_search.unwrap_or(config.google_search),
625            timeout_ms: self.timeout_ms.unwrap_or(config.timeout_ms),
626            wait_ms: self.wait_ms.unwrap_or(config.wait_ms),
627            extra_headers: self
628                .extra_headers
629                .clone()
630                .unwrap_or_else(|| config.extra_headers.clone()),
631            disable_resources: self.disable_resources.unwrap_or(config.disable_resources),
632            network_idle: self.network_idle.unwrap_or(config.network_idle),
633            load_dom: self.load_dom.unwrap_or(config.load_dom),
634            wait_selector: self
635                .wait_selector
636                .clone()
637                .or_else(|| config.wait_selector.clone()),
638            wait_selector_state: self
639                .wait_selector_state
640                .unwrap_or(config.wait_selector_state),
641            blocked_domains: self
642                .blocked_domains
643                .clone()
644                .unwrap_or_else(|| config.blocked_domains.clone()),
645            solve_cloudflare: self.solve_cloudflare.unwrap_or(false),
646        }
647    }
648}
649
650/// Fully resolved fetch parameters produced by merging [`FetchParams`] with [`BrowserConfig`].
651///
652/// Unlike `FetchParams` (which is all `Option`s), every field here has a concrete
653/// value. This struct is constructed internally by [`FetchParams::merge_with_config`]
654/// and consumed by the session's navigation logic. You will not normally create one
655/// yourself.
656#[derive(Debug, Clone)]
657pub struct ResolvedFetchParams {
658    /// Whether to prepend a Google-search warm-up navigation.
659    pub google_search: bool,
660    /// Navigation timeout in milliseconds.
661    pub timeout_ms: f64,
662    /// Post-load sleep delay in milliseconds.
663    pub wait_ms: u64,
664    /// Extra HTTP headers to send with the request.
665    pub extra_headers: HashMap<String, String>,
666    /// Block heavyweight resource types when `true`.
667    pub disable_resources: bool,
668    /// Wait for the network-idle event after navigation.
669    pub network_idle: bool,
670    /// Wait for `DOMContentLoaded` after navigation.
671    pub load_dom: bool,
672    /// CSS selector to wait for before returning page content.
673    pub wait_selector: Option<String>,
674    /// Required state of the wait selector.
675    pub wait_selector_state: WaitState,
676    /// Domains whose requests should be blocked.
677    pub blocked_domains: HashSet<String>,
678    /// Attempt to solve Cloudflare challenges when `true`.
679    pub solve_cloudflare: bool,
680}