use std::collections::HashMap;
use std::collections::HashSet;
use std::path::Path;
use crate::ad_domains::AD_DOMAINS;
use crate::error::{BrowserError, Result};
/// Browser-level settings.
///
/// Baseline values come from the `Default` impl; `BrowserConfig::validate`
/// checks ranges, mutually exclusive options and file paths. Several fields
/// double as fallbacks for the per-fetch overrides in
/// `FetchParams::merge_with_config`.
///
/// NOTE(review): how each field is consumed by the browser layer is not
/// visible in this file; the per-field notes below only state defaults and
/// validation rules that this file establishes.
pub struct BrowserConfig {
/// Defaults to `1`; `validate` rejects values outside `1..=50`.
pub max_pages: u32,
/// Defaults to `true`.
pub headless: bool,
/// Defaults to `false`; can be overridden per fetch.
pub disable_resources: bool,
/// Defaults to `false`; can be overridden per fetch.
pub network_idle: bool,
/// Defaults to `true`; can be overridden per fetch.
pub load_dom: bool,
/// Defaults to `None`; a per-fetch value takes precedence when present.
pub wait_selector: Option<String>,
/// Defaults to `WaitState::Attached`; can be overridden per fetch.
pub wait_selector_state: WaitState,
/// Defaults to an empty list.
pub cookies: Vec<CookieParam>,
/// Defaults to `true`; can be overridden per fetch.
pub google_search: bool,
/// Defaults to `0`; can be overridden per fetch.
/// Presumably milliseconds, going by the field name.
pub wait_ms: u64,
/// Defaults to `None`.
pub timezone_id: Option<String>,
/// Defaults to `None`. `validate` rejects a config that sets both this
/// and `proxy_rotator`.
pub proxy: Option<ProxyConfig>,
/// Defaults to `None`. `validate` rejects a config that sets both this
/// and `proxy`.
pub proxy_rotator: Option<scrapling_fetch::ProxyRotator>,
/// Defaults to an empty map. A per-fetch map replaces this one entirely
/// (the two maps are not merged).
pub extra_headers: HashMap<String, String>,
/// Defaults to `30_000.0`; can be overridden per fetch.
/// `StealthConfig::validate` raises it to at least `60_000.0` when
/// `solve_cloudflare` is set.
pub timeout_ms: f64,
/// Defaults to `None`. When set, `validate` requires it to be the path of
/// an existing regular file.
pub init_script: Option<String>,
/// Defaults to `None`.
pub user_data_dir: Option<String>,
/// Defaults to `None`.
pub locale: Option<String>,
/// Defaults to `false`.
pub real_chrome: bool,
/// Defaults to `None`. When set, `validate` requires a `ws://` or
/// `wss://` prefix.
pub cdp_url: Option<String>,
/// Defaults to `None`.
pub useragent: Option<String>,
/// Defaults to an empty list.
pub extra_flags: Vec<String>,
/// Defaults to an empty set. `validate` adds every entry of `AD_DOMAINS`
/// when `block_ads` is set. A per-fetch set replaces this one entirely.
pub blocked_domains: HashSet<String>,
/// Defaults to `false`. When `true`, `validate` extends `blocked_domains`
/// with `AD_DOMAINS`.
pub block_ads: bool,
/// Defaults to `3`; `validate` rejects values outside `1..=10`.
pub retries: u32,
/// Defaults to `1.0`. Presumably seconds, going by the field name.
pub retry_delay_secs: f64,
/// Defaults to `None`.
pub capture_xhr: Option<String>,
/// Defaults to `None`. When set, `validate` requires it to be the path of
/// an existing regular file.
pub executable_path: Option<String>,
/// Defaults to `false`.
pub dns_over_https: bool,
/// Defaults to an empty map.
pub selector_config: HashMap<String, serde_json::Value>,
/// Optional page callback; defaults to `None`.
pub page_setup: Option<PageCallback>,
/// Optional page callback; defaults to `None`.
pub page_action: Option<PageCallback>,
}
/// Boxed callback stored in `BrowserConfig::page_setup` and
/// `BrowserConfig::page_action`.
///
/// The closure takes a `playwright_rs::Page` by value and returns a pinned,
/// boxed, `Send` future that resolves to `crate::error::Result<()>`. The
/// closure itself must be `Send + Sync`.
pub type PageCallback = Box<
dyn Fn(
playwright_rs::Page,
)
-> std::pin::Pin<Box<dyn std::future::Future<Output = crate::error::Result<()>> + Send>>
+ Send
+ Sync,
>;
impl Default for BrowserConfig {
    /// Builds the baseline configuration.
    ///
    /// The initialisers are grouped by the kind of default they carry so the
    /// non-trivial values are easy to spot.
    fn default() -> Self {
        Self {
            // Switches that start enabled.
            headless: true,
            load_dom: true,
            google_search: true,

            // Switches that start disabled.
            disable_resources: false,
            network_idle: false,
            real_chrome: false,
            block_ads: false,
            dns_over_https: false,

            // Numeric settings.
            max_pages: 1,
            wait_ms: 0,
            timeout_ms: 30_000.0,
            retries: 3,
            retry_delay_secs: 1.0,

            // Selector wait state.
            wait_selector_state: WaitState::Attached,

            // Optional settings, all unset.
            wait_selector: None,
            timezone_id: None,
            proxy: None,
            proxy_rotator: None,
            init_script: None,
            user_data_dir: None,
            locale: None,
            cdp_url: None,
            useragent: None,
            capture_xhr: None,
            executable_path: None,
            page_setup: None,
            page_action: None,

            // Collections, all empty.
            cookies: Vec::new(),
            extra_headers: HashMap::new(),
            extra_flags: Vec::new(),
            blocked_domains: HashSet::new(),
            selector_config: HashMap::new(),
        }
    }
}
impl std::fmt::Debug for BrowserConfig {
    /// Writes an abbreviated debug view of the configuration.
    ///
    /// Only `headless`, `timeout_ms`, `retries` and `max_pages` are printed;
    /// every other field is elided and the output is marked as
    /// non-exhaustive.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut summary = f.debug_struct("BrowserConfig");
        summary.field("headless", &self.headless);
        summary.field("timeout_ms", &self.timeout_ms);
        summary.field("retries", &self.retries);
        summary.field("max_pages", &self.max_pages);
        summary.finish_non_exhaustive()
    }
}
impl BrowserConfig {
    /// Checks the configuration and finalises derived state.
    ///
    /// The checks run in this order and stop at the first failure:
    /// 1. `max_pages` must be within `1..=50`.
    /// 2. `retries` must be within `1..=10`.
    /// 3. `proxy` and `proxy_rotator` must not both be set.
    /// 4. `cdp_url`, when set, must start with `ws://` or `wss://`.
    /// 5. `init_script`, then `executable_path`, when set, must each be the
    ///    path of an existing regular file.
    ///
    /// After the checks pass, and only if `block_ads` is set, every entry of
    /// `AD_DOMAINS` is added to `blocked_domains`. That is why this method
    /// takes `&mut self`.
    ///
    /// # Errors
    ///
    /// Returns `BrowserError::Config` carrying a message that describes the
    /// first rule that was violated.
    pub fn validate(&mut self) -> Result<()> {
        if !matches!(self.max_pages, 1..=50) {
            return Err(BrowserError::Config("max_pages must be 1..50".into()));
        }
        if !matches!(self.retries, 1..=10) {
            return Err(BrowserError::Config("retries must be 1..10".into()));
        }

        let static_proxy = self.proxy.is_some();
        let rotating_proxy = self.proxy_rotator.is_some();
        if static_proxy && rotating_proxy {
            return Err(BrowserError::Config(
                "cannot use proxy and proxy_rotator together".into(),
            ));
        }

        if let Some(endpoint) = self.cdp_url.as_deref() {
            let is_websocket = endpoint.starts_with("ws://") || endpoint.starts_with("wss://");
            if !is_websocket {
                return Err(BrowserError::Config(
                    "cdp_url must start with ws:// or wss://".into(),
                ));
            }
        }

        // Both path-valued options follow the same rule; the table keeps the
        // original check order (init_script first).
        let required_files = [
            ("init_script", self.init_script.as_deref()),
            ("executable_path", self.executable_path.as_deref()),
        ];
        for (field, candidate) in required_files {
            if let Some(path) = candidate {
                if !Path::new(path).is_file() {
                    return Err(BrowserError::Config(format!("{field} not found: {path}")));
                }
            }
        }

        if self.block_ads {
            for domain in AD_DOMAINS {
                self.blocked_domains.insert((*domain).to_owned());
            }
        }

        Ok(())
    }

    /// Returns `true` when a proxy rotator has been configured.
    pub fn has_proxy_rotator(&self) -> bool {
        self.proxy_rotator.is_some()
    }

    /// Returns `true` when `cdp_url` is set.
    pub fn is_cdp(&self) -> bool {
        self.cdp_url.is_some()
    }
}
/// `BrowserConfig` plus the extra switches read by
/// `StealthConfig::extra_stealth_args` and `StealthConfig::validate`.
#[derive(Debug)]
pub struct StealthConfig {
/// The wrapped browser configuration; validated first by
/// `StealthConfig::validate`.
pub base: BrowserConfig,
/// Defaults to `true`. When `false`, `extra_stealth_args` emits
/// `--disable-webgl` and `--disable-webgl2`.
pub allow_webgl: bool,
/// Defaults to `false`. When `true`, `extra_stealth_args` emits
/// `--fingerprinting-canvas-image-data-noise`.
pub hide_canvas: bool,
/// Defaults to `false`. When `true`, `extra_stealth_args` emits
/// `--webrtc-ip-handling-policy=disable_non_proxied_udp`.
pub block_webrtc: bool,
/// Defaults to `false`. When `true`, `validate` raises
/// `base.timeout_ms` to at least `60_000.0`.
pub solve_cloudflare: bool,
}
impl Default for StealthConfig {
    /// Starts from the default `BrowserConfig` with WebGL allowed and every
    /// other stealth switch turned off.
    fn default() -> Self {
        let base = BrowserConfig::default();
        Self {
            base,
            // The only switch that is on by default.
            allow_webgl: true,
            hide_canvas: false,
            block_webrtc: false,
            solve_cloudflare: false,
        }
    }
}
impl StealthConfig {
    /// Validates the wrapped `BrowserConfig`, then applies the Cloudflare
    /// timeout floor.
    ///
    /// When `solve_cloudflare` is set and `base.timeout_ms` is below
    /// `60_000.0`, the timeout is raised to exactly `60_000.0`. Larger
    /// timeouts are left untouched.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `BrowserConfig::validate`.
    pub fn validate(&mut self) -> Result<()> {
        const CLOUDFLARE_MIN_TIMEOUT_MS: f64 = 60_000.0;

        self.base.validate()?;

        if self.solve_cloudflare {
            let timeout = &mut self.base.timeout_ms;
            if *timeout < CLOUDFLARE_MIN_TIMEOUT_MS {
                *timeout = CLOUDFLARE_MIN_TIMEOUT_MS;
            }
        }
        Ok(())
    }

    /// Returns the command-line flags implied by the stealth switches.
    ///
    /// Flags appear in a fixed order: WebRTC policy, canvas noise, then the
    /// two WebGL-disabling flags. The list is empty when no switch asks for
    /// a flag.
    pub fn extra_stealth_args(&self) -> Vec<String> {
        let mut flags: Vec<String> = Vec::new();

        if self.block_webrtc {
            flags.push(String::from(
                "--webrtc-ip-handling-policy=disable_non_proxied_udp",
            ));
        }
        if self.hide_canvas {
            flags.push(String::from("--fingerprinting-canvas-image-data-noise"));
        }
        if !self.allow_webgl {
            // WebGL 1 and 2 are switched off together.
            flags.extend(["--disable-webgl", "--disable-webgl2"].map(String::from));
        }

        flags
    }

    /// Returns a fixed set of context options.
    ///
    /// The result does not depend on any field of `self`: a 1920x1080 screen
    /// and viewport, scale factor 2.0, dark colour scheme, no mobile or touch
    /// emulation, HTTPS errors ignored, and the `geolocation` and
    /// `notifications` permissions.
    pub fn context_options(&self) -> StealthContextOptions {
        // Screen and viewport share the same dimensions.
        let width = 1920;
        let height = 1080;
        let permissions = Vec::from(["geolocation", "notifications"].map(String::from));

        StealthContextOptions {
            color_scheme: String::from("dark"),
            device_scale_factor: 2.0,
            screen_width: width,
            screen_height: height,
            viewport_width: width,
            viewport_height: height,
            is_mobile: false,
            has_touch: false,
            ignore_https_errors: true,
            permissions,
        }
    }
}
/// Context options produced by `StealthConfig::context_options`.
///
/// That constructor fills every field with a fixed value: `"dark"` colour
/// scheme, scale factor `2.0`, 1920x1080 for both screen and viewport,
/// `is_mobile` and `has_touch` off, `ignore_https_errors` on, and the
/// `geolocation` and `notifications` permissions.
#[derive(Debug, Clone)]
pub struct StealthContextOptions {
pub color_scheme: String,
pub device_scale_factor: f64,
pub screen_width: u32,
pub screen_height: u32,
pub viewport_width: u32,
pub viewport_height: u32,
pub is_mobile: bool,
pub has_touch: bool,
pub ignore_https_errors: bool,
pub permissions: Vec<String>,
}
/// A single proxy endpoint with optional credentials.
///
/// `Debug` is implemented by hand rather than derived so that the password
/// never ends up in logs or panic messages: a present password is printed as
/// `Some("<redacted>")`, an absent one as `None`.
#[derive(Clone)]
pub struct ProxyConfig {
    /// Proxy server address.
    pub server: String,
    /// Optional user name for proxy authentication.
    pub username: Option<String>,
    /// Optional password for proxy authentication; redacted in `Debug` output.
    pub password: Option<String>,
}

impl std::fmt::Debug for ProxyConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep whether a password is set visible, but never its value.
        let password = self.password.as_ref().map(|_| "<redacted>");
        f.debug_struct("ProxyConfig")
            .field("server", &self.server)
            .field("username", &self.username)
            .field("password", &password)
            .finish()
    }
}
/// Selector state used with `wait_selector`.
///
/// `BrowserConfig::default` uses `Attached`.
/// NOTE(review): the variant names look like Playwright's selector wait
/// states — confirm against the code that consumes this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WaitState {
Attached,
Visible,
Hidden,
Detached,
}
/// One cookie entry for `BrowserConfig::cookies`.
///
/// `name` and `value` are required; `domain`, `path` and `url` are optional.
/// NOTE(review): which combinations of `domain`/`path`/`url` are accepted is
/// decided by the consumer of this type, which is not visible here.
#[derive(Debug, Clone)]
pub struct CookieParam {
pub name: String,
pub value: String,
pub domain: Option<String>,
pub path: Option<String>,
pub url: Option<String>,
}
/// Per-fetch overrides.
///
/// Every field is optional and the derived `Default` leaves all of them
/// `None`. `FetchParams::merge_with_config` resolves each `None` against a
/// `BrowserConfig`, with two exceptions noted on the fields below.
#[derive(Debug, Clone, Default)]
pub struct FetchParams {
pub google_search: Option<bool>,
pub timeout_ms: Option<f64>,
pub wait_ms: Option<u64>,
/// When `Some`, replaces the configured headers entirely (no merging).
pub extra_headers: Option<HashMap<String, String>>,
pub disable_resources: Option<bool>,
pub network_idle: Option<bool>,
pub load_dom: Option<bool>,
pub wait_selector: Option<String>,
pub wait_selector_state: Option<WaitState>,
/// When `Some`, replaces the configured blocked domains entirely.
pub blocked_domains: Option<HashSet<String>>,
/// Resolves to `false` when `None`; `merge_with_config` has no
/// configuration-level fallback for it.
pub solve_cloudflare: Option<bool>,
/// NOTE(review): not read by `merge_with_config`, and
/// `ResolvedFetchParams` has no matching field — confirm where (or
/// whether) this override is consumed.
pub selector_config: Option<HashMap<String, serde_json::Value>>,
}
impl FetchParams {
    /// Resolves these per-fetch overrides against `config`.
    ///
    /// For every option, a value set here wins; otherwise the value from
    /// `config` is used. Map and set overrides replace the configured
    /// collection wholesale rather than being merged into it.
    ///
    /// Two fields are special:
    /// - `solve_cloudflare` falls back to `false`, since `BrowserConfig` has
    ///   no such field.
    /// - `selector_config` is not carried into the result.
    pub fn merge_with_config(&self, config: &BrowserConfig) -> ResolvedFetchParams {
        // Owned values are cloned from whichever side supplies them.
        let extra_headers = match &self.extra_headers {
            Some(overridden) => overridden.clone(),
            None => config.extra_headers.clone(),
        };
        let blocked_domains = match &self.blocked_domains {
            Some(overridden) => overridden.clone(),
            None => config.blocked_domains.clone(),
        };
        let wait_selector = self
            .wait_selector
            .as_ref()
            .or(config.wait_selector.as_ref())
            .cloned();

        // Copy-type options: take the override or the configured value.
        let google_search = self.google_search.unwrap_or(config.google_search);
        let timeout_ms = self.timeout_ms.unwrap_or(config.timeout_ms);
        let wait_ms = self.wait_ms.unwrap_or(config.wait_ms);
        let disable_resources = self.disable_resources.unwrap_or(config.disable_resources);
        let network_idle = self.network_idle.unwrap_or(config.network_idle);
        let load_dom = self.load_dom.unwrap_or(config.load_dom);
        let wait_selector_state = self
            .wait_selector_state
            .unwrap_or(config.wait_selector_state);
        let solve_cloudflare = self.solve_cloudflare.unwrap_or_default();

        ResolvedFetchParams {
            google_search,
            timeout_ms,
            wait_ms,
            extra_headers,
            disable_resources,
            network_idle,
            load_dom,
            wait_selector,
            wait_selector_state,
            blocked_domains,
            solve_cloudflare,
        }
    }
}
/// Result of `FetchParams::merge_with_config`.
///
/// Each field holds the per-fetch override when one was given, otherwise the
/// value from the `BrowserConfig`. `wait_selector` stays optional because
/// both sources may leave it unset.
#[derive(Debug, Clone)]
pub struct ResolvedFetchParams {
pub google_search: bool,
pub timeout_ms: f64,
pub wait_ms: u64,
pub extra_headers: HashMap<String, String>,
pub disable_resources: bool,
pub network_idle: bool,
pub load_dom: bool,
pub wait_selector: Option<String>,
pub wait_selector_state: WaitState,
pub blocked_domains: HashSet<String>,
/// `false` unless the per-fetch override set it.
pub solve_cloudflare: bool,
}