use crate::features::chrome_args::CHROME_ARGS;
use crate::utils::{detect_chrome::get_detect_chrome_executable, log};
use crate::{configuration::Configuration, tokio_stream::StreamExt};
use chromiumoxide::cdp::browser_protocol::browser::{
SetDownloadBehaviorBehavior, SetDownloadBehaviorParamsBuilder,
};
use chromiumoxide::cdp::browser_protocol::{
browser::BrowserContextId, emulation::SetGeolocationOverrideParams, network::CookieParam,
target::CreateTargetParams,
};
use chromiumoxide::error::CdpError;
use chromiumoxide::handler::REQUEST_TIMEOUT;
use chromiumoxide::serde_json;
use chromiumoxide::Page;
use chromiumoxide::{handler::HandlerConfig, Browser, BrowserConfig};
use lazy_static::lazy_static;
#[cfg(feature = "cookies")]
use std::sync::Arc;
use std::time::Duration;
use tokio::task::JoinHandle;
use url::Url;
// Process-wide flag, evaluated lazily on first use: `true` only when the
// `LOOP_BACK_PROXY` environment variable is set to exactly the string "true".
// A missing or non-unicode variable falls back to the empty string, i.e. `false`.
lazy_static! {
static ref LOOP_BACK_PROXY: bool = std::env::var("LOOP_BACK_PROXY").unwrap_or_default() == "true";
}
/// Convert the cookies the jar holds for `url` into CDP [`CookieParam`] values.
///
/// The jar is asked for its `Cookie` header for `url`; every `name=value`
/// segment of that header becomes one parameter scoped to `url` (URL, domain
/// when the URL has one, and path, with `/` used for an empty path).
///
/// `cookie_str` is only inspected for the substrings `Secure` and `HttpOnly`;
/// when present the corresponding flag is set on *every* produced cookie.
///
/// # Errors
///
/// * `"No cookies found"` when the jar has nothing for `url`.
/// * The header value is not valid visible ASCII.
/// * A segment does not contain `=` (`"Invalid cookie pair: …"`).
/// * The CDP builder rejects a cookie.
#[cfg(feature = "cookies")]
pub fn parse_cookies_with_jar(
    jar: &Arc<crate::client::cookie::Jar>,
    cookie_str: &str,
    url: &Url,
) -> Result<Vec<CookieParam>, String> {
    use crate::client::cookie::CookieStore;

    let Some(header_value) = jar.cookies(url) else {
        return Err("No cookies found".to_string());
    };
    let header = header_value.to_str().map_err(|e| e.to_string())?;

    // These depend only on the inputs, so compute them once for all cookies.
    let cookie_path = match url.path() {
        "" => "/",
        other => other,
    };
    let mark_secure = cookie_str.contains("Secure");
    let mark_http_only = cookie_str.contains("HttpOnly");

    header
        .split(';')
        .map(|segment| -> Result<CookieParam, String> {
            let (name, value) = segment
                .trim()
                .split_once('=')
                .ok_or_else(|| format!("Invalid cookie pair: {}", segment))?;

            let mut param = CookieParam::builder()
                .name(name.trim())
                .value(value.trim())
                .url(url.as_str());

            if let Some(domain) = url.domain() {
                param = param.domain(domain.to_string());
            }
            param = param.path(cookie_path);
            if mark_secure {
                param = param.secure(true);
            }
            if mark_http_only {
                param = param.http_only(true);
            }

            param.build()
        })
        .collect()
}
/// Stand-in used when the `cookies` feature is disabled.
///
/// Both arguments are ignored and an empty cookie list is always returned.
#[cfg(not(feature = "cookies"))]
pub fn parse_cookies_with_jar(cookie_str: &str, url: &Url) -> Result<Vec<CookieParam>, String> {
Ok(Default::default())
}
/// Load a raw `Cookie`-header style string (`a=1; b=2`) into `jar` for `url`.
///
/// Each non-empty `;`-separated segment is split at its first `=` and stored
/// as `name=value; Path=/` (name and value trimmed). Empty segments are skipped.
///
/// # Errors
///
/// Returns `"Invalid cookie pair: …"` for the first segment without `=`.
/// Cookies from earlier segments have already been added to the jar by then.
#[cfg(feature = "cookies")]
pub fn seed_jar_from_cookie_header(
    jar: &std::sync::Arc<crate::client::cookie::Jar>,
    cookie_header: &str,
    url: &url::Url,
) -> Result<(), String> {
    let segments = cookie_header
        .split(';')
        .map(str::trim)
        .filter(|segment| !segment.is_empty());

    for pair in segments {
        match pair.split_once('=') {
            Some((name, value)) => {
                let set_cookie = format!("{}={}; Path=/", name.trim(), value.trim());
                jar.add_cookie_str(&set_cookie, url);
            }
            None => return Err(format!("Invalid cookie pair: {pair}")),
        }
    }

    Ok(())
}
/// Install `cookies` on `page` with a single CDP `SetCookies` command.
///
/// An empty list is a no-op that returns `Ok(())` without contacting the browser.
///
/// # Errors
///
/// Returns the stringified CDP error when the command fails.
#[cfg(all(feature = "cookies", feature = "chrome"))]
pub async fn set_page_cookies(
    page: &chromiumoxide::Page,
    cookies: Vec<chromiumoxide::cdp::browser_protocol::network::CookieParam>,
) -> Result<(), String> {
    use chromiumoxide::cdp::browser_protocol::network::SetCookiesParams;

    if cookies.is_empty() {
        return Ok(());
    }

    match page.execute(SetCookiesParams::new(cookies)).await {
        Ok(_) => Ok(()),
        Err(err) => Err(err.to_string()),
    }
}
/// Build CDP cookie parameters from whatever `jar` would send to `url`.
///
/// Only name, value and URL are set on each parameter. Returns an empty
/// vector when the jar has no cookies for `url`; empty header segments are
/// skipped.
///
/// # Errors
///
/// Fails when the header is not valid visible ASCII, when a segment has no
/// `=` (`"Invalid cookie pair: …"`), or when the CDP builder rejects a cookie.
#[cfg(feature = "cookies")]
pub fn cookie_params_from_jar(
    jar: &std::sync::Arc<crate::client::cookie::Jar>,
    url: &url::Url,
) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::CookieParam>, String> {
    use crate::client::cookie::CookieStore;
    use chromiumoxide::cdp::browser_protocol::network::CookieParam;

    let header_value = match jar.cookies(url) {
        Some(value) => value,
        None => return Ok(Vec::new()),
    };
    let header = header_value.to_str().map_err(|e| e.to_string())?;

    header
        .split(';')
        .map(str::trim)
        .filter(|pair| !pair.is_empty())
        .map(|pair| -> Result<CookieParam, String> {
            let (name, value) = pair
                .split_once('=')
                .ok_or_else(|| format!("Invalid cookie pair: {pair}"))?;

            CookieParam::builder()
                .name(name.trim())
                .value(value.trim())
                .url(url.as_str())
                .build()
                .map_err(|e| e.to_string())
        })
        .collect()
}
/// Push the configured cookie string into both the jar and the browser.
///
/// Does nothing when `config.cookie_str` is empty or no URL is available.
/// Otherwise the cookie string is seeded into `jar` for the URL, the jar is
/// read back as CDP parameters, and a non-empty result is installed on
/// `browser`. Every failure along the way is deliberately ignored
/// (best-effort).
#[cfg(feature = "cookies")]
pub async fn set_cookies(
    jar: &Arc<crate::client::cookie::Jar>,
    config: &Configuration,
    url_parsed: &Option<Box<Url>>,
    browser: &Browser,
) {
    if config.cookie_str.is_empty() {
        return;
    }

    if let Some(target_url) = url_parsed.as_deref() {
        // Seeding errors are ignored; whatever made it into the jar is used below.
        let _ = seed_jar_from_cookie_header(jar, &config.cookie_str, target_url);

        if let Ok(params) = parse_cookies_with_jar(jar, &config.cookie_str, target_url) {
            if !params.is_empty() {
                let _ = browser.set_cookies(params).await;
            }
        }
    }
}
/// Rewrite Chrome launch arguments in place so the on-device AI features are enabled.
///
/// * Every `--disable-features=` argument has the `OptimizationHints` entry removed.
/// * Every `--enable-features=` argument gets the AI feature list appended.
/// * When no `--enable-features=` argument exists, one is pushed at the end.
fn patch_chrome_ai_args(args: &mut Vec<String>) {
    const DISABLE_PREFIX: &str = "--disable-features=";
    const ENABLE_PREFIX: &str = "--enable-features=";
    const AI_FEATURES: &str = "OptimizationGuideOnDeviceModel:BypassPerfRequirement/true,PromptAPIForGeminiNano,PromptAPIForGeminiNanoMultimodalInput";

    let mut has_enable_list = false;

    for arg in args.iter_mut() {
        if let Some(list) = arg.strip_prefix(DISABLE_PREFIX) {
            let kept = list
                .split(',')
                .filter(|feature| *feature != "OptimizationHints")
                .collect::<Vec<&str>>()
                .join(",");
            *arg = format!("{DISABLE_PREFIX}{kept}");
        }

        if arg.starts_with(ENABLE_PREFIX) {
            has_enable_list = true;
            arg.push(',');
            arg.push_str(AI_FEATURES);
        }
    }

    if !has_enable_list {
        args.push(format!("{ENABLE_PREFIX}{AI_FEATURES}"));
    }
}
/// Build the launch configuration for a headless Chrome instance.
///
/// * `proxies` – when present, `CHROME_ARGS` is used with every `://`
///   replaced by `=`, and a `--proxy-server=` argument listing all proxies not
///   marked `ProxyIgnore::Chrome` (joined with `;`) is appended if any remain.
///   When absent, `CHROME_ARGS` is passed through unmodified.
/// * `intercept` / `cache_enabled` – toggle request interception and the cache.
/// * `viewport` – forwarded to the builder.
/// * `request_timeout` – falls back to `REQUEST_TIMEOUT` milliseconds.
/// * `use_chrome_ai` – additionally runs `patch_chrome_ai_args` over the arguments.
///
/// Returns `None` (after logging the error) when the builder fails.
#[cfg(not(feature = "chrome_headed"))]
pub fn get_browser_config(
    proxies: &Option<Vec<crate::configuration::RequestProxy>>,
    intercept: bool,
    cache_enabled: bool,
    viewport: impl Into<Option<chromiumoxide::handler::viewport::Viewport>>,
    request_timeout: &Option<core::time::Duration>,
    use_chrome_ai: bool,
) -> Option<BrowserConfig> {
    let timeout = match request_timeout {
        Some(timeout) => *timeout,
        None => Duration::from_millis(REQUEST_TIMEOUT),
    };

    let mut builder = BrowserConfig::builder()
        .disable_default_args()
        .no_sandbox()
        .request_timeout(timeout);

    builder = if cache_enabled {
        builder.enable_cache()
    } else {
        builder.disable_cache()
    };

    if intercept {
        builder = builder.enable_request_intercept();
    }

    builder = if let Some(proxy_list) = proxies {
        let mut chrome_args: Vec<String> = CHROME_ARGS
            .iter()
            .map(|arg| arg.replace("://", "="))
            .collect();

        if use_chrome_ai {
            patch_chrome_ai_args(&mut chrome_args);
        }

        // Proxies flagged to be ignored by Chrome never reach the command line.
        let chrome_proxies = proxy_list
            .iter()
            .filter(|p| p.ignore != crate::configuration::ProxyIgnore::Chrome)
            .map(|p| p.addr.to_owned())
            .collect::<Vec<String>>();

        if !chrome_proxies.is_empty() {
            chrome_args.push(string_concat!(
                r#"--proxy-server="#,
                chrome_proxies.join(";")
            ));
        }

        builder.args(chrome_args)
    } else if use_chrome_ai {
        let mut chrome_args: Vec<String> = CHROME_ARGS.iter().map(|arg| arg.to_string()).collect();
        patch_chrome_ai_args(&mut chrome_args);
        builder.args(chrome_args)
    } else {
        builder.args(CHROME_ARGS)
    };

    if let Some(executable) = get_detect_chrome_executable() {
        builder = builder.chrome_executable(executable);
    }

    match builder.viewport(viewport).build() {
        Ok(browser_config) => Some(browser_config),
        Err(error) => {
            log("", error);
            None
        }
    }
}
/// Build the launch configuration for a headed (visible window) Chrome instance.
///
/// * `proxies` – every proxy not marked `ProxyIgnore::Chrome` is joined with
///   `;` into a single `--proxy-server=` argument. The argument is only added
///   when at least one proxy remains, matching the headless variant.
/// * `intercept` / `cache_enabled` – toggle request interception and the cache.
/// * `viewport` – forwarded to the builder.
/// * `request_timeout` – falls back to `REQUEST_TIMEOUT` milliseconds.
/// * `use_chrome_ai` – additionally runs `patch_chrome_ai_args` over the arguments.
///
/// The base arguments are `CHROME_ARGS` with `--headless` removed and every
/// `://` replaced by `=`.
///
/// Returns `None` (after logging the error) when the builder fails.
#[cfg(feature = "chrome_headed")]
pub fn get_browser_config(
    proxies: &Option<Vec<crate::configuration::RequestProxy>>,
    intercept: bool,
    cache_enabled: bool,
    viewport: impl Into<Option<chromiumoxide::handler::viewport::Viewport>>,
    request_timeout: &Option<core::time::Duration>,
    use_chrome_ai: bool,
) -> Option<BrowserConfig> {
    let builder = BrowserConfig::builder()
        .disable_default_args()
        .no_sandbox()
        .request_timeout(match request_timeout.as_ref() {
            Some(timeout) => *timeout,
            _ => Duration::from_millis(REQUEST_TIMEOUT),
        })
        .with_head();

    let builder = if cache_enabled {
        builder.enable_cache()
    } else {
        builder.disable_cache()
    };

    let builder = if intercept {
        builder.enable_request_intercept()
    } else {
        builder
    };

    // Drop `--headless` outright instead of replacing it with an empty-string
    // argument, which would otherwise be handed to the Chrome process.
    let mut chrome_args: Vec<String> = CHROME_ARGS
        .iter()
        .filter(|arg| **arg != "--headless")
        .map(|arg| arg.replace("://", "="))
        .collect();

    if use_chrome_ai {
        patch_chrome_ai_args(&mut chrome_args);
    }

    if let Some(proxies) = proxies {
        let base_proxies = proxies
            .iter()
            .filter_map(|p| {
                if p.ignore == crate::configuration::ProxyIgnore::Chrome {
                    None
                } else {
                    Some(p.addr.to_owned())
                }
            })
            .collect::<Vec<String>>();

        // Never emit a value-less `--proxy-server=` when every proxy was
        // filtered out (or the list was empty).
        if !base_proxies.is_empty() {
            chrome_args.push(string_concat!(r#"--proxy-server="#, base_proxies.join(";")));
        }
    }

    let builder = builder.args(chrome_args);

    let builder = match get_detect_chrome_executable() {
        Some(v) => builder.chrome_executable(v),
        _ => builder,
    };

    match builder.viewport(viewport).build() {
        Ok(b) => Some(b),
        Err(error) => {
            log("", error);
            None
        }
    }
}
pub fn create_handler_config(config: &Configuration) -> HandlerConfig {
HandlerConfig {
request_timeout: match config.request_timeout.as_ref() {
Some(timeout) => *timeout,
_ => Duration::from_millis(REQUEST_TIMEOUT),
},
request_intercept: config.chrome_intercept.enabled,
cache_enabled: config.cache,
service_worker_enabled: config.service_worker_enabled,
viewport: match config.viewport {
Some(ref v) => Some(chromiumoxide::handler::viewport::Viewport::from(
v.to_owned(),
)),
_ => default_viewport(),
},
ignore_visuals: config.chrome_intercept.block_visuals,
whitelist_patterns: config.chrome_intercept.whitelist_patterns.clone(),
blacklist_patterns: config.chrome_intercept.blacklist_patterns.clone(),
ignore_ads: config.chrome_intercept.block_ads,
ignore_javascript: config.chrome_intercept.block_javascript,
ignore_analytics: config.chrome_intercept.block_analytics,
ignore_stylesheets: config.chrome_intercept.block_stylesheets,
extra_headers: match &config.headers {
Some(headers) => {
let mut hm = crate::utils::header_utils::header_map_to_hash_map(headers.inner());
cleanup_invalid_headers(&mut hm);
if hm.is_empty() {
None
} else {
if cfg!(feature = "real_browser") {
crate::utils::header_utils::rewrite_headers_to_title_case(&mut hm);
}
Some(hm)
}
}
_ => None,
},
intercept_manager: config.chrome_intercept.intercept_manager,
only_html: config.only_html && !config.full_resources,
max_bytes_allowed: config.max_bytes_allowed,
..HandlerConfig::default()
}
}
// Remote Chrome endpoint taken from the `CHROME_URL` environment variable,
// read lazily on first use. `None` when the variable is unset or not unicode.
// Used by `setup_browser_configuration` when the configuration itself does
// not provide `chrome_connection_url`.
lazy_static! {
static ref CHROM_BASE: Option<String> = std::env::var("CHROME_URL").ok();
}
/// Ordered list of remote Chrome endpoints that are tried one after another
/// until a connection succeeds.
pub struct ChromeConnectionFailover {
// Endpoint URLs, tried in the order given.
urls: Vec<String>,
// One failure counter per entry in `urls` (same index); reset to 0 on success.
errors: Vec<std::sync::atomic::AtomicU32>,
// Number of retries per endpoint; each endpoint gets `max_retries + 1` attempts.
max_retries: u32,
}
impl ChromeConnectionFailover {
/// Create a failover list over `urls`, allowing `max_retries` retries per
/// endpoint. Every endpoint starts with an error count of zero.
pub fn new(urls: Vec<String>, max_retries: u32) -> Self {
let errors = urls
.iter()
.map(|_| std::sync::atomic::AtomicU32::new(0))
.collect();
Self {
urls,
errors,
max_retries,
}
}
/// Connect to the first endpoint that accepts a connection.
///
/// Endpoints are visited in order. Each one is attempted up to
/// `max_retries + 1` times, sleeping for a backoff delay between failed
/// attempts, before moving on to the next endpoint. The handler
/// configuration is built once from `config` and cloned per attempt.
///
/// Returns the browser/handler pair on success, or `None` once every
/// endpoint has been exhausted (also the case when there are no endpoints).
pub async fn connect(
&self,
config: &Configuration,
) -> Option<(Browser, chromiumoxide::Handler)> {
let handler_config_base = create_handler_config(config);
for (idx, url) in self.urls.iter().enumerate() {
let err_count = &self.errors[idx];
// Inclusive range: the first try plus `max_retries` retries.
for attempt in 0..=self.max_retries {
match Browser::connect_with_config(url.as_str(), handler_config_base.clone()).await
{
Ok(pair) => {
// A successful connection clears this endpoint's failure history.
err_count.store(0, std::sync::atomic::Ordering::Relaxed);
if idx > 0 {
log::info!(
"[chrome-failover] connected to endpoint {} ({}) after skipping {}",
idx,
url,
idx
);
}
return Some(pair);
}
Err(e) => {
// `fetch_add` returns the previous value, so `+ 1` yields the new count.
let n = err_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed) + 1;
log::warn!(
"[chrome-failover] endpoint {} ({}) attempt {}/{} failed: {:?}",
idx,
url,
attempt + 1,
self.max_retries + 1,
e
);
if attempt < self.max_retries {
// NOTE(review): presumably (attempt, base ms, cap ms) — confirm against `backoff_delay`.
let backoff = crate::utils::backoff::backoff_delay(attempt, 100, 5_000);
tokio::time::sleep(backoff).await;
} else {
log::warn!(
"[chrome-failover] endpoint {} exhausted ({} errors), trying next",
idx,
n
);
}
}
}
}
}
log::error!(
"[chrome-failover] all {} endpoints exhausted",
self.urls.len()
);
None
}
/// Number of configured endpoints.
#[inline]
pub fn len(&self) -> usize {
self.urls.len()
}
/// `true` when no endpoints are configured.
#[inline]
pub fn is_empty(&self) -> bool {
self.urls.is_empty()
}
}
/// Default viewport when the `real_browser` feature is disabled: none.
#[cfg(not(feature = "real_browser"))]
pub fn default_viewport() -> Option<chromiumoxide::handler::viewport::Viewport> {
None
}
/// Default viewport when the `real_browser` feature is enabled: the result of
/// `get_random_viewport()` converted into a chromiumoxide viewport.
#[cfg(feature = "real_browser")]
pub fn default_viewport() -> Option<chromiumoxide::handler::viewport::Viewport> {
use super::chrome_viewport::get_random_viewport;
Some(chromiumoxide::handler::viewport::Viewport::from(
get_random_viewport(),
))
}
/// Remove headers that must not be forwarded as extra request headers:
/// `User-Agent`, `Host`, `Connection` and `Content-Length`.
///
/// HTTP field names are case-insensitive, so the match ignores ASCII case
/// (`USER-AGENT`, `Content-length`, … are removed as well). All other entries
/// are left untouched.
pub fn cleanup_invalid_headers(hm: &mut std::collections::HashMap<String, String>) {
    const BLOCKED_HEADERS: [&str; 4] = ["user-agent", "host", "connection", "content-length"];

    hm.retain(|name, _| {
        !BLOCKED_HEADERS
            .iter()
            .any(|blocked| name.eq_ignore_ascii_case(blocked))
    });
}
/// Obtain a browser and its event handler according to `config`.
///
/// Resolution order:
/// 1. A non-empty `config.chrome_connection_urls` list: connect through
///    [`ChromeConnectionFailover`] with 3 retries per endpoint.
/// 2. `config.chrome_connection_url`, or else the `CHROME_URL` environment
///    variable: connect to that single endpoint, retrying on failure.
/// 3. Otherwise launch a local Chrome using [`get_browser_config`].
///
/// Returns `None` when connecting or launching fails.
pub async fn setup_browser_configuration(
config: &Configuration,
) -> Option<(Browser, chromiumoxide::Handler)> {
let proxies = &config.proxies;
if let Some(ref urls) = config.chrome_connection_urls {
if !urls.is_empty() {
let failover = ChromeConnectionFailover::new(urls.clone(), 3);
return failover.connect(config).await;
}
}
// The explicit configuration value takes precedence over the environment.
let chrome_connection = if config.chrome_connection_url.is_some() {
config.chrome_connection_url.as_ref()
} else {
CHROM_BASE.as_ref()
};
match chrome_connection {
Some(v) => {
let mut attempts = 0;
let max_retries = 10;
let mut browser = None;
// Up to `max_retries + 1` connection attempts in total.
// NOTE(review): retries happen back-to-back with no delay, unlike
// `ChromeConnectionFailover::connect` — confirm this is intended.
while attempts <= max_retries {
match Browser::connect_with_config(v, create_handler_config(config)).await {
Ok(b) => {
browser = Some(b);
break;
}
Err(err) => {
log::error!("{:?}", err);
attempts += 1;
if attempts > max_retries {
log::error!("Exceeded maximum retry attempts");
break;
}
}
}
}
browser
}
_ => match get_browser_config(
proxies,
config.chrome_intercept.enabled,
config.cache,
match config.viewport {
Some(ref v) => Some(chromiumoxide::handler::viewport::Viewport::from(
v.to_owned(),
)),
_ => default_viewport(),
},
&config.request_timeout,
// Chrome AI arguments are only patched in when the multimodal config asks for them.
config
.remote_multimodal
.as_ref()
.map(|m| m.should_use_chrome_ai())
.unwrap_or(false),
) {
Some(mut browser_config) => {
// Copy the interception/blocking settings onto the launch configuration.
browser_config.ignore_visuals = config.chrome_intercept.block_visuals;
browser_config.ignore_javascript = config.chrome_intercept.block_javascript;
browser_config.ignore_ads = config.chrome_intercept.block_ads;
browser_config.whitelist_patterns =
config.chrome_intercept.whitelist_patterns.clone();
browser_config.blacklist_patterns =
config.chrome_intercept.blacklist_patterns.clone();
browser_config.ignore_stylesheets = config.chrome_intercept.block_stylesheets;
browser_config.ignore_analytics = config.chrome_intercept.block_analytics;
// Same header preparation as `create_handler_config`: strip headers
// that must not be forwarded and drop the map if nothing remains.
browser_config.extra_headers = match &config.headers {
Some(headers) => {
let mut hm =
crate::utils::header_utils::header_map_to_hash_map(headers.inner());
cleanup_invalid_headers(&mut hm);
if hm.is_empty() {
None
} else {
if cfg!(feature = "real_browser") {
crate::utils::header_utils::rewrite_headers_to_title_case(&mut hm);
}
Some(hm)
}
}
_ => None,
};
browser_config.intercept_manager = config.chrome_intercept.intercept_manager;
browser_config.only_html = config.only_html && !config.full_resources;
match Browser::launch(browser_config).await {
Ok(browser) => Some(browser),
Err(e) => {
log::error!("Browser::launch() failed: {:?}", e);
None
}
}
}
_ => None,
},
}
}
/// Set up a browser, start polling its handler, and prepare a fresh browser context.
///
/// Steps:
/// 1. Obtain the browser via [`setup_browser_configuration`].
/// 2. Spawn a task that drains the handler stream and raises the returned
///    "browser dead" flag when the stream ends or a fatal error is seen.
/// 3. Create a browser context (disposed on detach) carrying the proxy settings.
/// 4. On success: register the context, apply cookies when `jar` is given, and
///    set the download behavior for the context to `Deny`.
///
/// Returns `(browser, handler task, context id, dead flag)`, or `None` when no
/// browser could be obtained. If context creation fails the handler task is
/// aborted and the tuple is still returned with a `None` context id.
pub async fn launch_browser_base(
config: &Configuration,
url_parsed: &Option<Box<Url>>,
jar: Option<&std::sync::Arc<crate::client::cookie::Jar>>,
) -> Option<(
Browser,
tokio::task::JoinHandle<()>,
Option<BrowserContextId>,
std::sync::Arc<std::sync::atomic::AtomicBool>,
)> {
use chromiumoxide::{
cdp::browser_protocol::target::CreateBrowserContextParams, error::CdpError,
};
let browser_configuration = setup_browser_configuration(config).await;
match browser_configuration {
Some(c) => {
let (mut browser, mut handler) = c;
let mut context_id = None;
// Shared flag: the spawned task stores `true`, the caller keeps the other handle.
let browser_dead = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
let browser_dead_signal = browser_dead.clone();
let handle = tokio::task::spawn(async move {
while let Some(k) = handler.next().await {
if let Err(e) = k {
match e {
// Websocket and launch errors stop the polling loop.
CdpError::Ws(_)
| CdpError::LaunchExit(_, _)
| CdpError::LaunchTimeout(_)
| CdpError::LaunchIo(_, _) => {
browser_dead_signal
.store(true, std::sync::atomic::Ordering::Release);
log::error!("Browser handler fatal error: {:?}", e);
break;
}
// Any other error is skipped and polling continues.
_ => {
continue;
}
}
}
}
// Reached both after a fatal error and when the handler stream simply ends.
browser_dead_signal.store(true, std::sync::atomic::Ordering::Release);
});
let mut create_content = CreateBrowserContextParams::default();
create_content.dispose_on_detach = Some(true);
if let Some(ref proxies) = config.proxies {
let use_plain_http = proxies.len() >= 2;
// Without an early `break`, a later usable proxy overwrites an earlier one.
for proxie in proxies.iter() {
if proxie.ignore == crate::configuration::ProxyIgnore::Chrome {
continue;
}
let proxie = &proxie.addr;
if !proxie.is_empty() {
if proxie.starts_with("socks://") {
create_content.proxy_server =
Some(proxie.replacen("socks://", "http://", 1));
// With two or more proxies the rewritten socks entry wins and the loop stops.
if use_plain_http {
break;
}
// NOTE(review): with a single proxy, execution falls through and the
// assignment at the end of this block restores the original
// `socks://` address — confirm this is intended.
}
if *LOOP_BACK_PROXY && proxie.starts_with("http://localhost") {
create_content.proxy_bypass_list =
Some("<-loopback>;localhost;[::1]".into());
}
create_content.proxy_server = Some(proxie.into());
}
}
}
if let Ok(c) = browser.create_browser_context(create_content).await {
let _ = browser.send_new_context(c.clone()).await;
let _ = context_id.insert(c);
if let Some(jar) = jar {
set_cookies(jar, config, url_parsed, &browser).await;
}
if let Some(id) = &browser.browser_context.id {
// Deny downloads for this context, with download events disabled.
let cmd = SetDownloadBehaviorParamsBuilder::default();
if let Ok(cmd) = cmd
.behavior(SetDownloadBehaviorBehavior::Deny)
.events_enabled(false)
.browser_context_id(id.clone())
.build()
{
let _ = browser.execute(cmd).await;
}
}
} else {
// Context creation failed: stop polling the handler.
handle.abort();
}
Some((browser, handle, context_id, browser_dead))
}
_ => None,
}
}
/// Launch (or connect to) a browser without seeding any cookies.
///
/// Delegates to [`launch_browser_base`] with no cookie jar and returns its
/// result unchanged: `(browser, handler task, context id, dead flag)`.
pub async fn launch_browser(
config: &Configuration,
url_parsed: &Option<Box<Url>>,
) -> Option<(
Browser,
tokio::task::JoinHandle<()>,
Option<BrowserContextId>,
std::sync::Arc<std::sync::atomic::AtomicBool>,
)> {
launch_browser_base(config, url_parsed, None).await
}
/// Launch (or connect to) a browser, applying cookies from `jar` when one is given.
///
/// Delegates to [`launch_browser_base`] and returns its result unchanged:
/// `(browser, handler task, context id, dead flag)`.
pub async fn launch_browser_cookies(
config: &Configuration,
url_parsed: &Option<Box<Url>>,
jar: Option<&Arc<crate::client::cookie::Jar>>,
) -> Option<(
Browser,
tokio::task::JoinHandle<()>,
Option<BrowserContextId>,
std::sync::Arc<std::sync::atomic::AtomicBool>,
)> {
launch_browser_base(config, url_parsed, jar).await
}
/// Geolocation record for the current outbound IP address.
///
/// Produced by `detect_geo_info`, which deserializes it from the JSON body of
/// an IP lookup service. Every field is optional; a key that is absent from
/// the response is left as `None`.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct GeoInfo {
pub ip: Option<String>,
pub network: Option<String>,
pub version: Option<String>,
pub city: Option<String>,
pub region: Option<String>,
pub region_code: Option<String>,
pub country: Option<String>,
pub country_name: Option<String>,
pub country_code: Option<String>,
pub country_code_iso3: Option<String>,
pub country_capital: Option<String>,
pub country_tld: Option<String>,
pub continent_code: Option<String>,
pub in_eu: Option<bool>,
pub postal: Option<String>,
/// Used by `configure_browser` for the geolocation override.
pub latitude: Option<f64>,
/// Used by `configure_browser` for the geolocation override.
pub longitude: Option<f64>,
/// Used by `configure_browser` as the timezone override when non-empty.
pub timezone: Option<String>,
pub utc_offset: Option<String>,
pub country_calling_code: Option<String>,
pub currency: Option<String>,
pub currency_name: Option<String>,
/// Comma-separated list; `configure_browser` uses the first entry as the locale.
pub languages: Option<String>,
pub country_area: Option<f64>,
pub country_population: Option<u64>,
pub asn: Option<String>,
pub org: Option<String>,
}
/// Look up geolocation data for the browser's outbound IP address.
///
/// Navigates `new_page` to one randomly chosen IP lookup service, waits for
/// the navigation to finish, and parses the JSON that the browser rendered
/// inside a `<pre>` element.
///
/// Returns `None` if navigation fails, no `<pre>…</pre>` element is found, or
/// the text inside it does not deserialize into [`GeoInfo`].
#[cfg(feature = "serde")]
pub async fn detect_geo_info(new_page: &Page) -> Option<GeoInfo> {
    use rand::prelude::IndexedRandom;

    /// Return the trimmed text of the first `<pre …>…</pre>` element in `html`.
    ///
    /// The opening tag may carry attributes (a browser may emit an inline
    /// `style` on the wrapper), and the closing tag is only searched for
    /// *after* the opening tag.
    fn pre_block_text(html: &str) -> Option<&str> {
        let mut rest = html;
        loop {
            let (_, after_name) = rest.split_once("<pre")?;
            // Accept `<pre>` / `<pre attr…>` but not e.g. `<preview>`.
            if after_name.starts_with(|c: char| c == '>' || c.is_ascii_whitespace()) {
                let (_, body) = after_name.split_once('>')?;
                let (text, _) = body.split_once("</pre>")?;
                return Some(text.trim());
            }
            rest = after_name;
        }
    }

    let apis = [
        "https://ipapi.co/json",
        "https://ipinfo.io/json",
        "https://ipwho.is/",
    ];
    let url = apis.choose(&mut rand::rng())?;

    new_page.goto(*url).await.ok()?;
    new_page.wait_for_navigation().await.ok()?;

    let html = new_page.content().await.ok()?;
    let json = pre_block_text(&html)?;

    serde_json::from_str(json).ok()
}
/// Stand-in used when the `serde` feature is disabled: no lookup is performed
/// and `None` is always returned. `new_page` is unused.
#[cfg(not(feature = "serde"))]
pub async fn detect_geo_info(new_page: &Page) -> Option<GeoInfo> {
None
}
/// Apply timezone, locale and geolocation emulation to `new_page`.
///
/// The timezone and locale come from `configuration`. When neither is
/// configured, `auto_geolocation` is on and proxies are set, they are instead
/// derived from [`detect_geo_info`]: the first entry of `languages` becomes the
/// locale, `timezone` becomes the timezone, and the coordinates become a
/// geolocation override.
///
/// * Timezone and locale both known: timezone, locale and (if detected)
///   geolocation are applied concurrently.
/// * Only one of them known: just that one is applied.
/// * Neither known: nothing is applied.
///
/// Errors from the individual emulation commands are ignored.
pub async fn configure_browser(new_page: &Page, configuration: &Configuration) {
    use chromiumoxide::cdp::browser_protocol::emulation::{
        SetLocaleOverrideParams, SetTimezoneOverrideParams,
    };

    let mut timezone_value = configuration.timezone_id.clone();
    let mut locale_value = configuration.locale.clone();
    let mut emulate_geolocation = None;

    let nothing_configured = timezone_value.is_none() && locale_value.is_none();

    if configuration.auto_geolocation && configuration.proxies.is_some() && nothing_configured {
        if let Some(geo) = detect_geo_info(new_page).await {
            let first_language = geo
                .languages
                .as_deref()
                .and_then(|languages| languages.split(',').next())
                .filter(|language| !language.is_empty());
            if let Some(language) = first_language {
                locale_value = Some(Box::new(language.into()));
            }

            if let Some(detected_tz) = geo.timezone.filter(|tz| !tz.is_empty()) {
                timezone_value = Some(Box::new(detected_tz));
            }

            let mut position = SetGeolocationOverrideParams::default();
            position.latitude = geo.latitude;
            position.longitude = geo.longitude;
            position.accuracy = Some(0.7);
            emulate_geolocation = Some(position);
        }
    }

    let has_timezone = timezone_value.is_some();
    let has_locale = locale_value.is_some();

    // The futures below are lazy: only the ones awaited in the `match` run.
    let apply_geolocation = async {
        if let Some(position) = emulate_geolocation {
            let _ = new_page.emulate_geolocation(position).await;
        }
    };
    let apply_timezone = async {
        if let Some(tz) = timezone_value.as_deref().filter(|tz| !tz.is_empty()) {
            let _ = new_page
                .emulate_timezone(SetTimezoneOverrideParams::new(tz))
                .await;
        }
    };
    let apply_locale = async {
        if let Some(lc) = locale_value.as_deref().filter(|lc| !lc.is_empty()) {
            let _ = new_page
                .emulate_locale(SetLocaleOverrideParams {
                    locale: Some(lc.into()),
                })
                .await;
        }
    };

    match (has_timezone, has_locale) {
        (true, true) => {
            tokio::join!(apply_timezone, apply_locale, apply_geolocation);
        }
        (true, false) => apply_timezone.await,
        (false, true) => apply_locale.await,
        (false, false) => {}
    }
}
/// Open a new page (CDP target) at `url`, bounded by a timeout.
///
/// * The target is created in `browser_context_id` when one is given, and is
///   opened in the background in that case.
/// * When `viewport` differs in width or height from the browser's configured
///   viewport, the target is opened in a new window; each dimension is only
///   passed on when it is at least 25.
/// * `request_timeout` defaults to 60 seconds.
///
/// # Errors
///
/// Returns `CdpError::Timeout` when the page is not created in time, or the
/// error reported by `Browser::new_page`.
#[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
pub(crate) async fn attempt_navigation(
    url: &str,
    browser: &Browser,
    request_timeout: &Option<core::time::Duration>,
    browser_context_id: &Option<BrowserContextId>,
    viewport: &Option<crate::features::chrome_common::Viewport>,
) -> Result<Page, CdpError> {
    let mut cdp_params = CreateTargetParams::new(url);

    cdp_params.background = Some(browser_context_id.is_some());
    cdp_params.browser_context_id.clone_from(browser_context_id);
    cdp_params.for_tab = Some(false);

    if let Some(requested) = viewport.as_ref() {
        let configured = browser.config().and_then(|c| c.viewport.as_ref());

        if let Some(current) = configured {
            let differs = current.width != requested.width || current.height != requested.height;

            if differs {
                if requested.width >= 25 {
                    cdp_params.width = Some(requested.width.into());
                }
                if requested.height >= 25 {
                    cdp_params.height = Some(requested.height.into());
                }
                cdp_params.new_window = Some(true);
            }
        }
    }

    let limit = match request_timeout {
        Some(timeout) => *timeout,
        None => tokio::time::Duration::from_secs(60),
    };

    tokio::time::timeout(limit, browser.new_page(cdp_params))
        .await
        .unwrap_or_else(|_| Err(CdpError::Timeout))
}
/// Stop the browser's handler task.
///
/// Aborts `browser_handle` unless it has already finished. `_browser` and
/// `_context_id` are accepted but not used.
pub async fn close_browser(
browser_handle: JoinHandle<()>,
_browser: &Browser,
_context_id: &mut Option<BrowserContextId>,
) {
if !browser_handle.is_finished() {
browser_handle.abort();
}
}
/// Answer HTTP auth challenges on `page` with the configured credentials.
///
/// Only active when `chrome_intercept` is `true` and `auth_challenge_response`
/// is set. A background task named `auth_interception` is spawned that listens
/// for `Fetch.authRequired` events and replies to each one with
/// `ContinueWithAuth`. Failures are logged and the task keeps listening.
#[cfg(feature = "chrome")]
pub async fn setup_auth_challenge_response(
    page: &chromiumoxide::Page,
    chrome_intercept: bool,
    auth_challenge_response: &Option<crate::configuration::AuthChallengeResponse>,
) {
    use chromiumoxide::cdp::browser_protocol::fetch;

    if !chrome_intercept {
        return;
    }
    let Some(configured_response) = auth_challenge_response else {
        return;
    };
    let Ok(mut auth_events) = page.event_listener::<fetch::EventAuthRequired>().await else {
        return;
    };

    // The task outlives this call, so it needs its own page handle and credentials.
    let intercept_page = page.clone();
    let configured_response = configured_response.clone();

    crate::utils::spawn_task("auth_interception", async move {
        while let Some(event) = auth_events.next().await {
            let u = &event.request.url;
            let acr = fetch::AuthChallengeResponse::from(configured_response.clone());
            let command = fetch::ContinueWithAuthParams::builder()
                .request_id(event.request_id.clone())
                .auth_challenge_response(acr)
                .build();

            match command {
                Ok(c) => {
                    if let Err(e) = intercept_page.send_command(c).await {
                        log("Failed to fullfill auth challege request: ", e.to_string());
                    }
                }
                Err(_) => {
                    log("Failed to get auth challege request handle ", u);
                }
            }
        }
    });
}
/// Set up interception helpers for `page`.
///
/// When `chrome_intercept` is enabled this installs the auth-challenge
/// responder via [`setup_auth_challenge_response`]. `_ignore_visuals` and
/// `_host_name` are accepted but not used.
///
/// Always returns `None`; no task handle is produced here.
#[cfg(feature = "chrome")]
pub async fn setup_chrome_interception_base(
page: &chromiumoxide::Page,
chrome_intercept: bool,
auth_challenge_response: &Option<crate::configuration::AuthChallengeResponse>,
_ignore_visuals: bool,
_host_name: &str,
) -> Option<tokio::task::JoinHandle<()>> {
if chrome_intercept {
setup_auth_challenge_response(page, chrome_intercept, auth_challenge_response).await;
}
None
}
/// Prepare `chrome_page` before use: fingerprint/emulation script, ad blocking,
/// user agent, hardware concurrency, log/CSP switches and the
/// timezone/locale/geolocation setup from [`configure_browser`].
///
/// All steps run concurrently under one 15-second budget. Individual command
/// errors are ignored; exceeding the budget only logs an error.
pub async fn setup_chrome_events(chrome_page: &chromiumoxide::Page, config: &Configuration) {
// An empty user agent string is treated the same as no user agent.
let ua_opt = config.user_agent.as_deref().filter(|ua| !ua.is_empty());
let ua_for_profiles: &str = ua_opt.map_or("", |v| v);
let mut emulation_config =
spider_fingerprint::EmulationConfiguration::setup_defaults(ua_for_profiles);
let stealth_mode = config.stealth_mode;
let use_stealth = stealth_mode.stealth();
let block_ads = config.chrome_intercept.block_ads;
// Dialog dismissal is on unless the configuration explicitly disables it.
emulation_config.dismiss_dialogs = config.dismiss_dialogs.unwrap_or(true);
emulation_config.fingerprint = config.fingerprint;
emulation_config.tier = stealth_mode;
emulation_config.user_agent_data = Some(!ua_for_profiles.is_empty());
let viewport = config.viewport.as_ref().map(|vp| (*vp).into());
let gpu_profile = spider_fingerprint::profiles::gpu::select_random_gpu_profile(
spider_fingerprint::get_agent_os(ua_for_profiles),
);
let merged_script = spider_fingerprint::emulate_with_profile(
ua_for_profiles,
&emulation_config,
&viewport.as_ref(),
&config.evaluate_on_new_document,
gpu_profile,
);
// The script is injected only when there is one and either stealth mode or
// a user-supplied `evaluate_on_new_document` script asks for it.
let should_inject_script =
(use_stealth || config.evaluate_on_new_document.is_some()) && merged_script.is_some();
// Falls back to 8 when the profile's value does not fit in a `u32`.
let hc: u32 = gpu_profile.hardware_concurrency.try_into().unwrap_or(8);
let apply_page_setup = {
async move {
let f_script = async {
if should_inject_script {
let _ = chrome_page
.add_script_to_evaluate_on_new_document(merged_script)
.await;
}
};
let f_adblock = async {
if block_ads {
let _ = chrome_page.set_ad_blocking_enabled(true).await;
}
};
let f_ua = async {
if !ua_for_profiles.is_empty() {
let _ = chrome_page.set_user_agent(ua_for_profiles).await;
}
};
// Hardware concurrency is only emulated in stealth mode.
let f_hc = async {
if use_stealth {
let _ = chrome_page.emulate_hardware_concurrency(hc.into()).await;
}
};
tokio::join!(f_script, f_adblock, f_ua, f_hc);
}
};
let disable_log = async {
if config.disable_log {
let _ = chrome_page.disable_log().await;
}
};
let bypass_csp = async {
if config.bypass_csp {
let _ = chrome_page.set_bypass_csp(true).await;
}
};
// One shared deadline for every setup step.
if tokio::time::timeout(tokio::time::Duration::from_secs(15), async {
tokio::join!(
apply_page_setup,
disable_log,
bypass_csp,
configure_browser(chrome_page, config),
)
})
.await
.is_err()
{
log::error!("failed to setup event handlers within 15 seconds.");
}
}
/// Bundle held by [`BrowserController`]: the shared browser, the optional
/// handler task (aborted by `BrowserController::dispose`), and the optional
/// browser context id.
pub(crate) type BrowserControl = (
std::sync::Arc<chromiumoxide::Browser>,
Option<tokio::task::JoinHandle<()>>,
Option<chromiumoxide::cdp::browser_protocol::browser::BrowserContextId>,
);
/// Once-initialized cell holding an optional [`BrowserController`].
#[cfg(all(feature = "smart", not(feature = "decentralized")))]
pub(crate) type OnceBrowser = tokio::sync::OnceCell<Option<BrowserController>>;
/// Owner of a browser and its handler task; the task is aborted on
/// `dispose` or when the controller is dropped.
pub struct BrowserController {
/// Browser, handler task and context id.
pub browser: BrowserControl,
/// Set once `dispose` has run.
pub closed: bool,
/// Shared flag supplied at construction; not modified by the controller itself.
pub browser_dead: std::sync::Arc<std::sync::atomic::AtomicBool>,
}
impl BrowserController {
pub(crate) fn new(
browser: BrowserControl,
browser_dead: std::sync::Arc<std::sync::atomic::AtomicBool>,
) -> Self {
BrowserController {
browser,
closed: false,
browser_dead,
}
}
pub fn dispose(&mut self) {
if !self.closed {
self.closed = true;
if let Some(handler) = self.browser.1.take() {
handler.abort();
}
}
}
}
/// Dropping the controller disposes it, aborting the handler task if it has
/// not been disposed already.
impl Drop for BrowserController {
fn drop(&mut self) {
self.dispose();
}
}
/// Additional connection to an already running browser, opened by
/// `HedgeBrowser::connect` with its own handler task and browser context.
#[cfg(feature = "hedge")]
pub(crate) struct HedgeBrowser {
/// Browser handle for the second connection.
pub browser: Browser,
/// Context created on this connection, if creation succeeded.
pub context_id: Option<BrowserContextId>,
// Handler polling task; aborted when the value is dropped.
handler: Option<JoinHandle<()>>,
}
#[cfg(feature = "hedge")]
impl HedgeBrowser {
/// Decide whether a new connection should be opened.
///
/// Returns `true` when any of the following holds:
/// * `browser_dead` is set,
/// * the tracker reports at least 3 consecutive errors,
/// * the tracker reports at least 8 hedge fires with a win rate above 60%.
#[inline]
pub fn should_new_connection(
tracker: &crate::utils::hedge::HedgeTracker,
browser_dead: &std::sync::atomic::AtomicBool,
) -> bool {
if browser_dead.load(std::sync::atomic::Ordering::Acquire) {
return true;
}
if tracker.consecutive_errors() >= 3 {
return true;
}
let fires = tracker.hedge_fires();
fires >= 8 && tracker.hedge_win_rate_pct() > 60
}
/// Open a second connection to the browser behind `primary`.
///
/// Connects to `primary`'s websocket address with a handler configuration
/// built from `config`, allowing 10 seconds for the connection. A task is
/// spawned to poll the new handler, and a browser context (disposed on
/// detach) is created on the new connection.
///
/// Returns `None` when the connection fails or times out. A failed context
/// creation is not fatal: the value is returned with `context_id == None`.
pub async fn connect(primary: &Browser, config: &Configuration) -> Option<Self> {
let ws_url = primary.websocket_address().clone();
let handler_config = create_handler_config(config);
let (mut browser, mut handler) = match tokio::time::timeout(
Duration::from_secs(10),
Browser::connect_with_config(ws_url, handler_config),
)
.await
{
Ok(Ok(pair)) => pair,
Ok(Err(e)) => {
log::warn!(
"[hedge-chrome] failed to open second WS connection: {:?}",
e
);
return None;
}
Err(_) => {
log::warn!("[hedge-chrome] second WS connection timed out (10s)");
return None;
}
};
// Poll the handler until the stream ends or a websocket/launch error occurs;
// other errors are skipped.
let handle = tokio::task::spawn(async move {
while let Some(k) = handler.next().await {
if let Err(e) = k {
match e {
CdpError::Ws(_)
| CdpError::LaunchExit(_, _)
| CdpError::LaunchTimeout(_)
| CdpError::LaunchIo(_, _) => break,
_ => continue,
}
}
}
});
let mut create_ctx =
chromiumoxide::cdp::browser_protocol::target::CreateBrowserContextParams::default();
create_ctx.dispose_on_detach = Some(true);
let context_id = match browser.create_browser_context(create_ctx).await {
Ok(id) => {
let _ = browser.send_new_context(id.clone()).await;
Some(id)
}
Err(e) => {
log::debug!(
"[hedge-chrome] browser context creation failed (non-fatal): {:?}",
e
);
None
}
};
Some(Self {
browser,
context_id,
handler: Some(handle),
})
}
}
/// Dropping a `HedgeBrowser` aborts its handler polling task.
#[cfg(feature = "hedge")]
impl Drop for HedgeBrowser {
fn drop(&mut self) {
if let Some(h) = self.handler.take() {
h.abort();
}
}
}
/// Guard that closes the wrapped page when dropped, unless `defuse` was
/// called first.
#[cfg(all(feature = "chrome", not(feature = "decentralized")))]
pub(crate) struct TabCloseGuard(Option<chromiumoxide::Page>);
#[cfg(all(feature = "chrome", not(feature = "decentralized")))]
impl TabCloseGuard {
/// Arm a guard for `page`.
#[inline]
pub fn new(page: chromiumoxide::Page) -> Self {
Self(Some(page))
}
/// Disarm the guard: the page handle is released without scheduling a close.
#[inline]
pub fn defuse(mut self) {
// Clearing the slot makes the `Drop` impl a no-op.
self.0 = None;
}
}
/// If the guard is still armed when dropped, the page is closed on a spawned
/// task, since `drop` cannot await. The close is given at most 5 seconds and
/// its result is ignored.
#[cfg(all(feature = "chrome", not(feature = "decentralized")))]
impl Drop for TabCloseGuard {
fn drop(&mut self) {
if let Some(page) = self.0.take() {
tokio::task::spawn(async move {
let _ =
tokio::time::timeout(tokio::time::Duration::from_secs(5), page.close()).await;
});
}
}
}