1use crate::features::chrome_args::CHROME_ARGS;
2use crate::utils::{detect_chrome::get_detect_chrome_executable, log};
3use crate::{configuration::Configuration, tokio_stream::StreamExt};
4use chromiumoxide::cdp::browser_protocol::browser::{
5 SetDownloadBehaviorBehavior, SetDownloadBehaviorParamsBuilder,
6};
7use chromiumoxide::cdp::browser_protocol::{
8 browser::BrowserContextId, emulation::SetGeolocationOverrideParams, network::CookieParam,
9 target::CreateTargetParams,
10};
11use chromiumoxide::error::CdpError;
12use chromiumoxide::handler::REQUEST_TIMEOUT;
13use chromiumoxide::serde_json;
14use chromiumoxide::Page;
15use chromiumoxide::{handler::HandlerConfig, Browser, BrowserConfig};
16use lazy_static::lazy_static;
17#[cfg(feature = "cookies")]
18use std::sync::Arc;
19use std::time::Duration;
20use tokio::task::JoinHandle;
21use url::Url;
22
lazy_static! {
    /// `true` only when the `LOOP_BACK_PROXY` environment variable is set to exactly
    /// `"true"`; a missing or unreadable variable falls back to an empty string and
    /// therefore yields `false`.
    static ref LOOP_BACK_PROXY: bool = std::env::var("LOOP_BACK_PROXY").unwrap_or_default() == "true";
}
27
/// Build the CDP cookie parameters for every cookie the jar holds for `url`.
///
/// The jar is asked for its cookie header value for `url`; each `name=value`
/// segment of that header becomes one [`CookieParam`] carrying the URL, the
/// URL's domain (when it has one) and the URL's path (`/` when empty).
/// `cookie_str` is only inspected for the substrings `Secure` and `HttpOnly`;
/// when present, the matching flag is applied to every cookie produced.
///
/// Blank segments (for example the one produced by a trailing `;`) are
/// skipped, matching the other cookie parsers in this file.
///
/// # Errors
///
/// Returns an error string when the jar has no cookies for `url`, when the
/// header value cannot be read as a string, when a non-blank segment has no
/// `=`, or when the cookie builder rejects a cookie.
#[cfg(feature = "cookies")]
pub fn parse_cookies_with_jar(
    jar: &Arc<crate::client::cookie::Jar>,
    cookie_str: &str,
    url: &Url,
) -> Result<Vec<CookieParam>, String> {
    use crate::client::cookie::CookieStore;

    let header_value = jar
        .cookies(url)
        .ok_or_else(|| "No cookies found".to_string())?;
    let cookie_header_str = header_value.to_str().map_err(|e| e.to_string())?;

    // These values do not depend on the individual cookie, so compute them once.
    let secure = cookie_str.contains("Secure");
    let http_only = cookie_str.contains("HttpOnly");
    let domain = url.domain();
    let path = match url.path() {
        "" => "/",
        p => p,
    };

    let mut cookies = Vec::new();

    for pair in cookie_header_str.split(';') {
        let trimmed = pair.trim();

        // Tolerate stray separators instead of failing the whole batch.
        if trimmed.is_empty() {
            continue;
        }

        let (name, value) = trimmed
            .split_once('=')
            .ok_or_else(|| format!("Invalid cookie pair: {}", pair))?;

        let mut builder = CookieParam::builder()
            .name(name.trim())
            .value(value.trim())
            .url(url.as_str());

        if let Some(domain) = domain {
            builder = builder.domain(domain.to_string());
        }

        builder = builder.path(path);

        if secure {
            builder = builder.secure(true);
        }

        if http_only {
            builder = builder.http_only(true);
        }

        cookies.push(builder.build()?);
    }

    Ok(cookies)
}
84
85#[cfg(not(feature = "cookies"))]
87pub fn parse_cookies_with_jar(cookie_str: &str, url: &Url) -> Result<Vec<CookieParam>, String> {
88 Ok(Default::default())
89}
90
/// Add every `name=value` pair of a `;`-separated cookie header to `jar` for `url`.
///
/// Blank segments are skipped. Each pair is stored with `Path=/`.
///
/// # Errors
///
/// Returns an error string for the first non-blank segment without an `=`.
/// Pairs seen before that segment have already been added to the jar.
#[cfg(feature = "cookies")]
pub fn seed_jar_from_cookie_header(
    jar: &std::sync::Arc<crate::client::cookie::Jar>,
    cookie_header: &str,
    url: &url::Url,
) -> Result<(), String> {
    let segments = cookie_header
        .split(';')
        .map(str::trim)
        .filter(|segment| !segment.is_empty());

    for pair in segments {
        let Some((name, value)) = pair.split_once('=') else {
            return Err(format!("Invalid cookie pair: {pair}"));
        };

        let entry = format!("{}={}; Path=/", name.trim(), value.trim());
        jar.add_cookie_str(&entry, url);
    }

    Ok(())
}
113
/// Send `cookies` to `page` with a single CDP `SetCookiesParams` command.
///
/// An empty list is a no-op that returns `Ok(())` without contacting the page.
///
/// # Errors
///
/// Returns the stringified error when executing the command fails.
#[cfg(all(feature = "cookies", feature = "chrome"))]
pub async fn set_page_cookies(
    page: &chromiumoxide::Page,
    cookies: Vec<chromiumoxide::cdp::browser_protocol::network::CookieParam>,
) -> Result<(), String> {
    use chromiumoxide::cdp::browser_protocol::network::SetCookiesParams;

    if cookies.is_empty() {
        return Ok(());
    }

    match page.execute(SetCookiesParams::new(cookies)).await {
        Ok(_) => Ok(()),
        Err(err) => Err(err.to_string()),
    }
}
132
/// Convert the cookies the jar holds for `url` into CDP cookie parameters.
///
/// Returns an empty list when the jar has nothing for `url`. Each non-blank
/// `name=value` segment of the jar's header value becomes one `CookieParam`
/// bound to `url`.
///
/// # Errors
///
/// Returns an error string when the header value cannot be read as a string,
/// when a non-blank segment has no `=`, or when the cookie builder fails.
#[cfg(feature = "cookies")]
pub fn cookie_params_from_jar(
    jar: &std::sync::Arc<crate::client::cookie::Jar>,
    url: &url::Url,
) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::CookieParam>, String> {
    use crate::client::cookie::CookieStore;
    use chromiumoxide::cdp::browser_protocol::network::CookieParam;

    let header_value = match jar.cookies(url) {
        Some(value) => value,
        None => return Ok(Vec::new()),
    };

    let header = header_value.to_str().map_err(|e| e.to_string())?;

    // Collecting into `Result` stops at the first failing segment, like an
    // early `return Err(..)` inside a loop would.
    header
        .split(';')
        .map(str::trim)
        .filter(|pair| !pair.is_empty())
        .map(|pair| -> Result<CookieParam, String> {
            let (name, value) = pair
                .split_once('=')
                .ok_or_else(|| format!("Invalid cookie pair: {pair}"))?;

            CookieParam::builder()
                .name(name.trim())
                .value(value.trim())
                .url(url.as_str())
                .build()
                .map_err(|e| e.to_string())
        })
        .collect()
}
171
/// Seed `jar` from the configured cookie string and push the resulting cookies
/// to `browser`.
///
/// Does nothing when `config.cookie_str` is empty or when no parsed URL is
/// available. Every failure along the way (seeding, parsing, sending) is
/// ignored, so this is a best-effort operation.
#[cfg(feature = "cookies")]
pub async fn set_cookies(
    jar: &Arc<crate::client::cookie::Jar>,
    config: &Configuration,
    url_parsed: &Option<Box<Url>>,
    browser: &Browser,
) {
    let parsed = match url_parsed.as_deref() {
        Some(parsed) if !config.cookie_str.is_empty() => parsed,
        _ => return,
    };

    // A malformed header must not prevent the cookies already in the jar
    // from being forwarded, so the seeding result is deliberately dropped.
    let _ = seed_jar_from_cookie_header(jar, &config.cookie_str, parsed);

    if let Ok(cookies) = parse_cookies_with_jar(jar, &config.cookie_str, parsed) {
        if !cookies.is_empty() {
            let _ = browser.set_cookies(cookies).await;
        }
    }
}
197
/// Adjust a Chrome argument list so the built-in AI features are switched on.
///
/// * Every `--disable-features=` switch has `OptimizationHints` removed from
///   its comma-separated list.
/// * Every `--enable-features=` switch gets the AI feature list appended.
/// * When no `--enable-features=` switch exists, one is pushed at the end.
fn patch_chrome_ai_args(args: &mut Vec<String>) {
    const DISABLE_PREFIX: &str = "--disable-features=";
    const ENABLE_PREFIX: &str = "--enable-features=";
    const AI_FEATURES: &str = "OptimizationGuideOnDeviceModel:BypassPerfRequirement/true,PromptAPIForGeminiNano,PromptAPIForGeminiNanoMultimodalInput";

    let mut has_enable_switch = false;

    for arg in args.iter_mut() {
        if let Some(list) = arg.strip_prefix(DISABLE_PREFIX) {
            let kept = list
                .split(',')
                .filter(|feature| *feature != "OptimizationHints")
                .collect::<Vec<&str>>()
                .join(",");
            *arg = format!("{}{}", DISABLE_PREFIX, kept);
        } else if arg.starts_with(ENABLE_PREFIX) {
            has_enable_switch = true;
            arg.push(',');
            arg.push_str(AI_FEATURES);
        }
    }

    if !has_enable_switch {
        args.push(format!("{}{}", ENABLE_PREFIX, AI_FEATURES));
    }
}
222
223#[cfg(not(feature = "chrome_headed"))]
225pub fn get_browser_config(
226 proxies: &Option<Vec<crate::configuration::RequestProxy>>,
227 intercept: bool,
228 cache_enabled: bool,
229 viewport: impl Into<Option<chromiumoxide::handler::viewport::Viewport>>,
230 request_timeout: &Option<core::time::Duration>,
231 use_chrome_ai: bool,
232) -> Option<BrowserConfig> {
233 let builder = BrowserConfig::builder()
234 .disable_default_args()
235 .no_sandbox()
236 .request_timeout(match request_timeout.as_ref() {
237 Some(timeout) => *timeout,
238 _ => Duration::from_millis(REQUEST_TIMEOUT),
239 });
240
241 let builder = if cache_enabled {
242 builder.enable_cache()
243 } else {
244 builder.disable_cache()
245 };
246
247 let builder = if intercept {
249 builder.enable_request_intercept()
250 } else {
251 builder
252 };
253
254 let builder = match proxies {
255 Some(proxies) => {
256 let mut chrome_args = Vec::from(CHROME_ARGS.map(|e| e.replace("://", "=").to_string()));
257 if use_chrome_ai {
258 patch_chrome_ai_args(&mut chrome_args);
259 }
260 let base_proxies = proxies
261 .iter()
262 .filter_map(|p| {
263 if p.ignore == crate::configuration::ProxyIgnore::Chrome {
264 None
265 } else {
266 Some(p.addr.to_owned())
267 }
268 })
269 .collect::<Vec<String>>();
270
271 if !base_proxies.is_empty() {
272 chrome_args.push(string_concat!(r#"--proxy-server="#, base_proxies.join(";")));
273 }
274
275 builder.args(chrome_args)
276 }
277 _ => {
278 if use_chrome_ai {
279 let mut chrome_args: Vec<String> =
280 CHROME_ARGS.iter().map(|e| e.to_string()).collect();
281 patch_chrome_ai_args(&mut chrome_args);
282 builder.args(chrome_args)
283 } else {
284 builder.args(CHROME_ARGS)
285 }
286 }
287 };
288 let builder = match get_detect_chrome_executable() {
289 Some(v) => builder.chrome_executable(v),
290 _ => builder,
291 };
292
293 match builder.viewport(viewport).build() {
294 Ok(b) => Some(b),
295 Err(error) => {
296 log("", error);
297 None
298 }
299 }
300}
301
/// Build the launch configuration for a headed (visible window) Chrome instance.
///
/// * `proxies` – the addresses of all proxies not marked `ProxyIgnore::Chrome`
///   are joined with `;` into one `--proxy-server=` switch. The switch is
///   omitted when no address remains, matching the headless variant of this
///   function.
/// * `intercept` – enables request interception on the builder.
/// * `cache_enabled` – toggles the builder's cache setting.
/// * `viewport` – forwarded to the builder unchanged.
/// * `request_timeout` – falls back to `REQUEST_TIMEOUT` milliseconds when `None`.
/// * `use_chrome_ai` – runs the argument list through `patch_chrome_ai_args`.
///
/// Returns `None` (after logging the error) when the builder fails.
#[cfg(feature = "chrome_headed")]
pub fn get_browser_config(
    proxies: &Option<Vec<crate::configuration::RequestProxy>>,
    intercept: bool,
    cache_enabled: bool,
    viewport: impl Into<Option<chromiumoxide::handler::viewport::Viewport>>,
    request_timeout: &Option<core::time::Duration>,
    use_chrome_ai: bool,
) -> Option<BrowserConfig> {
    let builder = BrowserConfig::builder()
        .disable_default_args()
        .no_sandbox()
        .request_timeout(match request_timeout.as_ref() {
            Some(timeout) => *timeout,
            _ => Duration::from_millis(REQUEST_TIMEOUT),
        })
        .with_head();

    let builder = if cache_enabled {
        builder.enable_cache()
    } else {
        builder.disable_cache()
    };

    let builder = if intercept {
        builder.enable_request_intercept()
    } else {
        builder
    };

    // The `--headless` switch is blanked out for headed mode; every other
    // default argument has `://` replaced by `=`.
    // NOTE(review): the blanked entry stays in the list as an empty string —
    // confirm Chrome ignores it, or filter it out.
    let mut chrome_args = Vec::from(CHROME_ARGS.map(|e| {
        if e == "--headless" {
            "".to_string()
        } else {
            e.replace("://", "=").to_string()
        }
    }));

    if use_chrome_ai {
        patch_chrome_ai_args(&mut chrome_args);
    }

    if let Some(proxies) = proxies {
        let base_proxies = proxies
            .iter()
            .filter_map(|p| {
                if p.ignore == crate::configuration::ProxyIgnore::Chrome {
                    None
                } else {
                    Some(p.addr.to_owned())
                }
            })
            .collect::<Vec<String>>();

        // Do not emit an empty `--proxy-server=` switch when every proxy was
        // filtered out.
        if !base_proxies.is_empty() {
            chrome_args.push(string_concat!(r#"--proxy-server="#, base_proxies.join(";")));
        }
    }

    let builder = builder.args(chrome_args);

    let builder = match get_detect_chrome_executable() {
        Some(v) => builder.chrome_executable(v),
        _ => builder,
    };

    match builder.viewport(viewport).build() {
        Ok(b) => Some(b),
        Err(error) => {
            log("", error);
            None
        }
    }
}
376
377pub fn create_handler_config(config: &Configuration) -> HandlerConfig {
379 HandlerConfig {
380 request_timeout: match config.request_timeout.as_ref() {
381 Some(timeout) => *timeout,
382 _ => Duration::from_millis(REQUEST_TIMEOUT),
383 },
384 request_intercept: config.chrome_intercept.enabled,
385 cache_enabled: config.cache,
386 service_worker_enabled: config.service_worker_enabled,
387 viewport: match config.viewport {
388 Some(ref v) => Some(chromiumoxide::handler::viewport::Viewport::from(
389 v.to_owned(),
390 )),
391 _ => default_viewport(),
392 },
393 ignore_visuals: config.chrome_intercept.block_visuals,
394 whitelist_patterns: config.chrome_intercept.whitelist_patterns.clone(),
395 blacklist_patterns: config.chrome_intercept.blacklist_patterns.clone(),
396 ignore_ads: config.chrome_intercept.block_ads,
397 ignore_javascript: config.chrome_intercept.block_javascript,
398 ignore_analytics: config.chrome_intercept.block_analytics,
399 ignore_stylesheets: config.chrome_intercept.block_stylesheets,
400 extra_headers: match &config.headers {
401 Some(headers) => {
402 let mut hm = crate::utils::header_utils::header_map_to_hash_map(headers.inner());
403
404 cleanup_invalid_headers(&mut hm);
405
406 if hm.is_empty() {
407 None
408 } else {
409 if cfg!(feature = "real_browser") {
410 crate::utils::header_utils::rewrite_headers_to_title_case(&mut hm);
411 }
412 Some(hm)
413 }
414 }
415 _ => None,
416 },
417 intercept_manager: config.chrome_intercept.intercept_manager,
418 only_html: config.only_html && !config.full_resources,
419 max_bytes_allowed: config.max_bytes_allowed,
420 ..HandlerConfig::default()
421 }
422}
423
lazy_static! {
    /// Value of the `CHROME_URL` environment variable, or `None` when it is unset or
    /// unreadable. Used as the connection target when the configuration does not
    /// provide its own `chrome_connection_url`.
    static ref CHROM_BASE: Option<String> = std::env::var("CHROME_URL").ok();
}
427
/// Tries a list of Chrome connection endpoints in order until one accepts the
/// connection.
pub struct ChromeConnectionFailover {
    /// Endpoint URLs, tried from first to last.
    urls: Vec<String>,
    /// One failure counter per entry in `urls` (same index); reset to zero when
    /// that endpoint connects successfully.
    errors: Vec<std::sync::atomic::AtomicU32>,
    /// Number of retries per endpoint after the first attempt.
    max_retries: u32,
}
442
443impl ChromeConnectionFailover {
444 pub fn new(urls: Vec<String>, max_retries: u32) -> Self {
446 let errors = urls
447 .iter()
448 .map(|_| std::sync::atomic::AtomicU32::new(0))
449 .collect();
450 Self {
451 urls,
452 errors,
453 max_retries,
454 }
455 }
456
457 pub async fn connect(
463 &self,
464 config: &Configuration,
465 ) -> Option<(Browser, chromiumoxide::Handler)> {
466 let handler_config_base = create_handler_config(config);
467
468 for (idx, url) in self.urls.iter().enumerate() {
469 let err_count = &self.errors[idx];
470
471 for attempt in 0..=self.max_retries {
472 match Browser::connect_with_config(url.as_str(), handler_config_base.clone()).await
473 {
474 Ok(pair) => {
475 err_count.store(0, std::sync::atomic::Ordering::Relaxed);
477 if idx > 0 {
478 log::info!(
479 "[chrome-failover] connected to endpoint {} ({}) after skipping {}",
480 idx,
481 url,
482 idx
483 );
484 }
485 return Some(pair);
486 }
487 Err(e) => {
488 let n = err_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed) + 1;
489 log::warn!(
490 "[chrome-failover] endpoint {} ({}) attempt {}/{} failed: {:?}",
491 idx,
492 url,
493 attempt + 1,
494 self.max_retries + 1,
495 e
496 );
497 if attempt < self.max_retries {
498 let backoff = crate::utils::backoff::backoff_delay(attempt, 100, 5_000);
499 tokio::time::sleep(backoff).await;
500 } else {
501 log::warn!(
502 "[chrome-failover] endpoint {} exhausted ({} errors), trying next",
503 idx,
504 n
505 );
506 }
507 }
508 }
509 }
510 }
511
512 log::error!(
513 "[chrome-failover] all {} endpoints exhausted",
514 self.urls.len()
515 );
516 None
517 }
518
519 #[inline]
521 pub fn len(&self) -> usize {
522 self.urls.len()
523 }
524
525 #[inline]
527 pub fn is_empty(&self) -> bool {
528 self.urls.is_empty()
529 }
530}
531
/// Default viewport when the `real_browser` feature is disabled: none.
#[cfg(not(feature = "real_browser"))]
pub fn default_viewport() -> Option<chromiumoxide::handler::viewport::Viewport> {
    None
}
537
/// Default viewport when the `real_browser` feature is enabled: the result of
/// `get_random_viewport()` converted into the CDP viewport type.
#[cfg(feature = "real_browser")]
pub fn default_viewport() -> Option<chromiumoxide::handler::viewport::Viewport> {
    let random_viewport = super::chrome_viewport::get_random_viewport();
    Some(random_viewport.into())
}
546
/// Remove the `User-Agent`, `Host`, `Connection` and `Content-Length` entries
/// from `hm`.
///
/// Header names are compared ASCII case-insensitively, so every spelling
/// (`host`, `Host`, `HOST`, ...) is removed. All other entries are left
/// untouched.
// NOTE(review): presumably these are stripped because the browser manages
// them itself — confirm against the callers that pass extra headers.
pub fn cleanup_invalid_headers(hm: &mut std::collections::HashMap<String, String>) {
    const MANAGED_HEADERS: [&str; 4] = ["user-agent", "host", "connection", "content-length"];

    hm.retain(|name, _| {
        !MANAGED_HEADERS
            .iter()
            .any(|managed| name.eq_ignore_ascii_case(managed))
    });
}
558
/// Connect to an existing Chrome instance or launch a new one, depending on `config`.
///
/// Resolution order:
/// 1. A non-empty `config.chrome_connection_urls` list is handed to
///    [`ChromeConnectionFailover`] (3 retries per endpoint) and its result is returned.
/// 2. Otherwise `config.chrome_connection_url`, or the `CHROME_URL` environment value
///    as a fallback, is connected to with up to 10 retries and a backoff sleep in between.
/// 3. Otherwise a local browser is launched from `get_browser_config`.
///
/// Returns `None` when no connection could be made, the launch configuration
/// could not be built, or the launch failed.
pub async fn setup_browser_configuration(
    config: &Configuration,
) -> Option<(Browser, chromiumoxide::Handler)> {
    let proxies = &config.proxies;

    // Multiple remote endpoints take precedence over every other option.
    if let Some(ref urls) = config.chrome_connection_urls {
        if !urls.is_empty() {
            let failover = ChromeConnectionFailover::new(urls.clone(), 3);
            return failover.connect(config).await;
        }
    }

    // An explicit URL in the configuration wins over the environment variable.
    let chrome_connection = if config.chrome_connection_url.is_some() {
        config.chrome_connection_url.as_ref()
    } else {
        CHROM_BASE.as_ref()
    };

    match chrome_connection {
        Some(v) => {
            let mut attempts = 0;
            let max_retries = 10;
            let mut browser = None;

            // One initial attempt plus `max_retries` retries.
            while attempts <= max_retries {
                match Browser::connect_with_config(v, create_handler_config(config)).await {
                    Ok(b) => {
                        browser = Some(b);
                        break;
                    }
                    Err(err) => {
                        log::error!("{:?}", err);
                        attempts += 1;
                        if attempts > max_retries {
                            log::error!("Exceeded maximum retry attempts");
                            break;
                        }
                        let backoff = crate::utils::backoff::backoff_delay(attempts, 100, 5_000);
                        tokio::time::sleep(backoff).await;
                    }
                }
            }

            browser
        }
        // No remote endpoint: launch a local browser.
        _ => match get_browser_config(
            proxies,
            config.chrome_intercept.enabled,
            config.cache,
            match config.viewport {
                Some(ref v) => Some(chromiumoxide::handler::viewport::Viewport::from(
                    v.to_owned(),
                )),
                _ => default_viewport(),
            },
            &config.request_timeout,
            config
                .remote_multimodal
                .as_ref()
                .map(|m| m.should_use_chrome_ai())
                .unwrap_or(false),
        ) {
            Some(mut browser_config) => {
                // Copy the interception settings onto the launch configuration.
                browser_config.ignore_visuals = config.chrome_intercept.block_visuals;
                browser_config.ignore_javascript = config.chrome_intercept.block_javascript;
                browser_config.ignore_ads = config.chrome_intercept.block_ads;
                browser_config.whitelist_patterns =
                    config.chrome_intercept.whitelist_patterns.clone();
                browser_config.blacklist_patterns =
                    config.chrome_intercept.blacklist_patterns.clone();
                browser_config.ignore_stylesheets = config.chrome_intercept.block_stylesheets;
                browser_config.ignore_analytics = config.chrome_intercept.block_analytics;
                // Same header preparation as in `create_handler_config`.
                browser_config.extra_headers = match &config.headers {
                    Some(headers) => {
                        let mut hm =
                            crate::utils::header_utils::header_map_to_hash_map(headers.inner());

                        cleanup_invalid_headers(&mut hm);

                        if hm.is_empty() {
                            None
                        } else {
                            if cfg!(feature = "real_browser") {
                                crate::utils::header_utils::rewrite_headers_to_title_case(&mut hm);
                            }
                            Some(hm)
                        }
                    }
                    _ => None,
                };
                browser_config.intercept_manager = config.chrome_intercept.intercept_manager;
                browser_config.only_html = config.only_html && !config.full_resources;

                match Browser::launch(browser_config).await {
                    Ok(browser) => Some(browser),
                    Err(e) => {
                        log::error!("Browser::launch() failed: {:?}", e);
                        None
                    }
                }
            }
            _ => None,
        },
    }
}
670
/// Obtain a browser, start its event handler task and create a browser context.
///
/// Returns the browser, the handle of the spawned handler task, the id of the
/// created browser context (`None` when context creation failed) and a shared
/// flag that the handler task sets to `true` when it stops.
///
/// When `jar` is provided and the context was created, cookies are applied
/// through `set_cookies`. Downloads are set to `Deny` for the browser's context.
///
/// Returns `None` when `setup_browser_configuration` yields no browser.
pub async fn launch_browser_base(
    config: &Configuration,
    url_parsed: &Option<Box<Url>>,
    jar: Option<&std::sync::Arc<crate::client::cookie::Jar>>,
) -> Option<(
    Browser,
    tokio::task::JoinHandle<()>,
    Option<BrowserContextId>,
    std::sync::Arc<std::sync::atomic::AtomicBool>,
)> {
    use chromiumoxide::{
        cdp::browser_protocol::target::CreateBrowserContextParams, error::CdpError,
    };

    let browser_configuration = setup_browser_configuration(config).await;

    match browser_configuration {
        Some(c) => {
            let (mut browser, mut handler) = c;
            let mut context_id = None;

            // Shared flag: the handler task flips it when it ends for any reason.
            let browser_dead = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
            let browser_dead_signal = browser_dead.clone();

            let handle = tokio::task::spawn(async move {
                // Drive the handler stream; only websocket and launch errors stop
                // the loop, every other error is skipped.
                while let Some(k) = handler.next().await {
                    if let Err(e) = k {
                        match e {
                            CdpError::Ws(_)
                            | CdpError::LaunchExit(_, _)
                            | CdpError::LaunchTimeout(_)
                            | CdpError::LaunchIo(_, _) => {
                                browser_dead_signal
                                    .store(true, std::sync::atomic::Ordering::Release);
                                log::error!("Browser handler fatal error: {:?}", e);
                                break;
                            }
                            _ => {
                                continue;
                            }
                        }
                    }
                }
                // Also reached when the stream ends without a fatal error.
                browser_dead_signal.store(true, std::sync::atomic::Ordering::Release);
            });

            let mut create_content = CreateBrowserContextParams::default();
            create_content.dispose_on_detach = Some(true);

            if let Some(ref proxies) = config.proxies {
                let use_plain_http = proxies.len() >= 2;

                for proxie in proxies.iter() {
                    if proxie.ignore == crate::configuration::ProxyIgnore::Chrome {
                        continue;
                    }

                    let proxie = &proxie.addr;

                    if !proxie.is_empty() {
                        if proxie.starts_with("socks://") {
                            create_content.proxy_server =
                                Some(proxie.replacen("socks://", "http://", 1));
                            // With two or more proxies the first `socks://` entry wins.
                            if use_plain_http {
                                break;
                            }
                        }

                        if *LOOP_BACK_PROXY && proxie.starts_with("http://localhost") {
                            create_content.proxy_bypass_list =
                                Some("<-loopback>;localhost;[::1]".into());
                        }

                        // NOTE(review): with a single `socks://` proxy this overwrites
                        // the `http://` rewrite made above with the original address —
                        // confirm this is intended. Without a `socks://` entry the last
                        // usable proxy in the list wins.
                        create_content.proxy_server = Some(proxie.into());
                    }
                }
            }

            if let Ok(c) = browser.create_browser_context(create_content).await {
                let _ = browser.send_new_context(c.clone()).await;
                let _ = context_id.insert(c);
                if let Some(jar) = jar {
                    set_cookies(jar, config, url_parsed, &browser).await;
                }
                if let Some(id) = &browser.browser_context.id {
                    let cmd = SetDownloadBehaviorParamsBuilder::default();

                    if let Ok(cmd) = cmd
                        .behavior(SetDownloadBehaviorBehavior::Deny)
                        .events_enabled(false)
                        .browser_context_id(id.clone())
                        .build()
                    {
                        let _ = browser.execute(cmd).await;
                    }
                }
            } else {
                // NOTE(review): the handler task is aborted here, yet the browser is
                // still returned below with `context_id == None`; an aborted task never
                // sets `browser_dead` — verify callers handle this state.
                handle.abort();
            }

            Some((browser, handle, context_id, browser_dead))
        }
        _ => None,
    }
}
783
/// Launch or connect to a browser without a cookie jar.
///
/// Delegates to `launch_browser_base` with `jar` set to `None` and returns its
/// result unchanged.
pub async fn launch_browser(
    config: &Configuration,
    url_parsed: &Option<Box<Url>>,
) -> Option<(
    Browser,
    tokio::task::JoinHandle<()>,
    Option<BrowserContextId>,
    std::sync::Arc<std::sync::atomic::AtomicBool>,
)> {
    launch_browser_base(config, url_parsed, None).await
}
796
/// Launch or connect to a browser, optionally applying cookies from `jar`.
///
/// Delegates to `launch_browser_base`, forwarding `jar` as given, and returns
/// its result unchanged.
pub async fn launch_browser_cookies(
    config: &Configuration,
    url_parsed: &Option<Box<Url>>,
    jar: Option<&Arc<crate::client::cookie::Jar>>,
) -> Option<(
    Browser,
    tokio::task::JoinHandle<()>,
    Option<BrowserContextId>,
    std::sync::Arc<std::sync::atomic::AtomicBool>,
)> {
    launch_browser_base(config, url_parsed, jar).await
}
810
/// Geo-IP details deserialized from the JSON returned by the lookup service
/// queried in `detect_geo_info`.
///
/// Every field is optional.
// NOTE(review): the field names presumably mirror the JSON keys of the lookup
// services — the three endpoints may not share one schema; confirm which
// fields each one actually fills.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct GeoInfo {
    pub ip: Option<String>,
    pub network: Option<String>,
    pub version: Option<String>,
    pub city: Option<String>,
    pub region: Option<String>,
    pub region_code: Option<String>,
    pub country: Option<String>,
    pub country_name: Option<String>,
    pub country_code: Option<String>,
    pub country_code_iso3: Option<String>,
    pub country_capital: Option<String>,
    pub country_tld: Option<String>,
    pub continent_code: Option<String>,
    pub in_eu: Option<bool>,
    pub postal: Option<String>,
    /// Used together with `longitude` for the geolocation override in `configure_browser`.
    pub latitude: Option<f64>,
    /// Used together with `latitude` for the geolocation override in `configure_browser`.
    pub longitude: Option<f64>,
    /// Used as the timezone override in `configure_browser` when non-empty.
    pub timezone: Option<String>,
    pub utc_offset: Option<String>,
    pub country_calling_code: Option<String>,
    pub currency: Option<String>,
    pub currency_name: Option<String>,
    /// Comma-separated list; `configure_browser` uses the first entry as the locale.
    pub languages: Option<String>,
    pub country_area: Option<f64>,
    pub country_population: Option<u64>,
    pub asn: Option<String>,
    pub org: Option<String>,
}
870
/// Look up geo-IP details by navigating `new_page` to a randomly chosen
/// lookup service and parsing the JSON it renders.
///
/// The JSON is taken from between the first `<pre>` and the first `</pre>` in
/// the page content. Returns `None` when navigation fails, the markers are
/// missing or out of order, or the payload does not deserialize.
#[cfg(feature = "serde")]
pub async fn detect_geo_info(new_page: &Page) -> Option<GeoInfo> {
    use rand::prelude::IndexedRandom;

    let endpoints = [
        "https://ipapi.co/json",
        "https://ipinfo.io/json",
        "https://ipwho.is/",
    ];
    let endpoint = endpoints.choose(&mut rand::rng())?;

    new_page.goto(*endpoint).await.ok()?;
    new_page.wait_for_navigation().await.ok()?;

    let body = new_page.content().await.ok()?;

    let open_tag = "<pre>";
    let start = body.find(open_tag)? + open_tag.len();
    let end = body.find("</pre>")?;

    // `get` yields `None` instead of panicking when the markers are out of order.
    let payload = body.get(start..end)?.trim();

    serde_json::from_str(payload).ok()
}
894
/// Fallback used when the `serde` feature is disabled: performs no lookup and
/// always returns `None`. `new_page` is unused.
#[cfg(not(feature = "serde"))]
pub async fn detect_geo_info(new_page: &Page) -> Option<GeoInfo> {
    None
}
900
/// Apply timezone, locale and geolocation emulation to `new_page`.
///
/// The timezone and locale come from `configuration`. When neither is set,
/// `auto_geolocation` is on and proxies are configured, they are derived from
/// `detect_geo_info` instead, along with a geolocation override.
///
/// All emulation errors are ignored.
pub async fn configure_browser(new_page: &Page, configuration: &Configuration) {
    let mut timezone = configuration.timezone_id.is_some();
    let mut locale = configuration.locale.is_some();

    let mut timezone_value = configuration.timezone_id.clone();
    let mut locale_value = configuration.locale.clone();

    let mut emulate_geolocation = None;

    // Auto-detection only runs when the user configured neither value.
    if configuration.auto_geolocation && configuration.proxies.is_some() && !timezone && !locale {
        if let Some(geo) = detect_geo_info(new_page).await {
            if let Some(languages) = geo.languages {
                // Use the first entry of the comma-separated language list.
                if let Some(locale_v) = languages.split(',').next() {
                    if !locale_v.is_empty() {
                        locale_value = Some(Box::new(locale_v.into()));
                    }
                }
            }

            if let Some(timezone_v) = geo.timezone {
                if !timezone_v.is_empty() {
                    timezone_value = Some(Box::new(timezone_v));
                }
            }

            timezone = timezone_value.is_some();
            locale = locale_value.is_some();

            let mut geo_location_override = SetGeolocationOverrideParams::default();

            geo_location_override.latitude = geo.latitude;
            geo_location_override.longitude = geo.longitude;
            geo_location_override.accuracy = Some(0.7);

            emulate_geolocation = Some(geo_location_override);
        }
    }

    if timezone && locale {
        // NOTE(review): the geolocation override is only sent in this branch, i.e.
        // when both a timezone and a locale are available — confirm that detected
        // coordinates should be dropped when only one of the two was found.
        let geo = async {
            if let Some(geolocation) = emulate_geolocation {
                let _ = new_page.emulate_geolocation(geolocation).await;
            }
        };
        let timezone_id = async {
            if let Some(timezone_id) = timezone_value.as_deref() {
                if !timezone_id.is_empty() {
                    let _ = new_page
                        .emulate_timezone(
                            chromiumoxide::cdp::browser_protocol::emulation::SetTimezoneOverrideParams::new(
                                timezone_id,
                            ),
                        )
                        .await;
                }
            }
        };

        let locale = async {
            if let Some(locale) = locale_value.as_deref() {
                if !locale.is_empty() {
                    let _ = new_page
                        .emulate_locale(
                            chromiumoxide::cdp::browser_protocol::emulation::SetLocaleOverrideParams {
                                locale: Some(locale.into()),
                            },
                        )
                        .await;
                }
            }
        };

        // The three overrides are awaited together.
        tokio::join!(timezone_id, locale, geo);
    } else if timezone {
        if let Some(timezone_id) = timezone_value.as_deref() {
            if !timezone_id.is_empty() {
                let _ = new_page
                    .emulate_timezone(
                        chromiumoxide::cdp::browser_protocol::emulation::SetTimezoneOverrideParams::new(
                            timezone_id,
                        ),
                    )
                    .await;
            }
        }
    } else if locale {
        if let Some(locale) = locale_value.as_deref() {
            if !locale.is_empty() {
                let _ = new_page
                    .emulate_locale(
                        chromiumoxide::cdp::browser_protocol::emulation::SetLocaleOverrideParams {
                            locale: Some(locale.into()),
                        },
                    )
                    .await;
            }
        }
    }
}
1002
1003#[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
1005pub(crate) async fn attempt_navigation(
1006 url: &str,
1007 browser: &Browser,
1008 request_timeout: &Option<core::time::Duration>,
1009 browser_context_id: &Option<BrowserContextId>,
1010 viewport: &Option<crate::features::chrome_common::Viewport>,
1011) -> Result<Page, CdpError> {
1012 let mut cdp_params = CreateTargetParams::new(url);
1013
1014 cdp_params.background = Some(browser_context_id.is_some()); cdp_params.browser_context_id.clone_from(browser_context_id);
1016 cdp_params.for_tab = Some(false);
1017
1018 if viewport.is_some() {
1019 browser
1020 .config()
1021 .and_then(|c| c.viewport.as_ref())
1022 .and_then(|b_vp| {
1023 viewport.as_ref().map(|vp| {
1024 let new_viewport = b_vp.width == vp.width && b_vp.height == vp.height;
1025
1026 if !new_viewport {
1027 if vp.width >= 25 {
1028 cdp_params.width = Some(vp.width.into());
1029 }
1030 if vp.height >= 25 {
1031 cdp_params.height = Some(vp.height.into());
1032 }
1033 cdp_params.new_window = Some(true);
1034 }
1035 })
1036 });
1037 }
1038
1039 let page_result = tokio::time::timeout(
1040 match request_timeout {
1041 Some(timeout) => *timeout,
1042 _ => tokio::time::Duration::from_secs(60),
1043 },
1044 browser.new_page(cdp_params),
1045 )
1046 .await;
1047
1048 match page_result {
1049 Ok(page) => page,
1050 Err(_) => Err(CdpError::Timeout),
1051 }
1052}
1053
/// Stop the browser's handler task.
///
/// Aborts `browser_handle` unless it has already finished. `_browser` and
/// `_context_id` are accepted but not used.
pub async fn close_browser(
    browser_handle: JoinHandle<()>,
    _browser: &Browser,
    _context_id: &mut Option<BrowserContextId>,
) {
    if !browser_handle.is_finished() {
        browser_handle.abort();
    }
}
1064
/// Answer HTTP auth challenges on `page` with the configured credentials.
///
/// Does nothing unless `chrome_intercept` is on, an auth response is
/// configured, and the `EventAuthRequired` listener can be registered. When
/// all three hold, a background task named `auth_interception` is spawned
/// that replies to every auth-required event; failures are only logged.
#[cfg(feature = "chrome")]
pub async fn setup_auth_challenge_response(
    page: &chromiumoxide::Page,
    chrome_intercept: bool,
    auth_challenge_response: &Option<crate::configuration::AuthChallengeResponse>,
) {
    use chromiumoxide::cdp::browser_protocol::fetch;

    if !chrome_intercept {
        return;
    }

    let Some(auth_challenge_response) = auth_challenge_response else {
        return;
    };

    let Ok(mut rp) = page.event_listener::<fetch::EventAuthRequired>().await else {
        return;
    };

    // The spawned task owns its own page handle and credentials.
    let intercept_page = page.clone();
    let auth_challenge_response = auth_challenge_response.clone();

    crate::utils::spawn_task("auth_interception", async move {
        while let Some(event) = rp.next().await {
            let u = &event.request.url;
            let acr = fetch::AuthChallengeResponse::from(auth_challenge_response.clone());

            let command = fetch::ContinueWithAuthParams::builder()
                .request_id(event.request_id.clone())
                .auth_challenge_response(acr)
                .build();

            match command {
                Ok(c) => {
                    if let Err(e) = intercept_page.send_command(c).await {
                        log("Failed to fullfill auth challege request: ", e.to_string());
                    }
                }
                _ => {
                    log("Failed to get auth challege request handle ", u);
                }
            }
        }
    });
}
1107
/// Set up request interception helpers for `page`.
///
/// When `chrome_intercept` is on, this installs the auth-challenge responder
/// via `setup_auth_challenge_response`. `_ignore_visuals` and `_host_name` are
/// unused, and the function always returns `None`.
#[cfg(feature = "chrome")]
pub async fn setup_chrome_interception_base(
    page: &chromiumoxide::Page,
    chrome_intercept: bool,
    auth_challenge_response: &Option<crate::configuration::AuthChallengeResponse>,
    _ignore_visuals: bool,
    _host_name: &str,
) -> Option<tokio::task::JoinHandle<()>> {
    if chrome_intercept {
        setup_auth_challenge_response(page, chrome_intercept, auth_challenge_response).await;
    }
    None
}
1122
/// Prepare `chrome_page` for crawling: emulation script, ad blocking, user
/// agent, hardware concurrency, logging, CSP bypass and timezone/locale setup.
///
/// All steps are awaited together under a single 15 second limit; individual
/// step errors are ignored and only the overall timeout is logged.
pub async fn setup_chrome_events(chrome_page: &chromiumoxide::Page, config: &Configuration) {
    // An empty user agent is treated the same as none.
    let ua_opt = config.user_agent.as_deref().filter(|ua| !ua.is_empty());

    let ua_for_profiles: &str = ua_opt.map_or("", |v| v);

    let mut emulation_config =
        spider_fingerprint::EmulationConfiguration::setup_defaults(ua_for_profiles);

    let stealth_mode = config.stealth_mode;
    let use_stealth = stealth_mode.stealth();
    let block_ads = config.chrome_intercept.block_ads;

    emulation_config.dismiss_dialogs = config.dismiss_dialogs.unwrap_or(true);
    emulation_config.fingerprint = config.fingerprint;
    emulation_config.tier = stealth_mode;
    emulation_config.user_agent_data = Some(!ua_for_profiles.is_empty());

    let viewport = config.viewport.as_ref().map(|vp| (*vp).into());

    let gpu_profile = spider_fingerprint::profiles::gpu::select_random_gpu_profile(
        spider_fingerprint::get_agent_os(ua_for_profiles),
    );

    let merged_script = spider_fingerprint::emulate_with_profile(
        ua_for_profiles,
        &emulation_config,
        &viewport.as_ref(),
        &config.evaluate_on_new_document,
        gpu_profile,
    );

    // Inject only when a script was produced and either stealth or a user
    // supplied script asks for it.
    let should_inject_script =
        (use_stealth || config.evaluate_on_new_document.is_some()) && merged_script.is_some();

    // Falls back to 8 when the profile value does not fit into a `u32`.
    let hc: u32 = gpu_profile.hardware_concurrency.try_into().unwrap_or(8);

    let apply_page_setup = {
        async move {
            let f_script = async {
                if should_inject_script {
                    let _ = chrome_page
                        .add_script_to_evaluate_on_new_document(merged_script)
                        .await;
                }
            };

            let f_adblock = async {
                if block_ads {
                    let _ = chrome_page.set_ad_blocking_enabled(true).await;
                }
            };

            let f_ua = async {
                if !ua_for_profiles.is_empty() {
                    let _ = chrome_page.set_user_agent(ua_for_profiles).await;
                }
            };

            let f_hc = async {
                if use_stealth {
                    let _ = chrome_page.emulate_hardware_concurrency(hc.into()).await;
                }
            };

            tokio::join!(f_script, f_adblock, f_ua, f_hc);
        }
    };

    let disable_log = async {
        if config.disable_log {
            let _ = chrome_page.disable_log().await;
        }
    };

    let bypass_csp = async {
        if config.bypass_csp {
            let _ = chrome_page.set_bypass_csp(true).await;
        }
    };

    // One shared deadline for every setup step.
    if tokio::time::timeout(tokio::time::Duration::from_secs(15), async {
        tokio::join!(
            apply_page_setup,
            disable_log,
            bypass_csp,
            configure_browser(chrome_page, config),
        )
    })
    .await
    .is_err()
    {
        log::error!("failed to setup event handlers within 15 seconds.");
    }
}
1218
/// Bundle of a shared browser, the optional handle of its handler task and the
/// optional id of its browser context.
pub(crate) type BrowserControl = (
    std::sync::Arc<chromiumoxide::Browser>,
    Option<tokio::task::JoinHandle<()>>,
    Option<chromiumoxide::cdp::browser_protocol::browser::BrowserContextId>,
);
1224
/// Once-initialized cell holding an optional [`BrowserController`]; only
/// available with the `smart` feature and without `decentralized`.
#[cfg(all(feature = "smart", not(feature = "decentralized")))]
pub(crate) type OnceBrowser = tokio::sync::OnceCell<Option<BrowserController>>;
1228
/// Owns a [`BrowserControl`] bundle and aborts its handler task on disposal
/// (explicitly via `dispose` or implicitly on drop).
pub struct BrowserController {
    /// The browser, its handler task handle and its context id.
    pub browser: BrowserControl,
    /// Set to `true` once `dispose` has run.
    pub closed: bool,
    /// Shared flag supplied at construction.
    // NOTE(review): presumably the flag the handler task sets when the browser
    // connection ends (see `launch_browser_base`) — confirm at the call site.
    pub browser_dead: std::sync::Arc<std::sync::atomic::AtomicBool>,
}
1240
1241impl BrowserController {
1242 pub(crate) fn new(
1244 browser: BrowserControl,
1245 browser_dead: std::sync::Arc<std::sync::atomic::AtomicBool>,
1246 ) -> Self {
1247 BrowserController {
1248 browser,
1249 closed: false,
1250 browser_dead,
1251 }
1252 }
1253 pub fn dispose(&mut self) {
1255 if !self.closed {
1256 self.closed = true;
1257 if let Some(handler) = self.browser.1.take() {
1258 handler.abort();
1259 }
1260 }
1261 }
1262}
1263
/// Dropping the controller disposes it, so the handler task is aborted even
/// when `dispose` was never called explicitly.
impl Drop for BrowserController {
    fn drop(&mut self) {
        self.dispose();
    }
}
1269
/// Guard that closes the wrapped page when dropped, unless it was defused first.
/// The inner `Option` is `None` once the page has been taken out.
#[cfg(all(feature = "chrome", not(feature = "decentralized")))]
pub(crate) struct TabCloseGuard(Option<chromiumoxide::Page>);
1284
#[cfg(all(feature = "chrome", not(feature = "decentralized")))]
impl TabCloseGuard {
    /// Guard `page` so that it is closed when the guard is dropped.
    #[inline]
    pub fn new(page: chromiumoxide::Page) -> Self {
        TabCloseGuard(Some(page))
    }

    /// Consume the guard without scheduling a close: the page handle is
    /// released here, so the guard's drop finds nothing left to close.
    #[inline]
    pub fn defuse(mut self) {
        drop(self.0.take());
    }
}
1300
/// Closes the guarded page in the background when the guard is dropped.
///
/// The close is given at most 5 seconds and its result is ignored. When the
/// guard is dropped outside a Tokio runtime no task can be spawned, so the
/// page handle is simply released instead of panicking inside `drop`.
#[cfg(all(feature = "chrome", not(feature = "decentralized")))]
impl Drop for TabCloseGuard {
    fn drop(&mut self) {
        let Some(page) = self.0.take() else {
            return;
        };

        // `tokio::task::spawn` panics without an active runtime, and a panic
        // while dropping (possibly during unwinding) can abort the process.
        if let Ok(runtime) = tokio::runtime::Handle::try_current() {
            runtime.spawn(async move {
                let _ =
                    tokio::time::timeout(tokio::time::Duration::from_secs(5), page.close()).await;
            });
        }
    }
}