use std::collections::{HashMap, hash_map::Entry};
use std::sync::Arc;
use std::time::{Duration, Instant};

use bytes::Bytes;
use http::{HeaderMap, HeaderName, HeaderValue, Method};
use thiserror::Error;
use tokio::sync::Mutex;
use tokio::time::sleep;
use url::Url;

use crate::challenges::core::{
    ChallengeExecutionError, ChallengeHttpClient, ChallengeResponse, ChallengeSubmission,
    OriginalRequest, ReqwestChallengeHttpClient, execute_challenge_submission,
};
use crate::challenges::detectors::ChallengeDetection;
use crate::challenges::pipeline::{
    ChallengePipeline, ChallengePipelineResult, PipelineContext, PipelineError, UnsupportedReason,
};
use crate::challenges::solvers::access_denied::ProxyPool;
use crate::challenges::solvers::{
    MitigationPlan, TlsProfileManager, access_denied::AccessDeniedHandler,
    bot_management::BotManagementHandler, javascript_v1::JavascriptV1Solver,
    javascript_v2::JavascriptV2Solver, managed_v3::ManagedV3Solver, rate_limit::RateLimitHandler,
    turnstile::TurnstileSolver,
};
use crate::challenges::user_agents::{
    UserAgentError, UserAgentOptions, UserAgentProfile, get_user_agent_profile,
};
use crate::external_deps::captcha::CaptchaProvider;
use crate::external_deps::interpreters::{BoaJavascriptInterpreter, JavascriptInterpreter};
use crate::modules::adaptive_timing::{
    AdaptiveTimingStrategy, BehaviorProfile, DefaultAdaptiveTiming, RequestKind, TimingOutcome,
    TimingRequest,
};
use crate::modules::anti_detection::{
    AntiDetectionContext, AntiDetectionStrategy, DefaultAntiDetection,
};
use crate::modules::events::{
    ChallengeEvent, EventDispatcher, LoggingHandler, MetricsHandler, PostResponseEvent,
    PreRequestEvent, RetryEvent, ScraperEvent,
};
use crate::modules::metrics::MetricsCollector;
use crate::modules::ml::{FeatureVector, MLOptimizer};
use crate::modules::performance::PerformanceMonitor;
use crate::modules::proxy::{ProxyConfig, ProxyManager};
use crate::modules::spoofing::{ConsistencyLevel, FingerprintGenerator};
use crate::modules::state::StateManager;
use crate::modules::tls::{DefaultTLSManager, TLSConfig};
57
/// Convenience alias used by every fallible API in this module.
pub type CloudScraperResult<T> = Result<T, CloudScraperError>;

/// All error conditions surfaced by [`CloudScraper`] and its helpers.
///
/// Most variants wrap errors from lower layers via `#[from]`; the remaining
/// ones (`Unsupported`, `InvalidHeader`, `Mitigation`, `Aborted`) are raised
/// directly by this module.
#[derive(Debug, Error)]
pub enum CloudScraperError {
    /// Transport-level failure from the underlying reqwest client.
    #[error("http error: {0}")]
    Http(#[from] reqwest::Error),
    /// The caller supplied a string that does not parse as a URL.
    #[error("url parse error: {0}")]
    Url(#[from] url::ParseError),
    /// Building the user-agent profile (and thus base headers) failed.
    #[error("user-agent initialisation failed: {0}")]
    UserAgent(#[from] UserAgentError),
    /// A challenge was detected but executing its submission failed.
    #[error("challenge execution failed: {0}")]
    ChallengeExecution(#[from] ChallengeExecutionError),
    /// The challenge pipeline itself reported an error.
    #[error("challenge pipeline error: {0}")]
    Pipeline(#[from] PipelineError),
    /// A challenge was recognised but no solver supports it.
    #[error("unsupported challenge ({0})")]
    Unsupported(UnsupportedReason),
    /// Response body was not valid UTF-8 when text was requested.
    #[error("utf8 conversion failed: {0}")]
    Utf8(#[from] std::string::FromUtf8Error),
    /// A header name or value could not be converted between header types.
    #[error("header conversion failed: {0}")]
    InvalidHeader(String),
    /// A mitigation plan was produced but the retry budget is exhausted.
    /// Boxed to keep the error (and every `Result`) small.
    #[error("mitigation required but retries exhausted: {0:?}")]
    Mitigation(Box<MitigationPlan>),
    /// Challenge handling was aborted for the stated reason.
    #[error("challenge handling aborted: {0}")]
    Aborted(String),
}
85
/// Snapshot of a completed HTTP exchange returned to the caller.
///
/// The body is fully buffered (`Bytes`), so cloning a response is cheap and
/// accessors never perform I/O.
#[derive(Debug, Clone)]
pub struct ScraperResponse {
    // Final HTTP status code (after any challenge solving / redirects).
    status: u16,
    // Response headers of the final response.
    headers: HeaderMap,
    // Fully buffered response body.
    body: Bytes,
    // URL the final response was served from (may differ from the request URL).
    url: Url,
}
94
95impl ScraperResponse {
96 fn new(status: u16, headers: HeaderMap, body: Bytes, url: Url) -> Self {
97 Self {
98 status,
99 headers,
100 body,
101 url,
102 }
103 }
104
105 pub fn status(&self) -> u16 {
107 self.status
108 }
109
110 pub fn url(&self) -> &Url {
112 &self.url
113 }
114
115 pub fn headers(&self) -> &HeaderMap {
117 &self.headers
118 }
119
120 pub async fn text(&self) -> CloudScraperResult<String> {
122 Ok(String::from_utf8(self.body.to_vec())?)
123 }
124
125 pub async fn bytes(&self) -> Bytes {
127 self.body.clone()
128 }
129}
130
/// Configuration for building a [`CloudScraper`].
///
/// Prefer [`CloudScraperBuilder`] over filling this in by hand; all the
/// `enable_*` feature flags default to `true` (see the `Default` impl).
#[derive(Clone)]
pub struct CloudScraperConfig {
    // Options controlling which user-agent profile (and base headers) is used.
    pub user_agent: UserAgentOptions,
    // Proxy endpoints to rotate through; empty disables proxy management.
    pub proxies: Vec<String>,
    // Rotation/health settings for the proxy manager.
    pub proxy_config: ProxyConfig,
    pub enable_metrics: bool,
    pub enable_performance_monitoring: bool,
    pub enable_tls_fingerprinting: bool,
    pub enable_anti_detection: bool,
    pub enable_spoofing: bool,
    pub enable_adaptive_timing: bool,
    pub enable_ml_optimization: bool,
    // Timing persona used by the adaptive-timing module.
    pub behavior_profile: BehaviorProfile,
    // How stable generated fingerprints are (e.g. per-domain).
    pub spoofing_consistency: ConsistencyLevel,
    // Optional external captcha-solving service.
    pub captcha_provider: Option<Arc<dyn CaptchaProvider>>,
    // Optional custom JS interpreter; falls back to Boa when `None`.
    pub interpreter: Option<Arc<dyn JavascriptInterpreter>>,
    pub tls_config: TLSConfig,
    // Upper bound on retries when a challenge/mitigation is encountered.
    pub max_challenge_attempts: usize,
}
151
152impl Default for CloudScraperConfig {
153 fn default() -> Self {
154 Self {
155 user_agent: UserAgentOptions::default(),
156 proxies: Vec::new(),
157 proxy_config: ProxyConfig::default(),
158 enable_metrics: true,
159 enable_performance_monitoring: true,
160 enable_tls_fingerprinting: true,
161 enable_anti_detection: true,
162 enable_spoofing: true,
163 enable_adaptive_timing: true,
164 enable_ml_optimization: true,
165 behavior_profile: BehaviorProfile::Casual,
166 spoofing_consistency: ConsistencyLevel::Domain,
167 captcha_provider: None,
168 interpreter: None,
169 tls_config: TLSConfig::default(),
170 max_challenge_attempts: 3,
171 }
172 }
173}
174
/// Fluent builder for [`CloudScraper`]; wraps a [`CloudScraperConfig`]
/// that each `with_*`/`disable_*` method mutates before `build()`.
pub struct CloudScraperBuilder {
    config: CloudScraperConfig,
}
179
180impl CloudScraperBuilder {
181 pub fn new() -> Self {
182 Self {
183 config: CloudScraperConfig::default(),
184 }
185 }
186
187 pub fn with_user_agent_options(mut self, options: UserAgentOptions) -> Self {
188 self.config.user_agent = options;
189 self
190 }
191
192 pub fn with_proxies<I, S>(mut self, proxies: I) -> Self
193 where
194 I: IntoIterator<Item = S>,
195 S: Into<String>,
196 {
197 self.config.proxies = proxies.into_iter().map(Into::into).collect();
198 self
199 }
200
201 pub fn with_proxy_config(mut self, config: ProxyConfig) -> Self {
202 self.config.proxy_config = config;
203 self
204 }
205
206 pub fn with_captcha_provider(mut self, provider: Arc<dyn CaptchaProvider>) -> Self {
207 self.config.captcha_provider = Some(provider);
208 self
209 }
210
211 pub fn with_interpreter(mut self, interpreter: Arc<dyn JavascriptInterpreter>) -> Self {
212 self.config.interpreter = Some(interpreter);
213 self
214 }
215
216 pub fn disable_metrics(mut self) -> Self {
217 self.config.enable_metrics = false;
218 self
219 }
220
221 pub fn disable_performance_monitoring(mut self) -> Self {
222 self.config.enable_performance_monitoring = false;
223 self
224 }
225
226 pub fn disable_tls_fingerprinting(mut self) -> Self {
227 self.config.enable_tls_fingerprinting = false;
228 self
229 }
230
231 pub fn disable_anti_detection(mut self) -> Self {
232 self.config.enable_anti_detection = false;
233 self
234 }
235
236 pub fn disable_spoofing(mut self) -> Self {
237 self.config.enable_spoofing = false;
238 self
239 }
240
241 pub fn disable_adaptive_timing(mut self) -> Self {
242 self.config.enable_adaptive_timing = false;
243 self
244 }
245
246 pub fn disable_ml_optimization(mut self) -> Self {
247 self.config.enable_ml_optimization = false;
248 self
249 }
250
251 pub fn with_behavior_profile(mut self, profile: BehaviorProfile) -> Self {
252 self.config.behavior_profile = profile;
253 self
254 }
255
256 pub fn with_spoofing_consistency(mut self, level: ConsistencyLevel) -> Self {
257 self.config.spoofing_consistency = level;
258 self
259 }
260
261 pub fn with_tls_config(mut self, config: TLSConfig) -> Self {
262 self.config.tls_config = config;
263 self
264 }
265
266 pub fn with_max_challenge_attempts(mut self, attempts: usize) -> Self {
267 self.config.max_challenge_attempts = attempts.max(1);
268 self
269 }
270
271 pub fn build(self) -> CloudScraperResult<CloudScraper> {
272 CloudScraper::with_config(self.config)
273 }
274}
275
276impl Default for CloudScraperBuilder {
277 fn default() -> Self {
278 Self::new()
279 }
280}
281
/// Mutable scraper state guarded by a single `tokio::sync::Mutex`.
///
/// Everything that changes between requests (pipeline learning, proxy
/// rotation, optional modules) lives here so `CloudScraper` itself can be
/// shared by `&self`.
struct CloudScraperInner {
    // Challenge detection/solving pipeline; records per-pattern outcomes.
    pipeline: ChallengePipeline,
    // Present only when proxies were configured.
    proxy_manager: Option<ProxyManager>,
    // Proxy used by the most recent request, if any.
    current_proxy: Option<String>,
    // Optional modules; `None` when disabled in the config.
    tls_manager: Option<DefaultTLSManager>,
    fingerprint: Option<FingerprintGenerator>,
    anti_detection: Option<DefaultAntiDetection>,
    adaptive_timing: Option<DefaultAdaptiveTiming>,
    performance_monitor: Option<PerformanceMonitor>,
    ml_optimizer: Option<MLOptimizer>,
}
294
295impl CloudScraperInner {
296 fn new(pipeline: ChallengePipeline) -> Self {
297 Self {
298 pipeline,
299 proxy_manager: None,
300 current_proxy: None,
301 tls_manager: None,
302 fingerprint: None,
303 anti_detection: None,
304 adaptive_timing: None,
305 performance_monitor: None,
306 ml_optimizer: None,
307 }
308 }
309}
310
/// Lazily-built cache of reqwest clients, keyed by proxy endpoint
/// (`None` = direct connection). All clients share the same base headers
/// and a cookie store per client.
struct ClientPool {
    base_headers: reqwest::header::HeaderMap,
    clients: Mutex<HashMap<Option<String>, reqwest::Client>>,
}
316
317impl ClientPool {
318 fn new(base_headers: reqwest::header::HeaderMap) -> Self {
319 Self {
320 base_headers,
321 clients: Mutex::new(HashMap::new()),
322 }
323 }
324
325 async fn client(&self, proxy: Option<&str>) -> CloudScraperResult<reqwest::Client> {
326 let mut guard = self.clients.lock().await;
327 let key = proxy.map(|p| p.to_string());
328 if let Some(client) = guard.get(&key) {
329 return Ok(client.clone());
330 }
331
332 let mut builder = reqwest::Client::builder()
333 .cookie_store(true)
334 .default_headers(self.base_headers.clone());
335
336 if let Some(endpoint) = proxy {
337 builder = builder.proxy(reqwest::Proxy::all(endpoint)?);
338 }
339
340 let client = builder.build()?;
341 guard.insert(key.clone(), client.clone());
342 Ok(client)
343 }
344}
345
/// High-level scraping client that transparently detects and solves
/// anti-bot challenges.
///
/// Cheap to share by reference: immutable pieces live directly on the
/// struct, and all per-request mutable state is behind `inner`.
pub struct CloudScraper {
    // Configuration captured at construction time.
    config: CloudScraperConfig,
    // Base headers derived from the user-agent profile (http crate types).
    base_headers_http: HeaderMap,
    // Per-proxy reqwest client cache.
    client_pool: Arc<ClientPool>,
    // HTTP client used specifically for challenge submissions.
    challenge_client: Arc<dyn ChallengeHttpClient>,
    // Per-domain success/failure and sticky-header state.
    state: StateManager,
    // Present only when metrics are enabled.
    metrics: Option<MetricsCollector>,
    // Event fan-out (logging, metrics, user handlers).
    events: Arc<EventDispatcher>,
    // Mutable modules/pipeline, serialized behind an async mutex.
    inner: Mutex<CloudScraperInner>,
}
357
impl CloudScraper {
    /// Builds a scraper with the default configuration.
    pub fn new() -> CloudScraperResult<Self> {
        CloudScraper::with_config(CloudScraperConfig::default())
    }

    /// Entry point for fluent configuration.
    pub fn builder() -> CloudScraperBuilder {
        CloudScraperBuilder::new()
    }

    /// Assembles the scraper from a finished configuration: resolves the
    /// user-agent profile, wires every solver into the pipeline, and
    /// instantiates each optional module whose flag is enabled.
    fn with_config(config: CloudScraperConfig) -> CloudScraperResult<Self> {
        // Base headers come from the selected user-agent profile and are
        // kept in both http- and reqwest-flavoured forms.
        let profile = get_user_agent_profile(config.user_agent.clone())?;
        let base_headers_http = to_http_headers(&profile)?;
        let base_headers_reqwest = to_reqwest_headers(&base_headers_http)?;

        let mut pipeline = ChallengePipeline::default();
        // Fall back to the bundled Boa interpreter when none was supplied.
        let interpreter: Arc<dyn JavascriptInterpreter> = config
            .interpreter
            .clone()
            .unwrap_or_else(|| Arc::new(BoaJavascriptInterpreter::new()));

        // Only the v2 and Turnstile solvers can use an external captcha
        // provider; attach it when configured.
        let mut js_v2 = JavascriptV2Solver::new();
        let mut turnstile = TurnstileSolver::new();
        if let Some(provider) = &config.captcha_provider {
            js_v2 = js_v2.with_captcha_provider(provider.clone());
            turnstile = turnstile.with_captcha_provider(provider.clone());
        }

        pipeline = pipeline
            .with_javascript_v1(JavascriptV1Solver::new(interpreter.clone()))
            .with_javascript_v2(js_v2)
            .with_managed_v3(ManagedV3Solver::new(interpreter))
            .with_turnstile(turnstile)
            .with_rate_limit(RateLimitHandler::new())
            .with_access_denied(AccessDeniedHandler::new())
            .with_bot_management(BotManagementHandler::new());

        let mut inner = CloudScraperInner::new(pipeline);

        // Optional modules: each is created only when its flag is set.
        if !config.proxies.is_empty() {
            let mut manager = ProxyManager::new(config.proxy_config.clone());
            manager.load(config.proxies.iter().cloned());
            inner.proxy_manager = Some(manager);
        }

        if config.enable_tls_fingerprinting {
            inner.tls_manager = Some(DefaultTLSManager::new(config.tls_config.clone()));
        }

        if config.enable_spoofing {
            let mut generator = FingerprintGenerator::default();
            generator = generator.with_consistency(config.spoofing_consistency);
            inner.fingerprint = Some(generator);
        }

        if config.enable_anti_detection {
            inner.anti_detection = Some(DefaultAntiDetection::new(Default::default()));
        }

        if config.enable_adaptive_timing {
            let mut timing = DefaultAdaptiveTiming::new();
            timing.set_behavior_profile(config.behavior_profile);
            inner.adaptive_timing = Some(timing);
        }

        if config.enable_performance_monitoring {
            inner.performance_monitor = Some(PerformanceMonitor::new(Default::default()));
        }

        if config.enable_ml_optimization {
            inner.ml_optimizer = Some(MLOptimizer::default());
        }

        let client_pool = Arc::new(ClientPool::new(base_headers_reqwest));
        let challenge_client = Arc::new(ReqwestChallengeHttpClient::new()?);
        let state = StateManager::new();
        let metrics = config.enable_metrics.then(MetricsCollector::new);

        // Logging always listens; metrics listens only when enabled.
        let mut events = EventDispatcher::new();
        events.register_handler(Arc::new(LoggingHandler));
        if let Some(ref collector) = metrics {
            events.register_handler(Arc::new(MetricsHandler::new(collector.clone())));
        }

        Ok(Self {
            config,
            base_headers_http,
            client_pool,
            challenge_client,
            state,
            metrics,
            events: Arc::new(events),
            inner: Mutex::new(inner),
        })
    }

    /// Convenience wrapper: GET `url` with no body.
    ///
    /// # Errors
    /// [`CloudScraperError::Url`] when `url` does not parse, plus anything
    /// [`CloudScraper::request`] can return.
    pub async fn get(&self, url: &str) -> CloudScraperResult<ScraperResponse> {
        let url = Url::parse(url)?;
        self.request(Method::GET, url, None).await
    }

    /// Performs a request, transparently detecting and handling challenges.
    ///
    /// Retry loop: each iteration prepares headers/proxy/delay, sends the
    /// request, then asks the pipeline what the response was. A clean
    /// response returns immediately; a solvable challenge is submitted; a
    /// mitigation plan may trigger another loop iteration (bounded by
    /// `max_challenge_attempts`); anything else is an error.
    ///
    /// # Errors
    /// Transport errors, header-conversion errors, pipeline errors,
    /// `Unsupported` for unknown challenges, and `Mitigation` when the
    /// retry budget runs out.
    pub async fn request(
        &self,
        method: Method,
        url: Url,
        body: Option<Vec<u8>>,
    ) -> CloudScraperResult<ScraperResponse> {
        // A mitigation plan may pin the proxy for exactly one retry;
        // `take()` below consumes it so later attempts rotate normally.
        let mut forced_proxy: Option<String> = None;
        let mut attempt = 0usize;

        loop {
            attempt += 1;

            let (headers_http, anti_ctx, proxy, mut delay) = self
                .prepare_request(
                    &method,
                    &url,
                    body.as_ref().map(|b| b.len()).unwrap_or(0),
                    forced_proxy.take(),
                )
                .await?;

            // Anti-detection may ask for a longer pause than adaptive
            // timing chose; honour whichever is larger.
            if let Some(hint) = anti_ctx.delay_hint()
                && hint > delay
            {
                delay = hint;
            }

            self.events
                .dispatch(ScraperEvent::PreRequest(PreRequestEvent {
                    url: url.clone(),
                    method: method.clone(),
                    headers: headers_http.clone(),
                    timestamp: chrono::Utc::now(),
                }));

            let client = self.client_pool.client(proxy.as_deref()).await?;

            if delay > Duration::from_millis(0) {
                sleep(delay).await;
            }

            let req_headers = to_reqwest_headers(&headers_http)?;
            let mut builder = client
                .request(method.clone(), url.clone())
                .headers(req_headers);
            if let Some(ref body) = body {
                builder = builder.body(body.clone());
            }

            let started = Instant::now();
            let resp = builder.send().await?;
            let latency = started.elapsed();

            // Capture everything the pipeline needs before consuming the
            // response body.
            let final_url = resp.url().clone();
            let status = resp.status().as_u16();
            let headers_raw = resp.headers().clone();
            let body_bytes = resp.bytes().await?.to_vec();
            // Lossy conversion: challenge detection only needs to scan the
            // markup, so invalid UTF-8 must not abort the request.
            let body_text = String::from_utf8_lossy(&body_bytes).to_string();

            let http_headers = reqwest_to_http(&headers_raw)?;
            let challenge_response = ChallengeResponse {
                url: &final_url,
                status,
                headers: &http_headers,
                body: &body_text,
                request_method: &method,
            };

            self.events
                .dispatch(ScraperEvent::PostResponse(PostResponseEvent {
                    url: final_url.clone(),
                    method: method.clone(),
                    status,
                    latency,
                    timestamp: chrono::Utc::now(),
                }));

            // Evaluate inside a short-lived lock scope: destructure the
            // guard so each module can be borrowed independently, and drop
            // the lock before acting on the verdict.
            let result = {
                let mut guard = self.inner.lock().await;
                let CloudScraperInner {
                    pipeline,
                    proxy_manager,
                    current_proxy,
                    tls_manager,
                    fingerprint,
                    ..
                } = &mut *guard;

                pipeline
                    .evaluate(
                        &challenge_response,
                        PipelineContext {
                            proxy_pool: proxy_manager.as_mut().map(|pm| pm as &mut dyn ProxyPool),
                            current_proxy: current_proxy.as_deref(),
                            failure_recorder: Some(&self.state),
                            fingerprint_manager: fingerprint.as_mut().map(|fp| {
                                fp as &mut dyn crate::challenges::solvers::FingerprintManager
                            }),
                            tls_manager: tls_manager
                                .as_mut()
                                .map(|tls| tls as &mut dyn TlsProfileManager),
                        },
                    )
                    .await
            };

            match result {
                // No challenge detected: this is the caller's response.
                ChallengePipelineResult::NoChallenge => {
                    self.record_outcome(true, status, latency, delay, &final_url)
                        .await;
                    let response = ScraperResponse::new(
                        status,
                        http_headers.clone(),
                        Bytes::from(body_bytes),
                        final_url,
                    );
                    return Ok(response);
                }
                // A solver produced a submission: execute it and return
                // the post-challenge response.
                ChallengePipelineResult::Submission {
                    detection,
                    submission,
                } => {
                    let (response, challenge_latency) = self
                        .handle_submission(
                            submission,
                            detection,
                            &method,
                            &url,
                            headers_http.clone(),
                            body.clone(),
                        )
                        .await?;
                    // Any non-5xx post-challenge status counts as success
                    // for the learning modules.
                    self.record_outcome(
                        response.status() < 500,
                        response.status(),
                        latency + challenge_latency,
                        delay,
                        response.url(),
                    )
                    .await;
                    return Ok(response);
                }
                // No direct solve; the pipeline prescribed waiting and/or a
                // proxy change, possibly followed by a retry.
                ChallengePipelineResult::Mitigation { detection, plan } => {
                    self.record_outcome(false, status, latency, delay, &final_url)
                        .await;
                    self.events
                        .dispatch(ScraperEvent::Challenge(ChallengeEvent {
                            domain: detection.url.clone(),
                            challenge_type: format!("{:?}", detection.challenge_type),
                            success: false,
                            metadata: vec![
                                ("reason".into(), plan.reason.clone()),
                                ("pattern".into(), detection.pattern_id.clone()),
                            ],
                            timestamp: chrono::Utc::now(),
                        }));

                    if let Some(wait) = plan.wait {
                        sleep(wait).await;
                    }

                    if let Some(ref proxy_hint) = plan.new_proxy {
                        forced_proxy = Some(proxy_hint.clone());
                    }

                    let should_retry =
                        plan.should_retry && attempt < self.config.max_challenge_attempts;
                    if should_retry {
                        self.events.dispatch(ScraperEvent::Retry(RetryEvent {
                            domain: detection.url,
                            // Reports the number of the *upcoming* attempt.
                            attempt: (attempt + 1) as u32,
                            reason: plan.reason.clone(),
                            scheduled_after: plan.wait.unwrap_or_default(),
                            timestamp: chrono::Utc::now(),
                        }));
                        continue;
                    } else {
                        return Err(CloudScraperError::Mitigation(Box::new(plan)));
                    }
                }
                // Recognised challenge with no capable solver: give up.
                ChallengePipelineResult::Unsupported { detection, reason } => {
                    self.record_outcome(false, status, latency, delay, &final_url)
                        .await;
                    self.events
                        .dispatch(ScraperEvent::Challenge(ChallengeEvent {
                            domain: detection.url,
                            challenge_type: detection.pattern_name,
                            success: false,
                            metadata: vec![("reason".into(), reason.to_string())],
                            timestamp: chrono::Utc::now(),
                        }));
                    return Err(CloudScraperError::Unsupported(reason));
                }
                // The pipeline itself errored out while evaluating.
                ChallengePipelineResult::Failed { detection, error } => {
                    self.record_outcome(false, status, latency, delay, &final_url)
                        .await;
                    self.events
                        .dispatch(ScraperEvent::Error(crate::modules::events::ErrorEvent {
                            domain: detection.url,
                            error: error.to_string(),
                            timestamp: chrono::Utc::now(),
                        }));
                    return Err(CloudScraperError::Pipeline(error));
                }
            }
        }
    }

    /// Executes a solver's submission against the origin and converts the
    /// outcome into a [`ScraperResponse`], reporting success/failure to the
    /// pipeline (for per-pattern learning) and to the event bus.
    ///
    /// Returns the response together with the time spent on the challenge
    /// round-trip so the caller can fold it into total latency.
    async fn handle_submission(
        &self,
        submission: ChallengeSubmission,
        detection: ChallengeDetection,
        method: &Method,
        url: &Url,
        headers: HeaderMap,
        body: Option<Vec<u8>>,
    ) -> CloudScraperResult<(ScraperResponse, Duration)> {
        // The original request is replayed after the challenge clears.
        let original = OriginalRequest::new(method.clone(), url.clone())
            .with_headers(headers)
            .with_body(body);

        let started = Instant::now();
        let result =
            execute_challenge_submission(self.challenge_client.clone(), submission, original).await;
        let challenge_latency = started.elapsed();

        // Record the outcome even on failure, before `?` propagates it —
        // the pipeline learns from unsuccessful solves too.
        let success = result.is_ok();
        {
            let mut guard = self.inner.lock().await;
            guard
                .pipeline
                .record_outcome(&detection.pattern_id, success);
        }

        let final_response = result?;
        let response = ScraperResponse::new(
            final_response.status,
            final_response.headers.clone(),
            Bytes::from(final_response.body.clone()),
            final_response.url.clone(),
        );

        self.events
            .dispatch(ScraperEvent::Challenge(ChallengeEvent {
                domain: detection.url,
                challenge_type: detection.pattern_name,
                success,
                metadata: vec![
                    ("pattern".into(), detection.pattern_id),
                    ("status".into(), final_response.status.to_string()),
                ],
                timestamp: chrono::Utc::now(),
            }));

        self.events
            .dispatch(ScraperEvent::PostResponse(PostResponseEvent {
                url: response.url().clone(),
                method: method.clone(),
                status: response.status(),
                latency: challenge_latency,
                timestamp: chrono::Utc::now(),
            }));

        Ok((response, challenge_latency))
    }

    /// Feeds one request outcome into every learning/observability module:
    /// domain state, metrics, adaptive timing, anti-detection, performance
    /// monitoring, and the ML optimizer.
    async fn record_outcome(
        &self,
        success: bool,
        status: u16,
        latency: Duration,
        delay: Duration,
        url: &Url,
    ) {
        let domain = url.host_str().unwrap_or_default();
        if success {
            self.state.record_success(domain);
        } else {
            self.state
                .record_failure(domain, format!("status_{status}"));
        }

        if let Some(ref collector) = self.metrics {
            collector.record_response(domain, status, latency);
        }

        let mut guard = self.inner.lock().await;
        if let Some(timing) = guard.adaptive_timing.as_mut() {
            let outcome = TimingOutcome {
                success,
                response_time: latency,
                applied_delay: delay,
            };
            timing.record_outcome(domain, &outcome);
        }

        if let Some(anti) = guard.anti_detection.as_mut() {
            anti.record_response(domain, status, latency);
        }

        // Surface performance alerts, but only warn — never fail a request
        // over monitoring.
        if let Some(perf) = guard.performance_monitor.as_mut()
            && let Some(report) = perf.record(domain, latency, success)
            && !report.alerts.is_empty()
        {
            log::warn!("performance alerts: {:#?}", report.alerts);
        }

        if let Some(ml) = guard.ml_optimizer.as_mut() {
            let mut features = FeatureVector::new();
            features.insert("latency".into(), latency.as_secs_f64());
            features.insert("delay".into(), delay.as_secs_f64());
            ml.record_attempt(domain, features, success, Some(delay.as_secs_f64()));
        }
    }

    /// Computes everything needed before sending: the final header set
    /// (base + per-domain sticky + fingerprint + anti-detection), the
    /// anti-detection context, the proxy to use, and the pre-send delay.
    async fn prepare_request(
        &self,
        method: &Method,
        url: &Url,
        body_size: usize,
        forced_proxy: Option<String>,
    ) -> CloudScraperResult<(HeaderMap, AntiDetectionContext, Option<String>, Duration)> {
        // Start from the base profile headers, then layer the domain's
        // sticky headers (e.g. clearance tokens from earlier solves).
        let mut headers = self.base_headers_http.clone();
        if let Some(state) = self.state.get(url.host_str().unwrap_or("")) {
            for (name, value) in state.sticky_headers {
                let header_name = HeaderName::from_bytes(name.as_bytes())
                    .map_err(|_| CloudScraperError::InvalidHeader(name.clone()))?;
                let header_value = HeaderValue::from_str(&value)
                    .map_err(|_| CloudScraperError::InvalidHeader(name.clone()))?;
                headers.insert(header_name, header_value);
            }
        }

        let mut anti_ctx =
            AntiDetectionContext::new(url.clone(), method.clone()).with_headers(headers.clone());
        anti_ctx.set_body_size(body_size);

        let mut proxy = forced_proxy;
        let mut delay = Duration::from_millis(0);

        {
            let mut guard = self.inner.lock().await;

            if let Some(ref mut generator) = guard.fingerprint
                && let Some(domain) = url.host_str()
            {
                let fp = generator.generate_for(domain);
                anti_ctx.set_user_agent(fp.user_agent.clone());
                headers.insert(
                    HeaderName::from_static("user-agent"),
                    HeaderValue::from_str(&fp.user_agent)
                        .map_err(|_| CloudScraperError::InvalidHeader("user-agent".into()))?,
                );
                headers.insert(
                    HeaderName::from_static("accept-language"),
                    HeaderValue::from_str(&fp.accept_language)
                        .map_err(|_| CloudScraperError::InvalidHeader("accept-language".into()))?,
                );
            }

            // NOTE(review): replacing `headers` with `anti_ctx.headers`
            // discards the user-agent/accept-language just inserted above,
            // unless AntiDetectionContext re-applies them from
            // `set_user_agent` — confirm that it does; accept-language in
            // particular has no corresponding setter on the context.
            if let Some(ref mut anti) = guard.anti_detection {
                anti.prepare_request(url.host_str().unwrap_or(""), &mut anti_ctx);
                headers = anti_ctx.headers.clone();
            }

            // A forced proxy (from a mitigation plan) bypasses rotation.
            if proxy.is_none() {
                let next = guard.proxy_manager.as_mut().and_then(|pm| pm.next_proxy());
                guard.current_proxy = next.clone();
                proxy = next;
            } else {
                guard.current_proxy = proxy.clone();
            }

            if let Some(ref mut timing) = guard.adaptive_timing {
                let request = TimingRequest::new(request_kind(method), body_size);
                delay = timing.calculate_delay(url.host_str().unwrap_or(""), &request);
            }
        }

        Ok((headers, anti_ctx, proxy, delay))
    }
}
844
845fn request_kind(method: &Method) -> RequestKind {
846 match *method {
847 Method::GET => RequestKind::Get,
848 Method::POST => RequestKind::Post,
849 Method::PUT => RequestKind::Put,
850 Method::PATCH => RequestKind::Patch,
851 Method::DELETE => RequestKind::Delete,
852 Method::HEAD => RequestKind::Head,
853 Method::OPTIONS => RequestKind::Options,
854 _ => RequestKind::Other,
855 }
856}
857
858fn to_http_headers(profile: &UserAgentProfile) -> CloudScraperResult<HeaderMap> {
859 let mut headers = HeaderMap::new();
860 for (name, value) in &profile.headers {
861 let header_name = HeaderName::from_bytes(name.as_bytes())
862 .map_err(|_| CloudScraperError::InvalidHeader(name.clone()))?;
863 let header_value = HeaderValue::from_str(value)
864 .map_err(|_| CloudScraperError::InvalidHeader(name.clone()))?;
865 headers.insert(header_name, header_value);
866 }
867 Ok(headers)
868}
869
870fn to_reqwest_headers(headers: &HeaderMap) -> CloudScraperResult<reqwest::header::HeaderMap> {
871 let mut map = reqwest::header::HeaderMap::new();
872 for (name, value) in headers.iter() {
873 let header_name = reqwest::header::HeaderName::from_bytes(name.as_str().as_bytes())
874 .map_err(|_| CloudScraperError::InvalidHeader(name.to_string()))?;
875 let header_value = reqwest::header::HeaderValue::from_bytes(value.as_bytes())
876 .map_err(|_| CloudScraperError::InvalidHeader(name.to_string()))?;
877 map.insert(header_name, header_value);
878 }
879 Ok(map)
880}
881
882fn reqwest_to_http(headers: &reqwest::header::HeaderMap) -> CloudScraperResult<HeaderMap> {
883 let mut map = HeaderMap::new();
884 for (name, value) in headers.iter() {
885 let header_name = HeaderName::from_bytes(name.as_str().as_bytes())
886 .map_err(|_| CloudScraperError::InvalidHeader(name.to_string()))?;
887 let header_value = HeaderValue::from_bytes(value.as_bytes())
888 .map_err(|_| CloudScraperError::InvalidHeader(name.to_string()))?;
889 map.insert(header_name, header_value);
890 }
891 Ok(map)
892}