// cloudscraper_rs/cloudscraper.rs

1//! High level scraper orchestration.
2//!
3//! Wires together the challenge detectors, solvers, and adaptive subsystems
4//! (timing, anti-detection, spoofing, TLS, ML, metrics…) to expose an
5//! ergonomic HTTP client capable of transparently handling Cloudflare
6//! defences.
7
8use std::collections::HashMap;
9use std::sync::Arc;
10use std::time::{Duration, Instant};
11
12use bytes::Bytes;
13use http::{HeaderMap, HeaderName, HeaderValue, Method};
14use thiserror::Error;
15use tokio::sync::Mutex;
16use tokio::time::sleep;
17use url::Url;
18
19use crate::challenges::core::{
20    ChallengeExecutionError, ChallengeHttpClient, ChallengeResponse, ChallengeSubmission,
21    OriginalRequest, ReqwestChallengeHttpClient, execute_challenge_submission,
22};
23use crate::challenges::detectors::ChallengeDetection;
24use crate::challenges::pipeline::{
25    ChallengePipeline, ChallengePipelineResult, PipelineContext, PipelineError, UnsupportedReason,
26};
27use crate::challenges::solvers::access_denied::ProxyPool;
28use crate::challenges::solvers::{
29    MitigationPlan, TlsProfileManager, access_denied::AccessDeniedHandler,
30    bot_management::BotManagementHandler, javascript_v1::JavascriptV1Solver,
31    javascript_v2::JavascriptV2Solver, managed_v3::ManagedV3Solver, rate_limit::RateLimitHandler,
32    turnstile::TurnstileSolver,
33};
34use crate::challenges::user_agents::{
35    UserAgentError, UserAgentOptions, UserAgentProfile, get_user_agent_profile,
36};
37use crate::external_deps::captcha::CaptchaProvider;
38use crate::external_deps::interpreters::{BoaJavascriptInterpreter, JavascriptInterpreter};
39use crate::modules::adaptive_timing::{
40    AdaptiveTimingStrategy, BehaviorProfile, DefaultAdaptiveTiming, RequestKind, TimingOutcome,
41    TimingRequest,
42};
43use crate::modules::anti_detection::{
44    AntiDetectionContext, AntiDetectionStrategy, DefaultAntiDetection,
45};
46use crate::modules::events::{
47    ChallengeEvent, EventDispatcher, LoggingHandler, MetricsHandler, PostResponseEvent,
48    PreRequestEvent, RetryEvent, ScraperEvent,
49};
50use crate::modules::metrics::MetricsCollector;
51use crate::modules::ml::{FeatureVector, MLOptimizer};
52use crate::modules::performance::PerformanceMonitor;
53use crate::modules::proxy::{ProxyConfig, ProxyManager};
54use crate::modules::spoofing::{ConsistencyLevel, FingerprintGenerator};
55use crate::modules::state::StateManager;
56use crate::modules::tls::{DefaultTLSManager, TLSConfig};
57
/// Result alias used across the orchestration layer.
///
/// All public scraper APIs return this alias so callers only need to match
/// on [`CloudScraperError`].
pub type CloudScraperResult<T> = Result<T, CloudScraperError>;
60
/// High-level error surfaced by the orchestrator.
#[derive(Debug, Error)]
pub enum CloudScraperError {
    /// Transport-level failure from the underlying `reqwest` client.
    #[error("http error: {0}")]
    Http(#[from] reqwest::Error),
    /// A caller-supplied URL string could not be parsed.
    #[error("url parse error: {0}")]
    Url(#[from] url::ParseError),
    /// A browser profile could not be resolved from the user-agent options.
    #[error("user-agent initialisation failed: {0}")]
    UserAgent(#[from] UserAgentError),
    /// Executing a solver-produced challenge submission failed.
    #[error("challenge execution failed: {0}")]
    ChallengeExecution(#[from] ChallengeExecutionError),
    /// The detection/solving pipeline itself reported an error.
    #[error("challenge pipeline error: {0}")]
    Pipeline(#[from] PipelineError),
    /// A challenge was detected but no registered solver supports it.
    #[error("unsupported challenge ({0})")]
    Unsupported(UnsupportedReason),
    /// A response body could not be decoded as UTF-8 text.
    #[error("utf8 conversion failed: {0}")]
    Utf8(#[from] std::string::FromUtf8Error),
    /// A header name or value could not be converted between representations.
    #[error("header conversion failed: {0}")]
    InvalidHeader(String),
    /// A mitigation plan was produced but the retry budget is exhausted
    /// (boxed to keep the variant payload small).
    #[error("mitigation required but retries exhausted: {0:?}")]
    Mitigation(Box<MitigationPlan>),
    /// Challenge handling was aborted for the stated reason.
    #[error("challenge handling aborted: {0}")]
    Aborted(String),
}
85
/// Read-only HTTP response returned by the scraper.
#[derive(Debug, Clone)]
pub struct ScraperResponse {
    // Final HTTP status code (after any challenge handling).
    status: u16,
    // Headers of the final response.
    headers: HeaderMap,
    // Raw response body bytes.
    body: Bytes,
    // Final URL after challenge handling / redirects.
    url: Url,
}
94
95impl ScraperResponse {
96    fn new(status: u16, headers: HeaderMap, body: Bytes, url: Url) -> Self {
97        Self {
98            status,
99            headers,
100            body,
101            url,
102        }
103    }
104
105    /// HTTP status code as returned by Cloudflare/target origin.
106    pub fn status(&self) -> u16 {
107        self.status
108    }
109
110    /// Final URL after challenge handling / redirects.
111    pub fn url(&self) -> &Url {
112        &self.url
113    }
114
115    /// Response headers.
116    pub fn headers(&self) -> &HeaderMap {
117        &self.headers
118    }
119
120    /// Convenience helper returning the body as UTF-8 text.
121    pub async fn text(&self) -> CloudScraperResult<String> {
122        Ok(String::from_utf8(self.body.to_vec())?)
123    }
124
125    /// Raw body bytes.
126    pub async fn bytes(&self) -> Bytes {
127        self.body.clone()
128    }
129}
130
/// Scraper configuration used by the builder.
#[derive(Clone)]
pub struct CloudScraperConfig {
    /// Options controlling which browser profile / user-agent is emulated.
    pub user_agent: UserAgentOptions,
    /// Proxy endpoints to rotate through (empty disables the proxy manager).
    pub proxies: Vec<String>,
    /// Settings handed to the proxy manager when proxies are supplied.
    pub proxy_config: ProxyConfig,
    /// Collect request/response metrics.
    pub enable_metrics: bool,
    /// Track latency/success and emit performance alerts.
    pub enable_performance_monitoring: bool,
    /// Manage TLS fingerprint profiles.
    pub enable_tls_fingerprinting: bool,
    /// Apply per-request anti-detection adjustments.
    pub enable_anti_detection: bool,
    /// Generate per-domain spoofed fingerprints (user-agent, accept-language).
    pub enable_spoofing: bool,
    /// Insert delays computed by the adaptive timing strategy.
    pub enable_adaptive_timing: bool,
    /// Feed request outcomes into the ML optimizer.
    pub enable_ml_optimization: bool,
    /// Behaviour profile used by adaptive timing.
    pub behavior_profile: BehaviorProfile,
    /// How consistently spoofed fingerprints are kept.
    pub spoofing_consistency: ConsistencyLevel,
    /// External captcha-solving service used by captcha-capable solvers.
    pub captcha_provider: Option<Arc<dyn CaptchaProvider>>,
    /// Custom JavaScript interpreter; `None` falls back to the bundled Boa engine.
    pub interpreter: Option<Arc<dyn JavascriptInterpreter>>,
    /// TLS settings used when TLS fingerprinting is enabled.
    pub tls_config: TLSConfig,
    /// Maximum attempts when a mitigation plan requests retries (min 1).
    pub max_challenge_attempts: usize,
}
151
152impl Default for CloudScraperConfig {
153    fn default() -> Self {
154        Self {
155            user_agent: UserAgentOptions::default(),
156            proxies: Vec::new(),
157            proxy_config: ProxyConfig::default(),
158            enable_metrics: true,
159            enable_performance_monitoring: true,
160            enable_tls_fingerprinting: true,
161            enable_anti_detection: true,
162            enable_spoofing: true,
163            enable_adaptive_timing: true,
164            enable_ml_optimization: true,
165            behavior_profile: BehaviorProfile::Casual,
166            spoofing_consistency: ConsistencyLevel::Domain,
167            captcha_provider: None,
168            interpreter: None,
169            tls_config: TLSConfig::default(),
170            max_challenge_attempts: 3,
171        }
172    }
173}
174
/// Fluent builder for [`CloudScraper`].
pub struct CloudScraperBuilder {
    // Configuration accumulated by the `with_*` / `disable_*` setters.
    config: CloudScraperConfig,
}
179
180impl CloudScraperBuilder {
181    pub fn new() -> Self {
182        Self {
183            config: CloudScraperConfig::default(),
184        }
185    }
186
187    pub fn with_user_agent_options(mut self, options: UserAgentOptions) -> Self {
188        self.config.user_agent = options;
189        self
190    }
191
192    pub fn with_proxies<I, S>(mut self, proxies: I) -> Self
193    where
194        I: IntoIterator<Item = S>,
195        S: Into<String>,
196    {
197        self.config.proxies = proxies.into_iter().map(Into::into).collect();
198        self
199    }
200
201    pub fn with_proxy_config(mut self, config: ProxyConfig) -> Self {
202        self.config.proxy_config = config;
203        self
204    }
205
206    pub fn with_captcha_provider(mut self, provider: Arc<dyn CaptchaProvider>) -> Self {
207        self.config.captcha_provider = Some(provider);
208        self
209    }
210
211    pub fn with_interpreter(mut self, interpreter: Arc<dyn JavascriptInterpreter>) -> Self {
212        self.config.interpreter = Some(interpreter);
213        self
214    }
215
216    pub fn disable_metrics(mut self) -> Self {
217        self.config.enable_metrics = false;
218        self
219    }
220
221    pub fn disable_performance_monitoring(mut self) -> Self {
222        self.config.enable_performance_monitoring = false;
223        self
224    }
225
226    pub fn disable_tls_fingerprinting(mut self) -> Self {
227        self.config.enable_tls_fingerprinting = false;
228        self
229    }
230
231    pub fn disable_anti_detection(mut self) -> Self {
232        self.config.enable_anti_detection = false;
233        self
234    }
235
236    pub fn disable_spoofing(mut self) -> Self {
237        self.config.enable_spoofing = false;
238        self
239    }
240
241    pub fn disable_adaptive_timing(mut self) -> Self {
242        self.config.enable_adaptive_timing = false;
243        self
244    }
245
246    pub fn disable_ml_optimization(mut self) -> Self {
247        self.config.enable_ml_optimization = false;
248        self
249    }
250
251    pub fn with_behavior_profile(mut self, profile: BehaviorProfile) -> Self {
252        self.config.behavior_profile = profile;
253        self
254    }
255
256    pub fn with_spoofing_consistency(mut self, level: ConsistencyLevel) -> Self {
257        self.config.spoofing_consistency = level;
258        self
259    }
260
261    pub fn with_tls_config(mut self, config: TLSConfig) -> Self {
262        self.config.tls_config = config;
263        self
264    }
265
266    pub fn with_max_challenge_attempts(mut self, attempts: usize) -> Self {
267        self.config.max_challenge_attempts = attempts.max(1);
268        self
269    }
270
271    pub fn build(self) -> CloudScraperResult<CloudScraper> {
272        CloudScraper::with_config(self.config)
273    }
274}
275
276impl Default for CloudScraperBuilder {
277    fn default() -> Self {
278        Self::new()
279    }
280}
281
/// Stateful helper shared between concurrent requests.
///
/// Lives behind a single `tokio::sync::Mutex` in [`CloudScraper`]; every
/// field except `pipeline` is an optional subsystem enabled via config.
struct CloudScraperInner {
    // Challenge detection + solving pipeline (always present).
    pipeline: ChallengePipeline,
    // Proxy rotation, populated when `config.proxies` is non-empty.
    proxy_manager: Option<ProxyManager>,
    // Endpoint chosen for the most recent request; surfaced to the pipeline.
    current_proxy: Option<String>,
    // TLS profile manager (`enable_tls_fingerprinting`).
    tls_manager: Option<DefaultTLSManager>,
    // Per-domain fingerprint generator (`enable_spoofing`).
    fingerprint: Option<FingerprintGenerator>,
    // Header/timing anti-detection strategy (`enable_anti_detection`).
    anti_detection: Option<DefaultAntiDetection>,
    // Delay calculator (`enable_adaptive_timing`).
    adaptive_timing: Option<DefaultAdaptiveTiming>,
    // Latency/success tracker (`enable_performance_monitoring`).
    performance_monitor: Option<PerformanceMonitor>,
    // Outcome-driven optimizer (`enable_ml_optimization`).
    ml_optimizer: Option<MLOptimizer>,
}
294
295impl CloudScraperInner {
296    fn new(pipeline: ChallengePipeline) -> Self {
297        Self {
298            pipeline,
299            proxy_manager: None,
300            current_proxy: None,
301            tls_manager: None,
302            fingerprint: None,
303            anti_detection: None,
304            adaptive_timing: None,
305            performance_monitor: None,
306            ml_optimizer: None,
307        }
308    }
309}
310
/// Reqwest client pool keyed by proxy endpoint.
///
/// The `None` key maps to the direct (proxy-less) client. Every pooled
/// client shares the same default headers and keeps its own cookie store.
struct ClientPool {
    // Default headers applied to every client built by the pool.
    base_headers: reqwest::header::HeaderMap,
    // Lazily-built clients, one per proxy endpoint (`None` = no proxy).
    clients: Mutex<HashMap<Option<String>, reqwest::Client>>,
}
316
317impl ClientPool {
318    fn new(base_headers: reqwest::header::HeaderMap) -> Self {
319        Self {
320            base_headers,
321            clients: Mutex::new(HashMap::new()),
322        }
323    }
324
325    async fn client(&self, proxy: Option<&str>) -> CloudScraperResult<reqwest::Client> {
326        let mut guard = self.clients.lock().await;
327        let key = proxy.map(|p| p.to_string());
328        if let Some(client) = guard.get(&key) {
329            return Ok(client.clone());
330        }
331
332        let mut builder = reqwest::Client::builder()
333            .cookie_store(true)
334            .default_headers(self.base_headers.clone());
335
336        if let Some(endpoint) = proxy {
337            builder = builder.proxy(reqwest::Proxy::all(endpoint)?);
338        }
339
340        let client = builder.build()?;
341        guard.insert(key.clone(), client.clone());
342        Ok(client)
343    }
344}
345
/// Main scraper orchestrator.
pub struct CloudScraper {
    // Immutable configuration captured at construction time.
    config: CloudScraperConfig,
    // Header template (http crate types) cloned for every outgoing request.
    base_headers_http: HeaderMap,
    // Per-proxy reqwest clients.
    client_pool: Arc<ClientPool>,
    // Dedicated client used to submit challenge answers.
    challenge_client: Arc<dyn ChallengeHttpClient>,
    // Per-domain success/failure and sticky-header state.
    state: StateManager,
    // Metrics collector, present when `enable_metrics` is set.
    metrics: Option<MetricsCollector>,
    // Event fan-out (logging handler, optional metrics handler).
    events: Arc<EventDispatcher>,
    // Mutable subsystems, serialised behind an async mutex.
    inner: Mutex<CloudScraperInner>,
}
357
358impl CloudScraper {
359    /// Construct a scraper with default configuration.
360    pub fn new() -> CloudScraperResult<Self> {
361        CloudScraper::with_config(CloudScraperConfig::default())
362    }
363
364    /// Obtain a builder to customise the scraper instance.
365    pub fn builder() -> CloudScraperBuilder {
366        CloudScraperBuilder::new()
367    }
368
    /// Wire up the full orchestrator from a finished configuration.
    ///
    /// Resolves the browser profile, registers every challenge solver on the
    /// pipeline, and instantiates each optional subsystem the configuration
    /// enables before assembling the shared state.
    fn with_config(config: CloudScraperConfig) -> CloudScraperResult<Self> {
        // Browser-profile headers seed both the reqwest client defaults and
        // the per-request header template.
        let profile = get_user_agent_profile(config.user_agent.clone())?;
        let base_headers_http = to_http_headers(&profile)?;
        let base_headers_reqwest = to_reqwest_headers(&base_headers_http)?;

        let mut pipeline = ChallengePipeline::default();
        // Caller-provided JS engine, or the bundled Boa interpreter.
        let interpreter: Arc<dyn JavascriptInterpreter> = config
            .interpreter
            .clone()
            .unwrap_or_else(|| Arc::new(BoaJavascriptInterpreter::new()));

        // Captcha-capable solvers only receive a provider when one was
        // configured.
        let mut js_v2 = JavascriptV2Solver::new();
        let mut turnstile = TurnstileSolver::new();
        if let Some(provider) = &config.captcha_provider {
            js_v2 = js_v2.with_captcha_provider(provider.clone());
            turnstile = turnstile.with_captcha_provider(provider.clone());
        }

        // Register one handler per challenge family.
        pipeline = pipeline
            .with_javascript_v1(JavascriptV1Solver::new(interpreter.clone()))
            .with_javascript_v2(js_v2)
            .with_managed_v3(ManagedV3Solver::new(interpreter))
            .with_turnstile(turnstile)
            .with_rate_limit(RateLimitHandler::new())
            .with_access_denied(AccessDeniedHandler::new())
            .with_bot_management(BotManagementHandler::new());

        let mut inner = CloudScraperInner::new(pipeline);

        // Optional subsystems are only built when enabled, so disabled
        // features carry no construction cost.
        if !config.proxies.is_empty() {
            let mut manager = ProxyManager::new(config.proxy_config.clone());
            manager.load(config.proxies.iter().cloned());
            inner.proxy_manager = Some(manager);
        }

        if config.enable_tls_fingerprinting {
            inner.tls_manager = Some(DefaultTLSManager::new(config.tls_config.clone()));
        }

        if config.enable_spoofing {
            let mut generator = FingerprintGenerator::default();
            generator = generator.with_consistency(config.spoofing_consistency);
            inner.fingerprint = Some(generator);
        }

        if config.enable_anti_detection {
            inner.anti_detection = Some(DefaultAntiDetection::new(Default::default()));
        }

        if config.enable_adaptive_timing {
            let mut timing = DefaultAdaptiveTiming::new();
            timing.set_behavior_profile(config.behavior_profile);
            inner.adaptive_timing = Some(timing);
        }

        if config.enable_performance_monitoring {
            inner.performance_monitor = Some(PerformanceMonitor::new(Default::default()));
        }

        if config.enable_ml_optimization {
            inner.ml_optimizer = Some(MLOptimizer::default());
        }

        let client_pool = Arc::new(ClientPool::new(base_headers_reqwest));
        let challenge_client = Arc::new(ReqwestChallengeHttpClient::new()?);
        let state = StateManager::new();
        let metrics = config.enable_metrics.then(MetricsCollector::new);

        // Logging handler is always attached; the metrics handler only when
        // metrics collection is enabled.
        let mut events = EventDispatcher::new();
        events.register_handler(Arc::new(LoggingHandler));
        if let Some(ref collector) = metrics {
            events.register_handler(Arc::new(MetricsHandler::new(collector.clone())));
        }

        Ok(Self {
            config,
            base_headers_http,
            client_pool,
            challenge_client,
            state,
            metrics,
            events: Arc::new(events),
            inner: Mutex::new(inner),
        })
    }
454
455    /// Perform an HTTP GET request.
456    pub async fn get(&self, url: &str) -> CloudScraperResult<ScraperResponse> {
457        let url = Url::parse(url)?;
458        self.request(Method::GET, url, None).await
459    }
460
    /// Perform an arbitrary HTTP request.
    ///
    /// Runs the full orchestration loop: prepare headers/proxy/delay, send,
    /// feed the response through the challenge pipeline, then either return
    /// the response, solve a challenge inline, or retry per the mitigation
    /// plan (bounded by `max_challenge_attempts`).
    pub async fn request(
        &self,
        method: Method,
        url: Url,
        body: Option<Vec<u8>>,
    ) -> CloudScraperResult<ScraperResponse> {
        // Proxy forced by a previous mitigation plan; consumed on the next
        // attempt via `take()`.
        let mut forced_proxy: Option<String> = None;
        let mut attempt = 0usize;

        loop {
            attempt += 1;

            let (headers_http, anti_ctx, proxy, mut delay) = self
                .prepare_request(
                    &method,
                    &url,
                    body.as_ref().map(|b| b.len()).unwrap_or(0),
                    forced_proxy.take(),
                )
                .await?;

            // The anti-detection layer may suggest a longer pause than the
            // adaptive-timing delay; honour whichever is larger.
            if let Some(hint) = anti_ctx.delay_hint()
                && hint > delay
            {
                delay = hint;
            }

            self.events
                .dispatch(ScraperEvent::PreRequest(PreRequestEvent {
                    url: url.clone(),
                    method: method.clone(),
                    headers: headers_http.clone(),
                    timestamp: chrono::Utc::now(),
                }));

            let client = self.client_pool.client(proxy.as_deref()).await?;

            // Apply the computed human-like delay before sending.
            if delay > Duration::from_millis(0) {
                sleep(delay).await;
            }

            let req_headers = to_reqwest_headers(&headers_http)?;
            let mut builder = client
                .request(method.clone(), url.clone())
                .headers(req_headers);
            if let Some(ref body) = body {
                builder = builder.body(body.clone());
            }

            let started = Instant::now();
            let resp = builder.send().await?;
            let latency = started.elapsed();

            // Materialise everything before pipeline evaluation: it needs
            // the final URL, status, headers and body text to detect
            // challenges.
            let final_url = resp.url().clone();
            let status = resp.status().as_u16();
            let headers_raw = resp.headers().clone();
            let body_bytes = resp.bytes().await?.to_vec();
            let body_text = String::from_utf8_lossy(&body_bytes).to_string();

            let http_headers = reqwest_to_http(&headers_raw)?;
            let challenge_response = ChallengeResponse {
                url: &final_url,
                status,
                headers: &http_headers,
                body: &body_text,
                request_method: &method,
            };

            self.events
                .dispatch(ScraperEvent::PostResponse(PostResponseEvent {
                    url: final_url.clone(),
                    method: method.clone(),
                    status,
                    latency,
                    timestamp: chrono::Utc::now(),
                }));

            // Evaluate under the inner lock; the pipeline may consult and
            // mutate the proxy, fingerprint and TLS subsystems while
            // deciding what to do.
            let result = {
                let mut guard = self.inner.lock().await;
                let CloudScraperInner {
                    pipeline,
                    proxy_manager,
                    current_proxy,
                    tls_manager,
                    fingerprint,
                    ..
                } = &mut *guard;

                pipeline
                    .evaluate(
                        &challenge_response,
                        PipelineContext {
                            proxy_pool: proxy_manager.as_mut().map(|pm| pm as &mut dyn ProxyPool),
                            current_proxy: current_proxy.as_deref(),
                            failure_recorder: Some(&self.state),
                            fingerprint_manager: fingerprint.as_mut().map(|fp| {
                                fp as &mut dyn crate::challenges::solvers::FingerprintManager
                            }),
                            tls_manager: tls_manager
                                .as_mut()
                                .map(|tls| tls as &mut dyn TlsProfileManager),
                        },
                    )
                    .await
            };

            match result {
                // Clean response: record the outcome and return it as-is.
                ChallengePipelineResult::NoChallenge => {
                    self.record_outcome(true, status, latency, delay, &final_url)
                        .await;
                    let response = ScraperResponse::new(
                        status,
                        http_headers.clone(),
                        Bytes::from(body_bytes),
                        final_url,
                    );
                    return Ok(response);
                }
                // A solver produced a submission: execute it and return the
                // post-challenge response.
                ChallengePipelineResult::Submission {
                    detection,
                    submission,
                } => {
                    let (response, challenge_latency) = self
                        .handle_submission(
                            submission,
                            detection,
                            &method,
                            &url,
                            headers_http.clone(),
                            body.clone(),
                        )
                        .await?;
                    self.record_outcome(
                        response.status() < 500,
                        response.status(),
                        latency + challenge_latency,
                        delay,
                        response.url(),
                    )
                    .await;
                    return Ok(response);
                }
                // The pipeline requested a mitigation (wait and/or proxy
                // rotation) instead of an inline solve; retry while the
                // attempt budget remains.
                ChallengePipelineResult::Mitigation { detection, plan } => {
                    self.record_outcome(false, status, latency, delay, &final_url)
                        .await;
                    self.events
                        .dispatch(ScraperEvent::Challenge(ChallengeEvent {
                            domain: detection.url.clone(),
                            challenge_type: format!("{:?}", detection.challenge_type),
                            success: false,
                            metadata: vec![
                                ("reason".into(), plan.reason.clone()),
                                ("pattern".into(), detection.pattern_id.clone()),
                            ],
                            timestamp: chrono::Utc::now(),
                        }));

                    if let Some(wait) = plan.wait {
                        sleep(wait).await;
                    }

                    // A proxy suggested by the plan takes priority on the
                    // next attempt.
                    if let Some(ref proxy_hint) = plan.new_proxy {
                        forced_proxy = Some(proxy_hint.clone());
                    }

                    let should_retry =
                        plan.should_retry && attempt < self.config.max_challenge_attempts;
                    if should_retry {
                        self.events.dispatch(ScraperEvent::Retry(RetryEvent {
                            domain: detection.url,
                            attempt: (attempt + 1) as u32,
                            reason: plan.reason.clone(),
                            scheduled_after: plan.wait.unwrap_or_default(),
                            timestamp: chrono::Utc::now(),
                        }));
                        continue;
                    } else {
                        return Err(CloudScraperError::Mitigation(Box::new(plan)));
                    }
                }
                // Challenge recognised, but no registered solver handles it.
                ChallengePipelineResult::Unsupported { detection, reason } => {
                    self.record_outcome(false, status, latency, delay, &final_url)
                        .await;
                    self.events
                        .dispatch(ScraperEvent::Challenge(ChallengeEvent {
                            domain: detection.url,
                            challenge_type: detection.pattern_name,
                            success: false,
                            metadata: vec![("reason".into(), reason.to_string())],
                            timestamp: chrono::Utc::now(),
                        }));
                    return Err(CloudScraperError::Unsupported(reason));
                }
                // A solver ran but failed; surface the pipeline error.
                ChallengePipelineResult::Failed { detection, error } => {
                    self.record_outcome(false, status, latency, delay, &final_url)
                        .await;
                    self.events
                        .dispatch(ScraperEvent::Error(crate::modules::events::ErrorEvent {
                            domain: detection.url,
                            error: error.to_string(),
                            timestamp: chrono::Utc::now(),
                        }));
                    return Err(CloudScraperError::Pipeline(error));
                }
            }
        }
    }
669
    /// Execute a solver-produced challenge submission and replay the
    /// original request, returning the post-challenge response together
    /// with the time spent on challenge handling.
    async fn handle_submission(
        &self,
        submission: ChallengeSubmission,
        detection: ChallengeDetection,
        method: &Method,
        url: &Url,
        headers: HeaderMap,
        body: Option<Vec<u8>>,
    ) -> CloudScraperResult<(ScraperResponse, Duration)> {
        // The original request is replayed after the challenge is cleared.
        let original = OriginalRequest::new(method.clone(), url.clone())
            .with_headers(headers)
            .with_body(body);

        let started = Instant::now();
        let result =
            execute_challenge_submission(self.challenge_client.clone(), submission, original).await;
        let challenge_latency = started.elapsed();

        // Record the outcome against the pattern even when execution failed,
        // before propagating any error.
        let success = result.is_ok();
        {
            let mut guard = self.inner.lock().await;
            guard
                .pipeline
                .record_outcome(&detection.pattern_id, success);
        }

        let final_response = result?;
        let response = ScraperResponse::new(
            final_response.status,
            final_response.headers.clone(),
            Bytes::from(final_response.body.clone()),
            final_response.url.clone(),
        );

        self.events
            .dispatch(ScraperEvent::Challenge(ChallengeEvent {
                domain: detection.url,
                challenge_type: detection.pattern_name,
                success,
                metadata: vec![
                    ("pattern".into(), detection.pattern_id),
                    ("status".into(), final_response.status.to_string()),
                ],
                timestamp: chrono::Utc::now(),
            }));

        self.events
            .dispatch(ScraperEvent::PostResponse(PostResponseEvent {
                url: response.url().clone(),
                method: method.clone(),
                status: response.status(),
                latency: challenge_latency,
                timestamp: chrono::Utc::now(),
            }));

        Ok((response, challenge_latency))
    }
727
    /// Fan a request outcome out to every interested subsystem: domain
    /// state, metrics, adaptive timing, anti-detection, performance
    /// monitoring and the ML optimizer.
    async fn record_outcome(
        &self,
        success: bool,
        status: u16,
        latency: Duration,
        delay: Duration,
        url: &Url,
    ) {
        let domain = url.host_str().unwrap_or_default();
        if success {
            self.state.record_success(domain);
        } else {
            self.state
                .record_failure(domain, format!("status_{status}"));
        }

        if let Some(ref collector) = self.metrics {
            collector.record_response(domain, status, latency);
        }

        let mut guard = self.inner.lock().await;
        if let Some(timing) = guard.adaptive_timing.as_mut() {
            let outcome = TimingOutcome {
                success,
                response_time: latency,
                applied_delay: delay,
            };
            timing.record_outcome(domain, &outcome);
        }

        if let Some(anti) = guard.anti_detection.as_mut() {
            anti.record_response(domain, status, latency);
        }

        // Performance alerts are surfaced through the log only.
        if let Some(perf) = guard.performance_monitor.as_mut()
            && let Some(report) = perf.record(domain, latency, success)
            && !report.alerts.is_empty()
        {
            log::warn!("performance alerts: {:#?}", report.alerts);
        }

        if let Some(ml) = guard.ml_optimizer.as_mut() {
            // Minimal feature vector: observed latency plus the delay that
            // was applied before the request.
            let mut features = FeatureVector::new();
            features.insert("latency".into(), latency.as_secs_f64());
            features.insert("delay".into(), delay.as_secs_f64());
            ml.record_attempt(domain, features, success, Some(delay.as_secs_f64()));
        }
    }
776
    /// Build the per-attempt request context: headers (base + sticky +
    /// fingerprint + anti-detection), the proxy to route through, and the
    /// delay to apply before sending.
    async fn prepare_request(
        &self,
        method: &Method,
        url: &Url,
        body_size: usize,
        forced_proxy: Option<String>,
    ) -> CloudScraperResult<(HeaderMap, AntiDetectionContext, Option<String>, Duration)> {
        let mut headers = self.base_headers_http.clone();
        // Re-apply any sticky headers recorded for this domain.
        if let Some(state) = self.state.get(url.host_str().unwrap_or("")) {
            for (name, value) in state.sticky_headers {
                let header_name = HeaderName::from_bytes(name.as_bytes())
                    .map_err(|_| CloudScraperError::InvalidHeader(name.clone()))?;
                let header_value = HeaderValue::from_str(&value)
                    .map_err(|_| CloudScraperError::InvalidHeader(name.clone()))?;
                headers.insert(header_name, header_value);
            }
        }

        let mut anti_ctx =
            AntiDetectionContext::new(url.clone(), method.clone()).with_headers(headers.clone());
        anti_ctx.set_body_size(body_size);

        let mut proxy = forced_proxy;
        let mut delay = Duration::from_millis(0);

        {
            let mut guard = self.inner.lock().await;

            // When spoofing is enabled, the per-domain fingerprint overrides
            // user-agent and accept-language.
            if let Some(ref mut generator) = guard.fingerprint
                && let Some(domain) = url.host_str()
            {
                let fp = generator.generate_for(domain);
                anti_ctx.set_user_agent(fp.user_agent.clone());
                headers.insert(
                    HeaderName::from_static("user-agent"),
                    HeaderValue::from_str(&fp.user_agent)
                        .map_err(|_| CloudScraperError::InvalidHeader("user-agent".into()))?,
                );
                headers.insert(
                    HeaderName::from_static("accept-language"),
                    HeaderValue::from_str(&fp.accept_language)
                        .map_err(|_| CloudScraperError::InvalidHeader("accept-language".into()))?,
                );
            }

            // Anti-detection may rewrite the full header set via the context.
            if let Some(ref mut anti) = guard.anti_detection {
                anti.prepare_request(url.host_str().unwrap_or(""), &mut anti_ctx);
                headers = anti_ctx.headers.clone();
            }

            // A mitigation-forced proxy wins; otherwise rotate via the pool.
            if proxy.is_none() {
                let next = guard.proxy_manager.as_mut().and_then(|pm| pm.next_proxy());
                guard.current_proxy = next.clone();
                proxy = next;
            } else {
                guard.current_proxy = proxy.clone();
            }

            if let Some(ref mut timing) = guard.adaptive_timing {
                let request = TimingRequest::new(request_kind(method), body_size);
                delay = timing.calculate_delay(url.host_str().unwrap_or(""), &request);
            }
        }

        Ok((headers, anti_ctx, proxy, delay))
    }
843}
844
845fn request_kind(method: &Method) -> RequestKind {
846    match *method {
847        Method::GET => RequestKind::Get,
848        Method::POST => RequestKind::Post,
849        Method::PUT => RequestKind::Put,
850        Method::PATCH => RequestKind::Patch,
851        Method::DELETE => RequestKind::Delete,
852        Method::HEAD => RequestKind::Head,
853        Method::OPTIONS => RequestKind::Options,
854        _ => RequestKind::Other,
855    }
856}
857
858fn to_http_headers(profile: &UserAgentProfile) -> CloudScraperResult<HeaderMap> {
859    let mut headers = HeaderMap::new();
860    for (name, value) in &profile.headers {
861        let header_name = HeaderName::from_bytes(name.as_bytes())
862            .map_err(|_| CloudScraperError::InvalidHeader(name.clone()))?;
863        let header_value = HeaderValue::from_str(value)
864            .map_err(|_| CloudScraperError::InvalidHeader(name.clone()))?;
865        headers.insert(header_name, header_value);
866    }
867    Ok(headers)
868}
869
870fn to_reqwest_headers(headers: &HeaderMap) -> CloudScraperResult<reqwest::header::HeaderMap> {
871    let mut map = reqwest::header::HeaderMap::new();
872    for (name, value) in headers.iter() {
873        let header_name = reqwest::header::HeaderName::from_bytes(name.as_str().as_bytes())
874            .map_err(|_| CloudScraperError::InvalidHeader(name.to_string()))?;
875        let header_value = reqwest::header::HeaderValue::from_bytes(value.as_bytes())
876            .map_err(|_| CloudScraperError::InvalidHeader(name.to_string()))?;
877        map.insert(header_name, header_value);
878    }
879    Ok(map)
880}
881
882fn reqwest_to_http(headers: &reqwest::header::HeaderMap) -> CloudScraperResult<HeaderMap> {
883    let mut map = HeaderMap::new();
884    for (name, value) in headers.iter() {
885        let header_name = HeaderName::from_bytes(name.as_str().as_bytes())
886            .map_err(|_| CloudScraperError::InvalidHeader(name.to_string()))?;
887        let header_value = HeaderValue::from_bytes(value.as_bytes())
888            .map_err(|_| CloudScraperError::InvalidHeader(name.to_string()))?;
889        map.insert(header_name, header_value);
890    }
891    Ok(map)
892}