// cloudscraper_rs/cloudscraper.rs

1//! High level scraper orchestration.
2//!
3//! Wires together the challenge detectors, solvers, and adaptive subsystems
4//! (timing, anti-detection, spoofing, TLS, ML, metrics…) to expose an
5//! ergonomic HTTP client capable of transparently handling Cloudflare
6//! defences.
7
8use std::collections::HashMap;
9use std::sync::Arc;
10use std::time::{Duration, Instant};
11
12use bytes::Bytes;
13use http::{HeaderMap, HeaderName, HeaderValue, Method};
14use thiserror::Error;
15use tokio::sync::Mutex;
16use tokio::time::sleep;
17use url::Url;
18
19use crate::challenges::core::{
20    ChallengeExecutionError, ChallengeHttpClient, ChallengeResponse, ChallengeSubmission,
21    OriginalRequest, ReqwestChallengeHttpClient, execute_challenge_submission,
22};
23use crate::challenges::detectors::ChallengeDetection;
24use crate::challenges::pipeline::{
25    ChallengePipeline, ChallengePipelineResult, PipelineContext, PipelineError, UnsupportedReason,
26};
27use crate::challenges::solvers::access_denied::ProxyPool;
28use crate::challenges::solvers::{
29    MitigationPlan, TlsProfileManager, access_denied::AccessDeniedHandler,
30    bot_management::BotManagementHandler, javascript_v1::JavascriptV1Solver,
31    javascript_v2::JavascriptV2Solver, managed_v3::ManagedV3Solver, rate_limit::RateLimitHandler,
32    turnstile::TurnstileSolver,
33};
34use crate::challenges::user_agents::{
35    UserAgentError, UserAgentOptions, UserAgentProfile, get_user_agent_profile,
36};
37use crate::external_deps::captcha::CaptchaProvider;
38use crate::external_deps::interpreters::{BoaJavascriptInterpreter, JavascriptInterpreter};
39use crate::modules::adaptive_timing::{
40    AdaptiveTimingStrategy, BehaviorProfile, DefaultAdaptiveTiming, RequestKind, TimingOutcome,
41    TimingRequest,
42};
43use crate::modules::anti_detection::{
44    AntiDetectionContext, AntiDetectionStrategy, DefaultAntiDetection,
45};
46use crate::modules::events::{
47    ChallengeEvent, EventDispatcher, LoggingHandler, MetricsHandler, PostResponseEvent,
48    PreRequestEvent, RetryEvent, ScraperEvent,
49};
50use crate::modules::metrics::MetricsCollector;
51use crate::modules::ml::{FeatureVector, MLOptimizer};
52use crate::modules::performance::PerformanceMonitor;
53use crate::modules::proxy::{ProxyConfig, ProxyManager};
54use crate::modules::spoofing::{ConsistencyLevel, FingerprintGenerator};
55use crate::modules::state::StateManager;
56use crate::modules::tls::{DefaultTLSManager, TLSConfig};
57
/// Result alias used across the orchestration layer, pairing `T` with
/// [`CloudScraperError`].
pub type CloudScraperResult<T> = Result<T, CloudScraperError>;
60
61/// High-level error surfaced by the orchestrator.
#[derive(Debug, Error)]
pub enum CloudScraperError {
    /// Transport-level failure from the underlying `reqwest` client.
    #[error("http error: {0}")]
    Http(#[from] reqwest::Error),
    /// The supplied URL string could not be parsed.
    #[error("url parse error: {0}")]
    Url(#[from] url::ParseError),
    /// The user-agent profile could not be resolved from the options.
    #[error("user-agent initialisation failed: {0}")]
    UserAgent(#[from] UserAgentError),
    /// A solved challenge submission failed while being executed.
    #[error("challenge execution failed: {0}")]
    ChallengeExecution(#[from] ChallengeExecutionError),
    /// The detection/solving pipeline itself reported an error.
    #[error("challenge pipeline error: {0}")]
    Pipeline(#[from] PipelineError),
    /// A challenge was recognised but no solver supports it.
    #[error("unsupported challenge ({0})")]
    Unsupported(UnsupportedReason),
    /// Body was requested as text but is not valid UTF-8.
    #[error("utf8 conversion failed: {0}")]
    Utf8(#[from] std::string::FromUtf8Error),
    /// A header name or value could not be encoded; carries the header name.
    #[error("header conversion failed: {0}")]
    InvalidHeader(String),
    /// A mitigation plan remained after retries were exhausted (boxed so the
    /// error variant stays small).
    #[error("mitigation required but retries exhausted: {0:?}")]
    Mitigation(Box<MitigationPlan>),
    /// Challenge handling was aborted, with a human-readable reason.
    #[error("challenge handling aborted: {0}")]
    Aborted(String),
}
85
86/// Read-only HTTP response returned by the scraper.
#[derive(Debug, Clone)]
pub struct ScraperResponse {
    // Numeric HTTP status code of the final (post-challenge) response.
    status: u16,
    // Response headers of the final response.
    headers: HeaderMap,
    // Fully buffered response body.
    body: Bytes,
    // Final URL after redirects / challenge handling.
    url: Url,
}
94
95impl ScraperResponse {
96    fn new(status: u16, headers: HeaderMap, body: Bytes, url: Url) -> Self {
97        Self {
98            status,
99            headers,
100            body,
101            url,
102        }
103    }
104
105    /// HTTP status code as returned by Cloudflare/target origin.
106    pub fn status(&self) -> u16 {
107        self.status
108    }
109
110    /// Final URL after challenge handling / redirects.
111    pub fn url(&self) -> &Url {
112        &self.url
113    }
114
115    /// Response headers.
116    pub fn headers(&self) -> &HeaderMap {
117        &self.headers
118    }
119
120    /// Convenience helper returning the body as UTF-8 text.
121    pub async fn text(&self) -> CloudScraperResult<String> {
122        Ok(String::from_utf8(self.body.to_vec())?)
123    }
124
125    /// Raw body bytes.
126    pub async fn bytes(&self) -> Bytes {
127        self.body.clone()
128    }
129}
130
131/// Scraper configuration used by the builder.
#[derive(Clone)]
pub struct CloudScraperConfig {
    /// Options used to resolve the base user-agent profile.
    pub user_agent: UserAgentOptions,
    /// Optional `Content-Type` header applied to every request.
    pub content_type: Option<String>,
    /// Proxy endpoints loaded into the proxy manager (empty = direct).
    pub proxies: Vec<String>,
    /// Settings handed to the proxy manager.
    pub proxy_config: ProxyConfig,
    /// Collect per-domain response metrics.
    pub enable_metrics: bool,
    /// Track latency/success per domain and log performance alerts.
    pub enable_performance_monitoring: bool,
    /// Maintain TLS profiles for the challenge pipeline.
    pub enable_tls_fingerprinting: bool,
    /// Run the anti-detection request-shaping layer.
    pub enable_anti_detection: bool,
    /// Generate per-domain spoofed fingerprints (user-agent, accept-language).
    pub enable_spoofing: bool,
    /// Insert adaptive delays computed per domain before each request.
    pub enable_adaptive_timing: bool,
    /// Feed request outcomes into the ML optimizer.
    pub enable_ml_optimization: bool,
    /// Behaviour persona used by adaptive timing.
    pub behavior_profile: BehaviorProfile,
    /// Consistency level applied to spoofed fingerprints.
    pub spoofing_consistency: ConsistencyLevel,
    /// External captcha-solving service, wired into the JS v2 and Turnstile solvers.
    pub captcha_provider: Option<Arc<dyn CaptchaProvider>>,
    /// JavaScript interpreter override (defaults to the bundled Boa engine).
    pub interpreter: Option<Arc<dyn JavascriptInterpreter>>,
    /// TLS settings used when TLS fingerprinting is enabled.
    pub tls_config: TLSConfig,
    /// Upper bound on challenge retry iterations per request (clamped to >= 1).
    pub max_challenge_attempts: usize,
}
152
153impl Default for CloudScraperConfig {
154    fn default() -> Self {
155        Self {
156            user_agent: UserAgentOptions::default(),
157            content_type: None,
158            proxies: Vec::new(),
159            proxy_config: ProxyConfig::default(),
160            enable_metrics: true,
161            enable_performance_monitoring: true,
162            enable_tls_fingerprinting: true,
163            enable_anti_detection: true,
164            enable_spoofing: true,
165            enable_adaptive_timing: true,
166            enable_ml_optimization: true,
167            behavior_profile: BehaviorProfile::Casual,
168            spoofing_consistency: ConsistencyLevel::Domain,
169            captcha_provider: None,
170            interpreter: None,
171            tls_config: TLSConfig::default(),
172            max_challenge_attempts: 3,
173        }
174    }
175}
176
/// Fluent builder for [`CloudScraper`].
pub struct CloudScraperBuilder {
    // Accumulated configuration; finalised by `build()`.
    config: CloudScraperConfig,
}
181
182impl CloudScraperBuilder {
183    pub fn new() -> Self {
184        Self {
185            config: CloudScraperConfig::default(),
186        }
187    }
188
189    pub fn with_user_agent_options(mut self, options: UserAgentOptions) -> Self {
190        self.config.user_agent = options;
191        self
192    }
193
194    pub fn with_content_type(mut self, content_type: String) -> Self {
195        self.config.content_type = Some(content_type);
196        self
197    }
198
199    pub fn with_proxies<I, S>(mut self, proxies: I) -> Self
200    where
201        I: IntoIterator<Item = S>,
202        S: Into<String>,
203    {
204        self.config.proxies = proxies.into_iter().map(Into::into).collect();
205        self
206    }
207
208    pub fn with_proxy_config(mut self, config: ProxyConfig) -> Self {
209        self.config.proxy_config = config;
210        self
211    }
212
213    pub fn with_captcha_provider(mut self, provider: Arc<dyn CaptchaProvider>) -> Self {
214        self.config.captcha_provider = Some(provider);
215        self
216    }
217
218    pub fn with_interpreter(mut self, interpreter: Arc<dyn JavascriptInterpreter>) -> Self {
219        self.config.interpreter = Some(interpreter);
220        self
221    }
222
223    pub fn disable_metrics(mut self) -> Self {
224        self.config.enable_metrics = false;
225        self
226    }
227
228    pub fn disable_performance_monitoring(mut self) -> Self {
229        self.config.enable_performance_monitoring = false;
230        self
231    }
232
233    pub fn disable_tls_fingerprinting(mut self) -> Self {
234        self.config.enable_tls_fingerprinting = false;
235        self
236    }
237
238    pub fn disable_anti_detection(mut self) -> Self {
239        self.config.enable_anti_detection = false;
240        self
241    }
242
243    pub fn disable_spoofing(mut self) -> Self {
244        self.config.enable_spoofing = false;
245        self
246    }
247
248    pub fn disable_adaptive_timing(mut self) -> Self {
249        self.config.enable_adaptive_timing = false;
250        self
251    }
252
253    pub fn disable_ml_optimization(mut self) -> Self {
254        self.config.enable_ml_optimization = false;
255        self
256    }
257
258    pub fn with_behavior_profile(mut self, profile: BehaviorProfile) -> Self {
259        self.config.behavior_profile = profile;
260        self
261    }
262
263    pub fn with_spoofing_consistency(mut self, level: ConsistencyLevel) -> Self {
264        self.config.spoofing_consistency = level;
265        self
266    }
267
268    pub fn with_tls_config(mut self, config: TLSConfig) -> Self {
269        self.config.tls_config = config;
270        self
271    }
272
273    pub fn with_max_challenge_attempts(mut self, attempts: usize) -> Self {
274        self.config.max_challenge_attempts = attempts.max(1);
275        self
276    }
277
278    pub fn build(self) -> CloudScraperResult<CloudScraper> {
279        CloudScraper::with_config(self.config)
280    }
281}
282
283impl Default for CloudScraperBuilder {
284    fn default() -> Self {
285        Self::new()
286    }
287}
288
/// Stateful helper shared between concurrent requests.
///
/// Lives behind the scraper's async mutex; each `Option` field corresponds
/// to an `enable_*` flag in [`CloudScraperConfig`].
struct CloudScraperInner {
    // Challenge detection/solving pipeline (always present).
    pipeline: ChallengePipeline,
    // Proxy rotation, populated only when proxies were configured.
    proxy_manager: Option<ProxyManager>,
    // Endpoint chosen for the in-flight request; fed to the pipeline context.
    current_proxy: Option<String>,
    // TLS profile management (enable_tls_fingerprinting).
    tls_manager: Option<DefaultTLSManager>,
    // Per-domain fingerprint spoofing (enable_spoofing).
    fingerprint: Option<FingerprintGenerator>,
    // Request-shaping heuristics (enable_anti_detection).
    anti_detection: Option<DefaultAntiDetection>,
    // Per-domain delay computation (enable_adaptive_timing).
    adaptive_timing: Option<DefaultAdaptiveTiming>,
    // Latency/success tracking (enable_performance_monitoring).
    performance_monitor: Option<PerformanceMonitor>,
    // Outcome-driven optimisation (enable_ml_optimization).
    ml_optimizer: Option<MLOptimizer>,
}
301
302impl CloudScraperInner {
303    fn new(pipeline: ChallengePipeline) -> Self {
304        Self {
305            pipeline,
306            proxy_manager: None,
307            current_proxy: None,
308            tls_manager: None,
309            fingerprint: None,
310            anti_detection: None,
311            adaptive_timing: None,
312            performance_monitor: None,
313            ml_optimizer: None,
314        }
315    }
316}
317
/// Reqwest client pool keyed by proxy endpoint.
struct ClientPool {
    // Default headers stamped onto every client built by the pool.
    base_headers: reqwest::header::HeaderMap,
    // One lazily built client per proxy endpoint (`None` key = direct connection).
    clients: Mutex<HashMap<Option<String>, reqwest::Client>>,
}
323
324impl ClientPool {
325    fn new(base_headers: reqwest::header::HeaderMap) -> Self {
326        Self {
327            base_headers,
328            clients: Mutex::new(HashMap::new()),
329        }
330    }
331
332    async fn client(&self, proxy: Option<&str>) -> CloudScraperResult<reqwest::Client> {
333        let mut guard = self.clients.lock().await;
334        let key = proxy.map(|p| p.to_string());
335        if let Some(client) = guard.get(&key) {
336            return Ok(client.clone());
337        }
338
339        let mut builder = reqwest::Client::builder()
340            .cookie_store(true)
341            .default_headers(self.base_headers.clone());
342
343        if let Some(endpoint) = proxy {
344            builder = builder.proxy(reqwest::Proxy::all(endpoint)?);
345        }
346
347        let client = builder.build()?;
348        guard.insert(key.clone(), client.clone());
349        Ok(client)
350    }
351}
352
/// Main scraper orchestrator.
pub struct CloudScraper {
    // Configuration captured at construction time.
    config: CloudScraperConfig,
    // Base headers derived from the resolved user-agent profile.
    base_headers_http: HeaderMap,
    // Per-proxy reqwest clients (each with its own cookie store).
    client_pool: Arc<ClientPool>,
    // Dedicated client used when executing challenge submissions.
    challenge_client: Arc<dyn ChallengeHttpClient>,
    // Per-domain state (success/failure records, sticky headers).
    state: StateManager,
    // Response-metric collector, when enabled.
    metrics: Option<MetricsCollector>,
    // Event fan-out (logging and, when metrics are on, a metrics handler).
    events: Arc<EventDispatcher>,
    // Mutable subsystems shared across concurrent requests.
    inner: Mutex<CloudScraperInner>,
}
364
365impl CloudScraper {
366    /// Construct a scraper with default configuration.
367    pub fn new() -> CloudScraperResult<Self> {
368        CloudScraper::with_config(CloudScraperConfig::default())
369    }
370
371    /// Obtain a builder to customise the scraper instance.
372    pub fn builder() -> CloudScraperBuilder {
373        CloudScraperBuilder::new()
374    }
375
    /// Build a fully wired scraper from `config`.
    ///
    /// Resolves a user-agent profile into base headers, assembles the
    /// challenge pipeline with every solver (wiring in the captcha provider
    /// and JS interpreter when configured), then enables each optional
    /// subsystem guarded by its `enable_*` flag.
    ///
    /// # Errors
    /// Fails when the user-agent profile cannot be resolved, when profile
    /// headers are not valid HTTP headers, or when the challenge HTTP client
    /// cannot be constructed.
    fn with_config(config: CloudScraperConfig) -> CloudScraperResult<Self> {
        let profile = get_user_agent_profile(config.user_agent.clone())?;
        let base_headers_http = to_http_headers(&profile)?;
        let base_headers_reqwest = to_reqwest_headers(&base_headers_http)?;

        let mut pipeline = ChallengePipeline::default();
        // Fall back to the bundled Boa interpreter when the caller did not
        // supply one.
        let interpreter: Arc<dyn JavascriptInterpreter> = config
            .interpreter
            .clone()
            .unwrap_or_else(|| Arc::new(BoaJavascriptInterpreter::new()));

        // JS v2 and Turnstile are the only solvers that accept an external
        // captcha provider.
        let mut js_v2 = JavascriptV2Solver::new();
        let mut turnstile = TurnstileSolver::new();
        if let Some(provider) = &config.captcha_provider {
            js_v2 = js_v2.with_captcha_provider(provider.clone());
            turnstile = turnstile.with_captcha_provider(provider.clone());
        }

        pipeline = pipeline
            .with_javascript_v1(JavascriptV1Solver::new(interpreter.clone()))
            .with_javascript_v2(js_v2)
            .with_managed_v3(ManagedV3Solver::new(interpreter))
            .with_turnstile(turnstile)
            .with_rate_limit(RateLimitHandler::new())
            .with_access_denied(AccessDeniedHandler::new())
            .with_bot_management(BotManagementHandler::new());

        let mut inner = CloudScraperInner::new(pipeline);

        // Each optional subsystem is constructed only when its flag is set;
        // absent subsystems stay `None` and are skipped at request time.
        if !config.proxies.is_empty() {
            let mut manager = ProxyManager::new(config.proxy_config.clone());
            manager.load(config.proxies.iter().cloned());
            inner.proxy_manager = Some(manager);
        }

        if config.enable_tls_fingerprinting {
            inner.tls_manager = Some(DefaultTLSManager::new(config.tls_config.clone()));
        }

        if config.enable_spoofing {
            let mut generator = FingerprintGenerator::default();
            generator = generator.with_consistency(config.spoofing_consistency);
            inner.fingerprint = Some(generator);
        }

        if config.enable_anti_detection {
            inner.anti_detection = Some(DefaultAntiDetection::new(Default::default()));
        }

        if config.enable_adaptive_timing {
            let mut timing = DefaultAdaptiveTiming::new();
            timing.set_behavior_profile(config.behavior_profile);
            inner.adaptive_timing = Some(timing);
        }

        if config.enable_performance_monitoring {
            inner.performance_monitor = Some(PerformanceMonitor::new(Default::default()));
        }

        if config.enable_ml_optimization {
            inner.ml_optimizer = Some(MLOptimizer::default());
        }

        let client_pool = Arc::new(ClientPool::new(base_headers_reqwest));
        let challenge_client = Arc::new(ReqwestChallengeHttpClient::new()?);
        let state = StateManager::new();
        let metrics = config.enable_metrics.then(MetricsCollector::new);

        // Logging handler is always registered; metrics events are routed
        // only when a collector exists.
        let mut events = EventDispatcher::new();
        events.register_handler(Arc::new(LoggingHandler));
        if let Some(ref collector) = metrics {
            events.register_handler(Arc::new(MetricsHandler::new(collector.clone())));
        }

        Ok(Self {
            config,
            base_headers_http,
            client_pool,
            challenge_client,
            state,
            metrics,
            events: Arc::new(events),
            inner: Mutex::new(inner),
        })
    }
461
462    /// Perform an HTTP GET request.
463    pub async fn get(&self, url: &str) -> CloudScraperResult<ScraperResponse> {
464        let url = Url::parse(url)?;
465        self.request(Method::GET, url, None).await
466    }
467
    /// Perform an arbitrary HTTP request, transparently handling any
    /// Cloudflare challenge encountered along the way.
    ///
    /// Each loop iteration prepares headers/proxy/delay, sends the request,
    /// classifies the response through the challenge pipeline, and then
    /// either returns the page, executes a solver submission, retries under
    /// a mitigation plan (optional wait and proxy rotation, bounded by
    /// `max_challenge_attempts`), or surfaces an error.
    ///
    /// # Errors
    /// Propagates transport, header-conversion, pipeline, and challenge
    /// execution failures; see [`CloudScraperError`] for the cases.
    pub async fn request(
        &self,
        method: Method,
        url: Url,
        body: Option<Vec<u8>>,
    ) -> CloudScraperResult<ScraperResponse> {
        // Proxy forced by a previous mitigation plan; consumed (`take`) on
        // the next iteration.
        let mut forced_proxy: Option<String> = None;
        let mut attempt = 0usize;

        loop {
            attempt += 1;

            // Headers, anti-detection context, proxy choice, and adaptive
            // delay for this attempt.
            let (mut headers_http, anti_ctx, proxy, mut delay) = self
                .prepare_request(
                    &method,
                    &url,
                    body.as_ref().map(|b| b.len()).unwrap_or(0),
                    forced_proxy.take(),
                )
                .await?;

            if let Some(ref ct) = self.config.content_type {
                headers_http.insert(
                    HeaderName::from_static("content-type"),
                    HeaderValue::from_str(ct)
                        .map_err(|_| CloudScraperError::InvalidHeader("content-type".into()))?,
                );
            }

            // Honour the larger of the adaptive-timing delay and the
            // anti-detection hint.
            if let Some(hint) = anti_ctx.delay_hint()
                && hint > delay
            {
                delay = hint;
            }

            self.events
                .dispatch(ScraperEvent::PreRequest(PreRequestEvent {
                    url: url.clone(),
                    method: method.clone(),
                    headers: headers_http.clone(),
                    timestamp: chrono::Utc::now(),
                }));

            let client = self.client_pool.client(proxy.as_deref()).await?;

            if delay > Duration::from_millis(0) {
                sleep(delay).await;
            }

            let req_headers = to_reqwest_headers(&headers_http)?;
            let mut builder = client
                .request(method.clone(), url.clone())
                .headers(req_headers);
            if let Some(ref body) = body {
                builder = builder.body(body.clone());
            }

            let started = Instant::now();
            let resp = builder.send().await?;
            let latency = started.elapsed();

            // Buffer the whole response up front: the pipeline inspects the
            // body text to detect challenges.
            let final_url = resp.url().clone();
            let status = resp.status().as_u16();
            let headers_raw = resp.headers().clone();
            let body_bytes = resp.bytes().await?.to_vec();
            let body_text = String::from_utf8_lossy(&body_bytes).to_string();

            let http_headers = reqwest_to_http(&headers_raw)?;
            let challenge_response = ChallengeResponse {
                url: &final_url,
                status,
                headers: &http_headers,
                body: &body_text,
                request_method: &method,
            };

            self.events
                .dispatch(ScraperEvent::PostResponse(PostResponseEvent {
                    url: final_url.clone(),
                    method: method.clone(),
                    status,
                    latency,
                    timestamp: chrono::Utc::now(),
                }));

            // Evaluate the pipeline while borrowing the mutable subsystems
            // out of the shared inner state (split borrow via destructuring).
            let result = {
                let mut guard = self.inner.lock().await;
                let CloudScraperInner {
                    pipeline,
                    proxy_manager,
                    current_proxy,
                    tls_manager,
                    fingerprint,
                    ..
                } = &mut *guard;

                pipeline
                    .evaluate(
                        &challenge_response,
                        PipelineContext {
                            proxy_pool: proxy_manager.as_mut().map(|pm| pm as &mut dyn ProxyPool),
                            current_proxy: current_proxy.as_deref(),
                            failure_recorder: Some(&self.state),
                            fingerprint_manager: fingerprint.as_mut().map(|fp| {
                                fp as &mut dyn crate::challenges::solvers::FingerprintManager
                            }),
                            tls_manager: tls_manager
                                .as_mut()
                                .map(|tls| tls as &mut dyn TlsProfileManager),
                        },
                    )
                    .await
            };

            match result {
                // Plain page: record the outcome and return it.
                ChallengePipelineResult::NoChallenge => {
                    self.record_outcome(true, status, latency, delay, &final_url)
                        .await;
                    let response = ScraperResponse::new(
                        status,
                        http_headers.clone(),
                        Bytes::from(body_bytes),
                        final_url,
                    );
                    return Ok(response);
                }
                // A solver produced a submission: execute it and return the
                // post-challenge response.
                ChallengePipelineResult::Submission {
                    detection,
                    submission,
                } => {
                    let (response, challenge_latency) = self
                        .handle_submission(
                            submission,
                            detection,
                            &method,
                            &url,
                            headers_http.clone(),
                            body.clone(),
                        )
                        .await?;
                    // A 5xx after a solved challenge still counts as failure
                    // for the adaptive subsystems.
                    self.record_outcome(
                        response.status() < 500,
                        response.status(),
                        latency + challenge_latency,
                        delay,
                        response.url(),
                    )
                    .await;
                    return Ok(response);
                }
                // Not directly solvable: the plan may ask to wait, switch
                // proxy, and retry.
                ChallengePipelineResult::Mitigation { detection, plan } => {
                    self.record_outcome(false, status, latency, delay, &final_url)
                        .await;
                    self.events
                        .dispatch(ScraperEvent::Challenge(ChallengeEvent {
                            domain: detection.url.clone(),
                            challenge_type: format!("{:?}", detection.challenge_type),
                            success: false,
                            metadata: vec![
                                ("reason".into(), plan.reason.clone()),
                                ("pattern".into(), detection.pattern_id.clone()),
                            ],
                            timestamp: chrono::Utc::now(),
                        }));

                    if let Some(wait) = plan.wait {
                        sleep(wait).await;
                    }

                    if let Some(ref proxy_hint) = plan.new_proxy {
                        forced_proxy = Some(proxy_hint.clone());
                    }

                    let should_retry =
                        plan.should_retry && attempt < self.config.max_challenge_attempts;
                    if should_retry {
                        self.events.dispatch(ScraperEvent::Retry(RetryEvent {
                            domain: detection.url,
                            attempt: (attempt + 1) as u32,
                            reason: plan.reason.clone(),
                            scheduled_after: plan.wait.unwrap_or_default(),
                            timestamp: chrono::Utc::now(),
                        }));
                        continue;
                    } else {
                        return Err(CloudScraperError::Mitigation(Box::new(plan)));
                    }
                }
                // Recognised challenge with no available solver.
                ChallengePipelineResult::Unsupported { detection, reason } => {
                    self.record_outcome(false, status, latency, delay, &final_url)
                        .await;
                    self.events
                        .dispatch(ScraperEvent::Challenge(ChallengeEvent {
                            domain: detection.url,
                            challenge_type: detection.pattern_name,
                            success: false,
                            metadata: vec![("reason".into(), reason.to_string())],
                            timestamp: chrono::Utc::now(),
                        }));
                    return Err(CloudScraperError::Unsupported(reason));
                }
                // A solver ran but errored out.
                ChallengePipelineResult::Failed { detection, error } => {
                    self.record_outcome(false, status, latency, delay, &final_url)
                        .await;
                    self.events
                        .dispatch(ScraperEvent::Error(crate::modules::events::ErrorEvent {
                            domain: detection.url,
                            error: error.to_string(),
                            timestamp: chrono::Utc::now(),
                        }));
                    return Err(CloudScraperError::Pipeline(error));
                }
            }
        }
    }
684
685    async fn handle_submission(
686        &self,
687        submission: ChallengeSubmission,
688        detection: ChallengeDetection,
689        method: &Method,
690        url: &Url,
691        headers: HeaderMap,
692        body: Option<Vec<u8>>,
693    ) -> CloudScraperResult<(ScraperResponse, Duration)> {
694        let original = OriginalRequest::new(method.clone(), url.clone())
695            .with_headers(headers)
696            .with_body(body);
697
698        let started = Instant::now();
699        let result =
700            execute_challenge_submission(self.challenge_client.clone(), submission, original).await;
701        let challenge_latency = started.elapsed();
702
703        let success = result.is_ok();
704        {
705            let mut guard = self.inner.lock().await;
706            guard
707                .pipeline
708                .record_outcome(&detection.pattern_id, success);
709        }
710
711        let final_response = result?;
712        let response = ScraperResponse::new(
713            final_response.status,
714            final_response.headers.clone(),
715            Bytes::from(final_response.body.clone()),
716            final_response.url.clone(),
717        );
718
719        self.events
720            .dispatch(ScraperEvent::Challenge(ChallengeEvent {
721                domain: detection.url,
722                challenge_type: detection.pattern_name,
723                success,
724                metadata: vec![
725                    ("pattern".into(), detection.pattern_id),
726                    ("status".into(), final_response.status.to_string()),
727                ],
728                timestamp: chrono::Utc::now(),
729            }));
730
731        self.events
732            .dispatch(ScraperEvent::PostResponse(PostResponseEvent {
733                url: response.url().clone(),
734                method: method.clone(),
735                status: response.status(),
736                latency: challenge_latency,
737                timestamp: chrono::Utc::now(),
738            }));
739
740        Ok((response, challenge_latency))
741    }
742
743    async fn record_outcome(
744        &self,
745        success: bool,
746        status: u16,
747        latency: Duration,
748        delay: Duration,
749        url: &Url,
750    ) {
751        let domain = url.host_str().unwrap_or_default();
752        if success {
753            self.state.record_success(domain);
754        } else {
755            self.state
756                .record_failure(domain, format!("status_{status}"));
757        }
758
759        if let Some(ref collector) = self.metrics {
760            collector.record_response(domain, status, latency);
761        }
762
763        let mut guard = self.inner.lock().await;
764        if let Some(timing) = guard.adaptive_timing.as_mut() {
765            let outcome = TimingOutcome {
766                success,
767                response_time: latency,
768                applied_delay: delay,
769            };
770            timing.record_outcome(domain, &outcome);
771        }
772
773        if let Some(anti) = guard.anti_detection.as_mut() {
774            anti.record_response(domain, status, latency);
775        }
776
777        if let Some(perf) = guard.performance_monitor.as_mut()
778            && let Some(report) = perf.record(domain, latency, success)
779            && !report.alerts.is_empty()
780        {
781            log::warn!("performance alerts: {:#?}", report.alerts);
782        }
783
784        if let Some(ml) = guard.ml_optimizer.as_mut() {
785            let mut features = FeatureVector::new();
786            features.insert("latency".into(), latency.as_secs_f64());
787            features.insert("delay".into(), delay.as_secs_f64());
788            ml.record_attempt(domain, features, success, Some(delay.as_secs_f64()));
789        }
790    }
791
    /// Assemble per-attempt request state: the outgoing headers (base +
    /// sticky + spoofed + anti-detection adjusted), the anti-detection
    /// context, the proxy to use, and the adaptive delay.
    ///
    /// `forced_proxy` (from a mitigation plan) overrides rotation; otherwise
    /// the proxy manager, when present, picks the next endpoint.
    ///
    /// # Errors
    /// Returns [`CloudScraperError::InvalidHeader`] when a sticky or spoofed
    /// header cannot be encoded.
    async fn prepare_request(
        &self,
        method: &Method,
        url: &Url,
        body_size: usize,
        forced_proxy: Option<String>,
    ) -> CloudScraperResult<(HeaderMap, AntiDetectionContext, Option<String>, Duration)> {
        let mut headers = self.base_headers_http.clone();
        // Re-apply headers recorded as sticky in the per-domain state.
        if let Some(state) = self.state.get(url.host_str().unwrap_or("")) {
            for (name, value) in state.sticky_headers {
                let header_name = HeaderName::from_bytes(name.as_bytes())
                    .map_err(|_| CloudScraperError::InvalidHeader(name.clone()))?;
                let header_value = HeaderValue::from_str(&value)
                    .map_err(|_| CloudScraperError::InvalidHeader(name.clone()))?;
                headers.insert(header_name, header_value);
            }
        }

        let mut anti_ctx =
            AntiDetectionContext::new(url.clone(), method.clone()).with_headers(headers.clone());
        anti_ctx.set_body_size(body_size);

        let mut proxy = forced_proxy;
        let mut delay = Duration::from_millis(0);

        {
            let mut guard = self.inner.lock().await;

            // Spoofed per-domain fingerprint overrides user-agent and
            // accept-language.
            if let Some(ref mut generator) = guard.fingerprint
                && let Some(domain) = url.host_str()
            {
                let fp = generator.generate_for(domain);
                anti_ctx.set_user_agent(fp.user_agent.clone());
                headers.insert(
                    HeaderName::from_static("user-agent"),
                    HeaderValue::from_str(&fp.user_agent)
                        .map_err(|_| CloudScraperError::InvalidHeader("user-agent".into()))?,
                );
                headers.insert(
                    HeaderName::from_static("accept-language"),
                    HeaderValue::from_str(&fp.accept_language)
                        .map_err(|_| CloudScraperError::InvalidHeader("accept-language".into()))?,
                );
            }

            // Anti-detection mutates the context's header copy; adopt it.
            // NOTE(review): `anti_ctx` captured `headers` *before* the
            // spoofed values above were inserted, so unless `set_user_agent`
            // or `prepare_request` re-applies them inside the context, this
            // assignment may drop the spoofed headers — verify.
            if let Some(ref mut anti) = guard.anti_detection {
                anti.prepare_request(url.host_str().unwrap_or(""), &mut anti_ctx);
                headers = anti_ctx.headers.clone();
            }

            // Forced proxy wins; otherwise rotate via the manager. Either
            // way, remember the choice for the pipeline context.
            if proxy.is_none() {
                let next = guard.proxy_manager.as_mut().and_then(|pm| pm.next_proxy());
                guard.current_proxy = next.clone();
                proxy = next;
            } else {
                guard.current_proxy = proxy.clone();
            }

            if let Some(ref mut timing) = guard.adaptive_timing {
                let request = TimingRequest::new(request_kind(method), body_size);
                delay = timing.calculate_delay(url.host_str().unwrap_or(""), &request);
            }
        }

        Ok((headers, anti_ctx, proxy, delay))
    }
858}
859
860fn request_kind(method: &Method) -> RequestKind {
861    match *method {
862        Method::GET => RequestKind::Get,
863        Method::POST => RequestKind::Post,
864        Method::PUT => RequestKind::Put,
865        Method::PATCH => RequestKind::Patch,
866        Method::DELETE => RequestKind::Delete,
867        Method::HEAD => RequestKind::Head,
868        Method::OPTIONS => RequestKind::Options,
869        _ => RequestKind::Other,
870    }
871}
872
873fn to_http_headers(profile: &UserAgentProfile) -> CloudScraperResult<HeaderMap> {
874    let mut headers = HeaderMap::new();
875    for (name, value) in &profile.headers {
876        let header_name = HeaderName::from_bytes(name.as_bytes())
877            .map_err(|_| CloudScraperError::InvalidHeader(name.clone()))?;
878        let header_value = HeaderValue::from_str(value)
879            .map_err(|_| CloudScraperError::InvalidHeader(name.clone()))?;
880        headers.insert(header_name, header_value);
881    }
882    Ok(headers)
883}
884
885fn to_reqwest_headers(headers: &HeaderMap) -> CloudScraperResult<reqwest::header::HeaderMap> {
886    let mut map = reqwest::header::HeaderMap::new();
887    for (name, value) in headers.iter() {
888        let header_name = reqwest::header::HeaderName::from_bytes(name.as_str().as_bytes())
889            .map_err(|_| CloudScraperError::InvalidHeader(name.to_string()))?;
890        let header_value = reqwest::header::HeaderValue::from_bytes(value.as_bytes())
891            .map_err(|_| CloudScraperError::InvalidHeader(name.to_string()))?;
892        map.insert(header_name, header_value);
893    }
894    Ok(map)
895}
896
897fn reqwest_to_http(headers: &reqwest::header::HeaderMap) -> CloudScraperResult<HeaderMap> {
898    let mut map = HeaderMap::new();
899    for (name, value) in headers.iter() {
900        let header_name = HeaderName::from_bytes(name.as_str().as_bytes())
901            .map_err(|_| CloudScraperError::InvalidHeader(name.to_string()))?;
902        let header_value = HeaderValue::from_bytes(value.as_bytes())
903            .map_err(|_| CloudScraperError::InvalidHeader(name.to_string()))?;
904        map.insert(header_name, header_value);
905    }
906    Ok(map)
907}