cloudscraper_rs/modules/adaptive_timing/
mod.rs

1//! Adaptive timing algorithms for human-like request pacing.
2//!
3//! Provides behavioural profiles, adaptive delay calculation, circadian
4//! adjustments, and per-domain learning.
5
6use chrono::{DateTime, Local, Timelike};
7use rand::Rng;
8use std::collections::{HashMap, VecDeque};
9use std::time::{Duration, Instant};
10
/// Behaviour profiles that control the high-level timing envelope.
///
/// Each variant is a key into the profile table that
/// `DefaultAdaptiveTiming::new` fills with a matching `TimingProfile`.
/// The figures quoted below are the built-in values from that table,
/// expressed in seconds.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BehaviorProfile {
    /// Built-in base delay 1.5 s, clamped to 0.5–3.0 s. This is the profile
    /// that is active right after `DefaultAdaptiveTiming::new`.
    Casual,
    /// Built-in base delay 0.9 s, clamped to 0.25–2.0 s.
    Focused,
    /// Built-in base delay 2.5 s, clamped to 1.0–6.0 s.
    Research,
    /// Built-in base delay 1.2 s, clamped to 0.4–3.0 s.
    Mobile,
}
19
/// High-level request kinds for timing adjustments.
///
/// The kind only influences pacing through `RequestKind::delay_multiplier`,
/// which scales the profile's base delay. The variant names presumably mirror
/// the HTTP method of the request being paced (confirm against callers).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RequestKind {
    /// Base delay is used unchanged (multiplier 1.0).
    Get,
    /// Base delay is stretched (multiplier 1.35).
    Post,
    /// Base delay is stretched (multiplier 1.35).
    Put,
    /// Base delay is stretched (multiplier 1.35).
    Patch,
    /// Base delay is slightly shortened (multiplier 0.9).
    Delete,
    /// Base delay is shortened (multiplier 0.6).
    Head,
    /// Base delay is shortened (multiplier 0.6).
    Options,
    /// Any kind not listed above; base delay is used unchanged (multiplier 1.0).
    Other,
}
32
33impl RequestKind {
34    fn delay_multiplier(self) -> f32 {
35        match self {
36            RequestKind::Post | RequestKind::Put | RequestKind::Patch => 1.35,
37            RequestKind::Delete => 0.9,
38            RequestKind::Head | RequestKind::Options => 0.6,
39            _ => 1.0,
40        }
41    }
42}
43
/// Configuration describing the base timing envelope for a profile.
///
/// All delay fields are in seconds: `DefaultAdaptiveTiming::calculate_delay`
/// converts the final clamped value with `Duration::from_secs_f32`.
#[derive(Debug, Clone, Copy)]
pub struct TimingProfile {
    /// Starting delay before any multiplier or jitter is applied.
    pub base_delay: f32,
    /// Lower bound used by `TimingProfile::clamp`.
    pub min_delay: f32,
    /// Upper bound used by `TimingProfile::clamp`.
    pub max_delay: f32,
    /// Half-width of the random spread: the base delay is multiplied by a
    /// value drawn from `1.0 - variance_factor ..= 1.0 + variance_factor`.
    pub variance_factor: f32,
    /// NOTE(review): not read by any code visible in this file.
    pub burst_threshold: usize,
    /// NOTE(review): not read by any code visible in this file.
    pub cooldown_multiplier: f32,
    /// When a domain's learned success rate drops below this value, the
    /// computed delay is lengthened in proportion to the shortfall.
    pub success_rate_threshold: f32,
}
55
56impl TimingProfile {
57    fn clamp(&self, value: f32) -> f32 {
58        value.clamp(self.min_delay, self.max_delay)
59    }
60}
61
/// Request metadata supplied to timing strategies.
#[derive(Debug, Clone, Copy)]
pub struct TimingRequest {
    /// Kind of request; selects the base-delay multiplier.
    pub kind: RequestKind,
    /// Size of the content associated with the request. The default strategy
    /// adds a simulated "reading" pause when this exceeds 500, estimating the
    /// word count as `content_length / 5`.
    /// NOTE(review): the unit (bytes vs. characters) is not stated in this file.
    pub content_length: usize,
}
68
69impl TimingRequest {
70    pub fn new(kind: RequestKind, content_length: usize) -> Self {
71        Self {
72            kind,
73            content_length,
74        }
75    }
76}
77
/// Outcome recorded after each request for adaptive learning.
#[derive(Debug, Clone, Copy)]
pub struct TimingOutcome {
    /// Whether the request counts as a success. Feeds the per-domain success
    /// rate and the consecutive-failure counter.
    pub success: bool,
    /// Observed response time. `DefaultAdaptiveTiming::record_outcome` caps it
    /// at 30 s before folding it into the running average.
    pub response_time: Duration,
    /// Delay that was applied before the request. `record_outcome` caps it at
    /// 10 s; on success it is blended into the domain's optimal timing.
    pub applied_delay: Duration,
}
85
/// Snapshot of learned state for observability.
///
/// Produced by `AdaptiveTimingStrategy::snapshot`; the default strategy copies
/// these values out of its per-domain state.
#[derive(Debug, Clone, Copy)]
pub struct DomainTimingSnapshot {
    /// Exponentially weighted success rate (starts at 1.0 for a new domain).
    pub success_rate: f32,
    /// Failures since the last success; the default strategy caps this at 5.
    pub consecutive_failures: u8,
    /// Exponentially weighted average of recorded response times.
    pub average_response_time: Duration,
    /// Blend of the delays applied before successful requests, or `None` if
    /// no success has been recorded for the domain yet.
    pub optimal_timing: Option<Duration>,
}
94
/// Interface for adaptive timing controllers.
///
/// Implementors must be `Send + Sync`.
pub trait AdaptiveTimingStrategy: Send + Sync {
    /// Selects the behaviour profile to use for subsequent delay calculations.
    fn set_behavior_profile(&mut self, profile: BehaviorProfile);
    /// Returns the currently selected behaviour profile.
    fn behavior_profile(&self) -> BehaviorProfile;
    /// Computes the pacing delay for `request` against `domain`.
    ///
    /// Takes `&mut self` because implementations may update internal state
    /// (the default strategy records the time of the call).
    fn calculate_delay(&mut self, domain: &str, request: &TimingRequest) -> Duration;
    /// Feeds the result of a completed request for `domain` back into the
    /// strategy.
    fn record_outcome(&mut self, domain: &str, outcome: &TimingOutcome);
    /// Returns the learned state for `domain`, if any exists.
    fn snapshot(&self, domain: &str) -> Option<DomainTimingSnapshot>;
}
103
/// Default adaptive timing strategy that applies human-like pacing heuristics.
#[derive(Debug)]
pub struct DefaultAdaptiveTiming {
    /// Timing envelope for each behaviour profile; `new` registers one entry
    /// per `BehaviorProfile` variant.
    profiles: HashMap<BehaviorProfile, TimingProfile>,
    /// Profile whose envelope is used by `calculate_delay`.
    active_profile: BehaviorProfile,
    /// Learned per-domain state, keyed by the domain string passed by callers.
    domain_state: HashMap<String, DomainTimingState>,
    /// Success flags of recorded outcomes across all domains, oldest first.
    /// `record_outcome` drops the oldest entry once 256 are stored.
    /// NOTE(review): `new` pre-allocates room for only 128 entries.
    global_history: VecDeque<bool>,
    /// Time of the most recent `calculate_delay` call for any domain.
    /// NOTE(review): written but not read by any code visible in this file.
    last_global_request: Option<Instant>,
}
113
/// Learned pacing state for a single domain. Time values are in seconds.
#[derive(Debug, Clone)]
struct DomainTimingState {
    /// Exponential moving average (weight 0.1 per sample) of outcomes, where
    /// a success counts as 1.0 and a failure as 0.0.
    success_rate: f32,
    /// Failures since the last success, capped at 5 and reset on success.
    consecutive_failures: u8,
    /// Exponential moving average (weight 0.1) of response times, each capped
    /// at 30 s before being averaged.
    average_response_time: f32,
    /// Blend (0.9 old / 0.1 new) of the delays applied before successful
    /// requests; `None` until the first success.
    optimal_timing: Option<f32>,
    /// When `calculate_delay` was last called for this domain.
    last_request: Option<Instant>,
    /// Up to 32 most recent applied delays (each capped at 10 s), oldest first.
    /// NOTE(review): written but not read by any code visible in this file.
    recent_delays: VecDeque<f32>,
}
123
124impl Default for DomainTimingState {
125    fn default() -> Self {
126        Self {
127            success_rate: 1.0,
128            consecutive_failures: 0,
129            average_response_time: 1.0,
130            optimal_timing: None,
131            last_request: None,
132            recent_delays: VecDeque::with_capacity(32),
133        }
134    }
135}
136
137impl DefaultAdaptiveTiming {
138    pub fn new() -> Self {
139        let mut profiles = HashMap::new();
140        profiles.insert(
141            BehaviorProfile::Casual,
142            TimingProfile {
143                base_delay: 1.5,
144                min_delay: 0.5,
145                max_delay: 3.0,
146                variance_factor: 0.4,
147                burst_threshold: 3,
148                cooldown_multiplier: 1.5,
149                success_rate_threshold: 0.8,
150            },
151        );
152        profiles.insert(
153            BehaviorProfile::Focused,
154            TimingProfile {
155                base_delay: 0.9,
156                min_delay: 0.25,
157                max_delay: 2.0,
158                variance_factor: 0.3,
159                burst_threshold: 5,
160                cooldown_multiplier: 1.2,
161                success_rate_threshold: 0.85,
162            },
163        );
164        profiles.insert(
165            BehaviorProfile::Research,
166            TimingProfile {
167                base_delay: 2.5,
168                min_delay: 1.0,
169                max_delay: 6.0,
170                variance_factor: 0.6,
171                burst_threshold: 2,
172                cooldown_multiplier: 2.0,
173                success_rate_threshold: 0.7,
174            },
175        );
176        profiles.insert(
177            BehaviorProfile::Mobile,
178            TimingProfile {
179                base_delay: 1.2,
180                min_delay: 0.4,
181                max_delay: 3.0,
182                variance_factor: 0.4,
183                burst_threshold: 4,
184                cooldown_multiplier: 1.3,
185                success_rate_threshold: 0.75,
186            },
187        );
188
189        Self {
190            profiles,
191            active_profile: BehaviorProfile::Casual,
192            domain_state: HashMap::new(),
193            global_history: VecDeque::with_capacity(128),
194            last_global_request: None,
195        }
196    }
197
198    fn profile(&self) -> TimingProfile {
199        self.profiles
200            .get(&self.active_profile)
201            .copied()
202            .expect("profile missing")
203    }
204
205    fn circadian_multiplier() -> f32 {
206        let now: DateTime<Local> = Local::now();
207        let hour = now.hour() as i32;
208        let base = match hour {
209            0 => 0.3,
210            1..=3 => 0.2,
211            4 => 0.3,
212            5 => 0.4,
213            6 => 0.6,
214            7 => 0.8,
215            8 => 0.9,
216            9..=11 => 1.0,
217            12 => 0.9,
218            13 => 0.75,
219            14 => 0.85,
220            15 | 16 => 1.0,
221            17 => 0.9,
222            18 => 0.8,
223            19 => 0.7,
224            20 => 0.6,
225            21 => 0.5,
226            22 => 0.4,
227            23 => 0.3,
228            _ => 0.5,
229        };
230        let mut rng = rand::thread_rng();
231        base * rng.gen_range(0.85..=1.15)
232    }
233
234    fn ensure_domain_state(&mut self, domain: &str) -> &mut DomainTimingState {
235        self.domain_state.entry(domain.to_string()).or_default()
236    }
237
238    fn apply_human_jitter(mut delay: f32, profile: TimingProfile, content_length: usize) -> f32 {
239        let mut rng = rand::thread_rng();
240        // Reading delay heuristics
241        if content_length > 500 {
242            let words = (content_length as f32 / 5.0).max(1.0);
243            let reading_speed = rng.gen_range(200.0..=300.0);
244            let reading_time = (words / reading_speed) * 60.0;
245            let processing = rng.gen_range(0.5..=2.0);
246            delay = delay.max(reading_time + processing);
247        }
248
249        // Reaction jitter
250        let reaction_time = rng.gen_range(0.15..=0.4);
251        delay += reaction_time;
252
253        // Distraction chance
254        if rng.r#gen::<f32>() < 0.05 {
255            let distraction_delay = rng.gen_range(5.0..=60.0);
256            delay += distraction_delay;
257        }
258
259        profile.clamp(delay)
260    }
261}
262
263impl Default for DefaultAdaptiveTiming {
264    fn default() -> Self {
265        Self::new()
266    }
267}
268
269impl AdaptiveTimingStrategy for DefaultAdaptiveTiming {
270    fn set_behavior_profile(&mut self, profile: BehaviorProfile) {
271        if self.profiles.contains_key(&profile) {
272            self.active_profile = profile;
273        }
274    }
275
276    fn behavior_profile(&self) -> BehaviorProfile {
277        self.active_profile
278    }
279
280    fn calculate_delay(&mut self, domain: &str, request: &TimingRequest) -> Duration {
281        let profile = self.profile();
282        let state = self.ensure_domain_state(domain);
283
284        let mut delay = profile.base_delay * request.kind.delay_multiplier();
285        let mut rng = rand::thread_rng();
286        let variance = rng.gen_range(1.0 - profile.variance_factor..=1.0 + profile.variance_factor);
287        delay *= variance;
288
289        if state.success_rate < profile.success_rate_threshold {
290            let delta = profile.success_rate_threshold - state.success_rate;
291            delay *= 1.0 + delta.max(0.05);
292        }
293
294        if state.consecutive_failures > 0 {
295            let penalty = 1.0 + (state.consecutive_failures as f32 * 0.2);
296            delay *= penalty;
297        }
298
299        if let Some(optimal) = state.optimal_timing {
300            delay = (delay * 0.8) + (optimal * 0.2);
301        }
302
303        let response_factor = state.average_response_time.clamp(0.6, 1.5);
304        delay *= response_factor;
305
306        delay = Self::apply_human_jitter(delay, profile, request.content_length);
307
308        let circadian = Self::circadian_multiplier().max(0.2);
309        delay /= circadian;
310
311        let now = Instant::now();
312        if let Some(last) = state.last_request {
313            let min_spacing = Duration::from_secs_f32(profile.min_delay * 0.6);
314            if let Some(remaining) = min_spacing.checked_sub(now.saturating_duration_since(last)) {
315                delay = delay.max(remaining.as_secs_f32());
316            }
317        }
318
319        state.last_request = Some(now);
320        self.last_global_request = Some(now);
321
322        Duration::from_secs_f32(profile.clamp(delay))
323    }
324
325    fn record_outcome(&mut self, domain: &str, outcome: &TimingOutcome) {
326        let state = self.ensure_domain_state(domain);
327        let alpha = 0.1;
328        let success_value = if outcome.success { 1.0 } else { 0.0 };
329
330        state.success_rate = (1.0 - alpha) * state.success_rate + alpha * success_value;
331
332        if outcome.success {
333            state.consecutive_failures = 0;
334            let applied = outcome.applied_delay.as_secs_f32().min(10.0);
335            state.optimal_timing = Some(match state.optimal_timing {
336                None => applied,
337                Some(prev) => (0.9 * prev) + (0.1 * applied),
338            });
339        } else {
340            state.consecutive_failures = state.consecutive_failures.saturating_add(1).min(5);
341        }
342
343        let response_time = outcome.response_time.as_secs_f32().min(30.0);
344        state.average_response_time =
345            (1.0 - alpha) * state.average_response_time + alpha * response_time;
346
347        if state.recent_delays.len() == 32 {
348            state.recent_delays.pop_front();
349        }
350        state
351            .recent_delays
352            .push_back(outcome.applied_delay.as_secs_f32().min(10.0));
353
354        if self.global_history.len() == 256 {
355            self.global_history.pop_front();
356        }
357        self.global_history.push_back(outcome.success);
358    }
359
360    fn snapshot(&self, domain: &str) -> Option<DomainTimingSnapshot> {
361        self.domain_state
362            .get(domain)
363            .map(|state| DomainTimingSnapshot {
364                success_rate: state.success_rate,
365                consecutive_failures: state.consecutive_failures,
366                average_response_time: Duration::from_secs_f32(state.average_response_time),
367                optimal_timing: state.optimal_timing.map(Duration::from_secs_f32),
368            })
369    }
370}
371
#[cfg(test)]
mod tests {
    use super::*;

    /// Records a run of successful outcomes and checks that (a) every
    /// computed delay stays inside the default `Casual` envelope and (b) the
    /// learned per-domain state reflects the recorded successes.
    #[test]
    fn adaptive_timing_learns_success() {
        // Bounds of the `Casual` profile, which is active after `new()`.
        let min_delay = Duration::from_millis(500);
        let max_delay = Duration::from_secs(3);

        let mut timing = DefaultAdaptiveTiming::new();
        let request = TimingRequest::new(RequestKind::Get, 2000);
        let delay1 = timing.calculate_delay("example.com", &request);
        assert!(delay1 > Duration::from_millis(100));
        assert!(delay1 >= min_delay && delay1 <= max_delay);

        for _ in 0..20 {
            timing.record_outcome(
                "example.com",
                &TimingOutcome {
                    success: true,
                    response_time: Duration::from_secs_f32(1.2),
                    applied_delay: delay1,
                },
            );
        }

        // The learned state must reflect twenty consecutive successes.
        let snapshot = timing
            .snapshot("example.com")
            .expect("state exists once outcomes have been recorded");
        assert!(snapshot.success_rate > 0.99);
        assert_eq!(snapshot.consecutive_failures, 0);

        // Every recorded delay was `delay1`, so the blend converges on it.
        let optimal = snapshot
            .optimal_timing
            .expect("optimal timing is learned after a success");
        assert!((optimal.as_secs_f32() - delay1.as_secs_f32()).abs() < 0.01);

        // The average moves from its 1.0 s default toward the 1.2 s samples.
        let average = snapshot.average_response_time.as_secs_f32();
        assert!(average > 1.1 && average < 1.2);

        let delay2 = timing.calculate_delay("example.com", &request);
        // Delays are randomized, so only bound the follow-up delay: it must
        // stay inside the profile envelope and must not blow up after a run
        // of successes.
        assert!(delay2 >= min_delay && delay2 <= max_delay);
        assert!(delay2 <= delay1 * 2);
    }
}
398}