cloudscraper_rs/modules/state/
mod.rs

1//! Shared state management for domains.
2//!
3//! Maintains per-domain telemetry, request history, and adaptive signals while
4//! staying lightweight for async callers.
5
6use chrono::{DateTime, Utc};
7use serde_json::Value;
8use std::collections::{HashMap, VecDeque};
9use std::sync::{Arc, RwLock};
10use std::time::Duration;
11
12use crate::challenges::solvers::FailureRecorder;
13
/// Upper bound on the number of error records retained per domain.
const ERROR_HISTORY_LIMIT: usize = 50;

/// Upper bound on the number of applied delays remembered per domain.
const RECENT_DELAY_LIMIT: usize = 32;
16
17fn chrono_duration(duration: Duration) -> chrono::Duration {
18    chrono::Duration::from_std(duration).unwrap_or_else(|_| {
19        let millis = duration.as_millis().min(i64::MAX as u128);
20        chrono::Duration::milliseconds(millis as i64)
21    })
22}
23
24#[derive(Debug, Clone)]
25pub struct TimingState {
26    pub success_rate: f32,
27    pub avg_response_time_secs: f32,
28    pub consecutive_failures: u8,
29    pub optimal_delay: Option<Duration>,
30    pub recent_delays: VecDeque<Duration>,
31}
32
33impl Default for TimingState {
34    fn default() -> Self {
35        Self {
36            success_rate: 1.0,
37            avg_response_time_secs: 1.0,
38            consecutive_failures: 0,
39            optimal_delay: None,
40            recent_delays: VecDeque::with_capacity(RECENT_DELAY_LIMIT),
41        }
42    }
43}
44
45impl TimingState {
46    pub fn register_outcome(
47        &mut self,
48        success: bool,
49        response_time: Duration,
50        applied_delay: Duration,
51    ) {
52        self.apply_boolean_outcome(success);
53
54        let alpha = 0.05;
55        let response_secs = response_time.as_secs_f32();
56        if self.avg_response_time_secs <= 0.0 {
57            self.avg_response_time_secs = response_secs;
58        } else {
59            self.avg_response_time_secs =
60                (1.0 - alpha) * self.avg_response_time_secs + alpha * response_secs;
61        }
62
63        if success {
64            let delay_secs = applied_delay.as_secs_f32();
65            self.optimal_delay = Some(match self.optimal_delay {
66                None => applied_delay,
67                Some(current) => {
68                    let blended = (1.0 - alpha) * current.as_secs_f32() + alpha * delay_secs;
69                    Duration::from_secs_f32(blended)
70                }
71            });
72        }
73
74        self.recent_delays.push_back(applied_delay);
75        if self.recent_delays.len() > RECENT_DELAY_LIMIT {
76            self.recent_delays.pop_front();
77        }
78    }
79
80    pub fn apply_boolean_outcome(&mut self, success: bool) {
81        let alpha = 0.05;
82        let target = if success { 1.0 } else { 0.0 };
83        self.success_rate = (1.0 - alpha) * self.success_rate + alpha * target;
84
85        if success {
86            self.consecutive_failures = 0;
87        } else {
88            self.consecutive_failures = self.consecutive_failures.saturating_add(1);
89        }
90    }
91}
92
93#[derive(Debug, Clone)]
94pub struct TimingPatternState {
95    pub last_request: Option<DateTime<Utc>>,
96    pub avg_interval: Duration,
97    pub variance: Duration,
98}
99
100impl Default for TimingPatternState {
101    fn default() -> Self {
102        Self {
103            last_request: None,
104            avg_interval: Duration::from_secs_f32(2.0),
105            variance: Duration::from_secs_f32(1.0),
106        }
107    }
108}
109
110impl TimingPatternState {
111    pub fn mark_request(&mut self, now: DateTime<Utc>) {
112        self.last_request = Some(now);
113    }
114
115    pub fn update_targets(&mut self, avg_interval: Duration, variance: Duration) {
116        self.avg_interval = avg_interval;
117        self.variance = variance;
118    }
119}
120
121#[derive(Debug, Clone)]
122pub struct BurstState {
123    pub window: VecDeque<DateTime<Utc>>,
124    pub max_burst: u32,
125    pub window_size: Duration,
126    pub cooldown_base: Duration,
127    pub cooldown_until: Option<DateTime<Utc>>,
128}
129
130impl Default for BurstState {
131    fn default() -> Self {
132        Self {
133            window: VecDeque::with_capacity(32),
134            max_burst: 5,
135            window_size: Duration::from_secs(60),
136            cooldown_base: Duration::from_secs(10),
137            cooldown_until: None,
138        }
139    }
140}
141
142impl BurstState {
143    pub fn record(&mut self, timestamp: DateTime<Utc>) {
144        let horizon = chrono_duration(self.window_size);
145        while let Some(front) = self.window.front().cloned() {
146            if front + horizon < timestamp {
147                self.window.pop_front();
148            } else {
149                break;
150            }
151        }
152        self.window.push_back(timestamp);
153    }
154
155    pub fn set_cooldown(&mut self, duration: Duration) {
156        self.cooldown_until = Some(Utc::now() + chrono_duration(duration));
157    }
158
159    pub fn cooldown_remaining(&self, now: DateTime<Utc>) -> Option<Duration> {
160        self.cooldown_until
161            .and_then(|until| (until > now).then(|| (until - now).to_std().ok()).flatten())
162    }
163}
164
165#[derive(Debug, Clone)]
166pub struct SessionState {
167    pub id: Option<String>,
168    pub created_at: Option<DateTime<Utc>>,
169    pub last_activity: Option<DateTime<Utc>>,
170    pub min_interval: Duration,
171    pub request_count: u32,
172}
173
174impl Default for SessionState {
175    fn default() -> Self {
176        Self {
177            id: None,
178            created_at: None,
179            last_activity: None,
180            min_interval: Duration::from_millis(500),
181            request_count: 0,
182        }
183    }
184}
185
186impl SessionState {
187    pub fn ensure_initialized(&mut self, now: DateTime<Utc>) {
188        if self.id.is_none() {
189            self.id = Some(format!("sess-{}", now.timestamp_millis()));
190            self.created_at = Some(now);
191        }
192    }
193
194    pub fn touch(&mut self, now: DateTime<Utc>) {
195        self.ensure_initialized(now);
196        self.last_activity = Some(now);
197        self.request_count = self.request_count.saturating_add(1);
198    }
199}
200
201#[derive(Debug, Clone, Default)]
202pub struct FingerprintProfile {
203    pub gpu_vendor: Option<String>,
204    pub performance_tier: Option<String>,
205    pub browser_type: Option<String>,
206    pub operating_system: Option<String>,
207    pub last_updated: Option<DateTime<Utc>>,
208    pub canvas_hash: Option<String>,
209    pub webgl_hash: Option<String>,
210}
211
212impl FingerprintProfile {
213    pub fn update_profile(
214        &mut self,
215        gpu_vendor: Option<String>,
216        performance_tier: Option<String>,
217        browser_type: Option<String>,
218        operating_system: Option<String>,
219    ) {
220        self.gpu_vendor = gpu_vendor;
221        self.performance_tier = performance_tier;
222        self.browser_type = browser_type;
223        self.operating_system = operating_system;
224        self.last_updated = Some(Utc::now());
225    }
226
227    pub fn update_hashes(&mut self, canvas_hash: Option<String>, webgl_hash: Option<String>) {
228        if canvas_hash.is_some() {
229            self.canvas_hash = canvas_hash;
230        }
231        if webgl_hash.is_some() {
232            self.webgl_hash = webgl_hash;
233        }
234        self.last_updated = Some(Utc::now());
235    }
236}
237
238#[derive(Debug, Clone, Default)]
239pub struct MlStrategyState {
240    pub last_strategy: Option<String>,
241    pub success_counter: u32,
242    pub failure_counter: u32,
243    pub last_updated: Option<DateTime<Utc>>,
244}
245
246impl MlStrategyState {
247    pub fn record(&mut self, strategy: &str, success: bool) {
248        self.last_strategy = Some(strategy.to_string());
249        if success {
250            self.success_counter = self.success_counter.saturating_add(1);
251        } else {
252            self.failure_counter = self.failure_counter.saturating_add(1);
253        }
254        self.last_updated = Some(Utc::now());
255    }
256}
257
258#[derive(Debug, Clone)]
259pub struct DomainErrorRecord {
260    pub timestamp: DateTime<Utc>,
261    pub code: Option<u16>,
262    pub message: String,
263}
264
265#[derive(Debug, Clone)]
266pub struct DomainState {
267    pub last_success: Option<DateTime<Utc>>,
268    pub last_error: Option<String>,
269    pub failure_streak: u32,
270    pub success_streak: u32,
271    pub timing: TimingState,
272    pub timing_pattern: TimingPatternState,
273    pub burst: BurstState,
274    pub session: SessionState,
275    pub fingerprint: FingerprintProfile,
276    pub ml: MlStrategyState,
277    pub recent_errors: VecDeque<DomainErrorRecord>,
278    pub cookies: HashMap<String, String>,
279    pub sticky_headers: HashMap<String, String>,
280    pub metadata: HashMap<String, Value>,
281}
282
283impl Default for DomainState {
284    fn default() -> Self {
285        Self {
286            last_success: None,
287            last_error: None,
288            failure_streak: 0,
289            success_streak: 0,
290            timing: TimingState::default(),
291            timing_pattern: TimingPatternState::default(),
292            burst: BurstState::default(),
293            session: SessionState::default(),
294            fingerprint: FingerprintProfile::default(),
295            ml: MlStrategyState::default(),
296            recent_errors: VecDeque::with_capacity(ERROR_HISTORY_LIMIT),
297            cookies: HashMap::new(),
298            sticky_headers: HashMap::new(),
299            metadata: HashMap::new(),
300        }
301    }
302}
303
304impl DomainState {
305    pub fn record_success(&mut self) {
306        self.record_outcome(true, None, None, None);
307    }
308
309    pub fn record_failure(&mut self, error: impl Into<String>) {
310        self.record_outcome(false, None, None, Some(error.into()));
311    }
312
313    pub fn record_outcome(
314        &mut self,
315        success: bool,
316        response_time: Option<Duration>,
317        applied_delay: Option<Duration>,
318        error: Option<String>,
319    ) {
320        let now = Utc::now();
321        if success {
322            self.success_streak = self.success_streak.saturating_add(1);
323            self.failure_streak = 0;
324            self.last_success = Some(now);
325            self.last_error = None;
326            self.recent_errors.clear();
327        } else {
328            self.failure_streak = self.failure_streak.saturating_add(1);
329            self.success_streak = 0;
330            if let Some(ref err) = error {
331                self.last_error = Some(err.clone());
332            }
333        }
334
335        match (response_time, applied_delay) {
336            (Some(response), Some(delay)) => {
337                self.timing.register_outcome(success, response, delay);
338            }
339            _ => {
340                self.timing.apply_boolean_outcome(success);
341            }
342        }
343
344        if !success {
345            let message = error.unwrap_or_else(|| "unknown error".to_string());
346            self.push_error(None, message);
347        }
348    }
349
350    pub fn record_outcome_with_metrics(
351        &mut self,
352        success: bool,
353        response_time: Duration,
354        applied_delay: Duration,
355        error: Option<String>,
356    ) {
357        self.record_outcome(success, Some(response_time), Some(applied_delay), error);
358    }
359
360    pub fn push_error(&mut self, code: Option<u16>, message: impl Into<String>) {
361        let msg = message.into();
362        self.last_error = Some(msg.clone());
363        self.recent_errors.push_back(DomainErrorRecord {
364            timestamp: Utc::now(),
365            code,
366            message: msg,
367        });
368        if self.recent_errors.len() > ERROR_HISTORY_LIMIT {
369            self.recent_errors.pop_front();
370        }
371    }
372
373    pub fn set_cookie(&mut self, key: impl Into<String>, value: impl Into<String>) {
374        self.cookies.insert(key.into(), value.into());
375    }
376
377    pub fn set_header(&mut self, key: impl Into<String>, value: impl Into<String>) {
378        self.sticky_headers.insert(key.into(), value.into());
379    }
380
381    pub fn set_metadata(&mut self, key: impl Into<String>, value: Value) {
382        self.metadata.insert(key.into(), value);
383    }
384
385    pub fn mark_request(&mut self) {
386        let now = Utc::now();
387        self.timing_pattern.mark_request(now);
388        self.session.touch(now);
389        self.burst.record(now);
390    }
391
392    pub fn update_timing_targets(&mut self, avg_interval: Duration, variance: Duration) {
393        self.timing_pattern.update_targets(avg_interval, variance);
394    }
395
396    pub fn update_session_min_interval(&mut self, interval: Duration) {
397        self.session.min_interval = interval;
398    }
399}
400
401/// Thread-safe state manager.
402#[derive(Clone, Debug)]
403pub struct StateManager {
404    inner: Arc<RwLock<HashMap<String, DomainState>>>,
405}
406
407impl StateManager {
408    pub fn new() -> Self {
409        Self {
410            inner: Arc::new(RwLock::new(HashMap::new())),
411        }
412    }
413
414    pub fn get(&self, domain: &str) -> Option<DomainState> {
415        self.inner
416            .read()
417            .ok()
418            .and_then(|map| map.get(domain).cloned())
419    }
420
421    pub fn get_or_create(&self, domain: &str) -> DomainState {
422        let mut guard = self.inner.write().expect("state lock poisoned");
423        guard.entry(domain.to_string()).or_default().clone()
424    }
425
426    pub fn update<F>(&self, domain: &str, mut f: F)
427    where
428        F: FnMut(&mut DomainState),
429    {
430        if let Ok(mut guard) = self.inner.write() {
431            let state = guard.entry(domain.to_string()).or_default();
432            f(state);
433        }
434    }
435
436    pub fn record_success(&self, domain: &str) {
437        self.update(domain, |state| state.record_success());
438    }
439
440    pub fn record_failure(&self, domain: &str, error: impl Into<String>) {
441        let message = error.into();
442        self.update(domain, |state| state.record_failure(message.clone()));
443    }
444
445    pub fn record_outcome(
446        &self,
447        domain: &str,
448        success: bool,
449        response_time: Option<Duration>,
450        applied_delay: Option<Duration>,
451        error: Option<String>,
452    ) {
453        self.update(domain, |state| {
454            state.record_outcome(success, response_time, applied_delay, error.clone());
455        });
456    }
457
458    pub fn mark_request(&self, domain: &str) {
459        self.update(domain, |state| state.mark_request());
460    }
461
462    pub fn push_error(&self, domain: &str, code: Option<u16>, message: impl Into<String>) {
463        let msg = message.into();
464        self.update(domain, |state| state.push_error(code, msg.clone()));
465    }
466
467    pub fn clear(&self, domain: &str) {
468        if let Ok(mut guard) = self.inner.write() {
469            guard.remove(domain);
470        }
471    }
472
473    pub fn clear_all(&self) {
474        if let Ok(mut guard) = self.inner.write() {
475            guard.clear();
476        }
477    }
478}
479
480impl Default for StateManager {
481    fn default() -> Self {
482        Self::new()
483    }
484}
485
486impl FailureRecorder for StateManager {
487    fn record_failure(&self, domain: &str, reason: &str) {
488        StateManager::record_failure(self, domain, reason.to_string());
489    }
490}
491
#[cfg(test)]
mod tests {
    use super::*;

    /// A success recorded after a failure resets the failure streak and
    /// empties the error history.
    #[test]
    fn tracks_success_and_failure() {
        const DOMAIN: &str = "example.com";

        let manager = StateManager::new();
        manager.record_failure(DOMAIN, "timeout");
        manager.record_success(DOMAIN);

        let state = manager.get(DOMAIN).unwrap();
        assert_eq!(state.failure_streak, 0);
        assert_eq!(state.success_streak, 1);
        assert!(state.last_success.is_some());
        assert!(state.recent_errors.is_empty());
    }
}
504        assert!(state.last_success.is_some());
505        assert!(state.recent_errors.is_empty());
506    }
507}