// cloudscraper_rs/modules/ml/mod.rs

//! Lightweight ML-inspired optimizer for strategy selection.
//!
//! Learns correlations between recorded features and bypass success rates so
//! adaptive strategies can make informed recommendations.
use rand::Rng;
use std::collections::{HashMap, HashSet, VecDeque};
8
/// Feature vector represented as numeric values, keyed by feature name.
pub type FeatureVector = HashMap<String, f64>;
11
/// Configuration for the ML optimizer.
#[derive(Debug, Clone)]
pub struct MLConfig {
    /// Maximum number of attempts retained per domain (sliding window).
    pub window_size: usize,
    /// Smoothing factor for the exponential moving average of success.
    pub learning_rate: f64,
    /// Minimum recorded attempts before a recommendation is produced.
    pub min_samples: usize,
    /// Probability of suggesting an exploratory delay (capped at 0.5 when used).
    pub exploration_chance: f64,
}

impl Default for MLConfig {
    fn default() -> Self {
        MLConfig {
            window_size: 200,
            learning_rate: 0.15,
            min_samples: 20,
            exploration_chance: 0.1,
        }
    }
}
31
/// Recommendation returned after evaluating recorded samples.
#[derive(Debug, Clone)]
pub struct StrategyRecommendation {
    /// Domain this recommendation applies to.
    pub domain: String,
    /// The learned success rate for the domain at recommendation time.
    pub confidence: f64,
    /// Delay in seconds to apply before the next attempt, when one is known.
    pub suggested_delay: Option<f64>,
    /// Per-feature weights learned for this domain.
    pub feature_weights: HashMap<String, f64>,
    /// Human-readable notes describing how the recommendation was formed.
    pub notes: Vec<String>,
}
41
42#[derive(Debug, Clone)]
43struct AttemptRecord {
44    features: FeatureVector,
45    success: bool,
46    delay_used: Option<f64>,
47}
48
49#[derive(Debug)]
50struct DomainModel {
51    attempts: VecDeque<AttemptRecord>,
52    weights: HashMap<String, f64>,
53    success_rate: f64,
54    window_size: usize,
55}
56
57impl DomainModel {
58    fn new(window_size: usize) -> Self {
59        Self {
60            attempts: VecDeque::with_capacity(window_size),
61            weights: HashMap::new(),
62            success_rate: 1.0,
63            window_size,
64        }
65    }
66
67    fn push(&mut self, record: AttemptRecord) {
68        if self.attempts.len() == self.window_size {
69            self.attempts.pop_front();
70        }
71        self.attempts.push_back(record);
72    }
73}
74
75/// ML-based optimizer wrapper.
76#[derive(Debug)]
77pub struct MLOptimizer {
78    config: MLConfig,
79    domains: HashMap<String, DomainModel>,
80}
81
82impl MLOptimizer {
83    pub fn new(config: MLConfig) -> Self {
84        Self {
85            domains: HashMap::new(),
86            config,
87        }
88    }
89
90    fn model_mut(&mut self, domain: &str) -> &mut DomainModel {
91        self.domains
92            .entry(domain.to_string())
93            .or_insert_with(|| DomainModel::new(self.config.window_size))
94    }
95
96    /// Record the outcome of a bypass attempt.
97    pub fn record_attempt(
98        &mut self,
99        domain: &str,
100        features: FeatureVector,
101        success: bool,
102        delay_used: Option<f64>,
103    ) {
104        let alpha = self.config.learning_rate;
105        let model = self.model_mut(domain);
106        model.push(AttemptRecord {
107            features,
108            success,
109            delay_used,
110        });
111
112        model.success_rate =
113            (1.0 - alpha) * model.success_rate + alpha * if success { 1.0 } else { 0.0 };
114
115        // Recalculate weights via simple correlation (success minus failure averages).
116        let mut success_sums: HashMap<String, f64> = HashMap::new();
117        let mut failure_sums: HashMap<String, f64> = HashMap::new();
118        let mut success_counts: HashMap<String, f64> = HashMap::new();
119        let mut failure_counts: HashMap<String, f64> = HashMap::new();
120
121        for attempt in &model.attempts {
122            for (feature, value) in &attempt.features {
123                if attempt.success {
124                    *success_sums.entry(feature.clone()).or_default() += value;
125                    *success_counts.entry(feature.clone()).or_default() += 1.0;
126                } else {
127                    *failure_sums.entry(feature.clone()).or_default() += value;
128                    *failure_counts.entry(feature.clone()).or_default() += 1.0;
129                }
130            }
131        }
132
133        let mut seen: HashSet<&String> = HashSet::new();
134        for feature in success_sums.keys().chain(failure_sums.keys()) {
135            if !seen.insert(feature) {
136                continue;
137            }
138
139            let success_sum = *success_sums.get(feature).unwrap_or(&0.0);
140            let success_count = *success_counts.get(feature).unwrap_or(&0.0);
141            let success_avg = if success_count > f64::EPSILON {
142                success_sum / success_count
143            } else {
144                0.0
145            };
146
147            let failure_sum = *failure_sums.get(feature).unwrap_or(&0.0);
148            let failure_count = *failure_counts.get(feature).unwrap_or(&0.0);
149            let failure_avg = if failure_count > f64::EPSILON {
150                failure_sum / failure_count
151            } else {
152                0.0
153            };
154
155            let weight = success_avg - failure_avg;
156            model.weights.insert(feature.clone(), weight);
157        }
158    }
159
160    /// Produce a recommendation for the domain based on learned weights.
161    pub fn recommend(&self, domain: &str) -> Option<StrategyRecommendation> {
162        let model = self.domains.get(domain)?;
163        if model.attempts.len() < self.config.min_samples {
164            return None;
165        }
166
167        let mut rng = rand::thread_rng();
168        let mut notes = Vec::new();
169        let confidence = model.success_rate;
170
171        let suggested_delay = if let Some(delay) = self.estimate_delay(model) {
172            notes.push(format!("using learned optimal delay {:.2}s", delay));
173            Some(delay)
174        } else if rng.gen_bool(self.config.exploration_chance.min(0.5)) {
175            let jitter = rng.gen_range(0.5..=1.5);
176            notes.push(format!("exploration jitter {:.2}", jitter));
177            Some(jitter)
178        } else {
179            None
180        };
181
182        Some(StrategyRecommendation {
183            domain: domain.to_string(),
184            confidence,
185            suggested_delay,
186            feature_weights: model.weights.clone(),
187            notes,
188        })
189    }
190
191    fn estimate_delay(&self, model: &DomainModel) -> Option<f64> {
192        let mut successful_delays: Vec<f64> = model
193            .attempts
194            .iter()
195            .filter_map(|attempt| {
196                if attempt.success {
197                    attempt.delay_used
198                } else {
199                    None
200                }
201            })
202            .collect();
203        if successful_delays.is_empty() {
204            return None;
205        }
206        successful_delays.sort_by(|a, b| a.partial_cmp(b).unwrap());
207        let median = successful_delays[successful_delays.len() / 2];
208        Some((median * 0.9).clamp(0.2, 10.0))
209    }
210
211    pub fn clear_domain(&mut self, domain: &str) {
212        self.domains.remove(domain);
213    }
214}
215
216impl Default for MLOptimizer {
217    fn default() -> Self {
218        Self::new(MLConfig::default())
219    }
220}
221
#[cfg(test)]
mod tests {
    use super::*;

    /// After enough mixed successes/failures, the optimizer should emit a
    /// recommendation whose weight map covers the recorded features.
    #[test]
    fn learns_feature_weights() {
        let mut optimizer = MLOptimizer::default();
        for round in 0..40 {
            let mut features = FeatureVector::new();
            features.insert("timing".into(), 1.0);
            let difficulty = if round % 2 == 0 { 0.5 } else { 1.5 };
            features.insert("difficulty".into(), difficulty);
            let success = round % 3 != 0;
            optimizer.record_attempt("example.com", features, success, Some(1.0));
        }

        let rec = optimizer
            .recommend("example.com")
            .expect("40 samples exceed min_samples, so a recommendation exists");
        assert!(rec.feature_weights.contains_key("timing"));
    }
}