// cloudscraper_rs/modules/ml/mod.rs
use rand::Rng;
use std::collections::{HashMap, HashSet, VecDeque};
8
/// Named numeric signals observed for a single scraping attempt,
/// keyed by feature name.
pub type FeatureVector = HashMap<String, f64>;
11
/// Tuning knobs for the per-domain learning optimizer.
#[derive(Debug, Clone)]
pub struct MLConfig {
    /// Maximum number of recent attempts retained per domain.
    pub window_size: usize,
    /// EMA coefficient applied when updating a domain's success rate.
    pub learning_rate: f64,
    /// Minimum recorded attempts before `recommend` returns anything.
    pub min_samples: usize,
    /// Probability of suggesting a random exploratory delay when no
    /// learned delay is available.
    pub exploration_chance: f64,
}
20
21impl Default for MLConfig {
22 fn default() -> Self {
23 Self {
24 window_size: 200,
25 learning_rate: 0.15,
26 min_samples: 20,
27 exploration_chance: 0.1,
28 }
29 }
30}
31
/// Output of `MLOptimizer::recommend` for a single domain.
#[derive(Debug, Clone)]
pub struct StrategyRecommendation {
    /// Domain the recommendation applies to.
    pub domain: String,
    /// EMA success rate for the domain, used as a confidence proxy.
    pub confidence: f64,
    /// Delay (seconds) to apply before the next attempt, if one was derived.
    pub suggested_delay: Option<f64>,
    /// Learned per-feature weights (mean on successes minus mean on failures).
    pub feature_weights: HashMap<String, f64>,
    /// Human-readable explanations of how the recommendation was formed.
    pub notes: Vec<String>,
}
41
/// One observed scraping attempt retained in a domain's sliding window.
#[derive(Debug, Clone)]
struct AttemptRecord {
    /// Signals observed for this attempt.
    features: FeatureVector,
    /// Whether the attempt succeeded.
    success: bool,
    /// Delay (seconds) applied before the attempt, if any.
    delay_used: Option<f64>,
}
48
/// Learned state for a single domain.
#[derive(Debug)]
struct DomainModel {
    /// Sliding window of recent attempts (newest at the back).
    attempts: VecDeque<AttemptRecord>,
    /// Per-feature weights: mean value on successes minus mean on failures.
    weights: HashMap<String, f64>,
    /// Exponential moving average of attempt success (1.0 = all succeeding).
    success_rate: f64,
    /// Maximum length of `attempts`.
    window_size: usize,
}
56
57impl DomainModel {
58 fn new(window_size: usize) -> Self {
59 Self {
60 attempts: VecDeque::with_capacity(window_size),
61 weights: HashMap::new(),
62 success_rate: 1.0,
63 window_size,
64 }
65 }
66
67 fn push(&mut self, record: AttemptRecord) {
68 if self.attempts.len() == self.window_size {
69 self.attempts.pop_front();
70 }
71 self.attempts.push_back(record);
72 }
73}
74
/// Online learner that tracks per-domain attempt history and derives
/// delay / feature-weight recommendations.
#[derive(Debug)]
pub struct MLOptimizer {
    /// Shared tuning parameters for every domain model.
    config: MLConfig,
    /// Per-domain learned models, keyed by domain name.
    domains: HashMap<String, DomainModel>,
}
81
82impl MLOptimizer {
83 pub fn new(config: MLConfig) -> Self {
84 Self {
85 domains: HashMap::new(),
86 config,
87 }
88 }
89
90 fn model_mut(&mut self, domain: &str) -> &mut DomainModel {
91 self.domains
92 .entry(domain.to_string())
93 .or_insert_with(|| DomainModel::new(self.config.window_size))
94 }
95
96 pub fn record_attempt(
98 &mut self,
99 domain: &str,
100 features: FeatureVector,
101 success: bool,
102 delay_used: Option<f64>,
103 ) {
104 let alpha = self.config.learning_rate;
105 let model = self.model_mut(domain);
106 model.push(AttemptRecord {
107 features,
108 success,
109 delay_used,
110 });
111
112 model.success_rate =
113 (1.0 - alpha) * model.success_rate + alpha * if success { 1.0 } else { 0.0 };
114
115 let mut success_sums: HashMap<String, f64> = HashMap::new();
117 let mut failure_sums: HashMap<String, f64> = HashMap::new();
118 let mut success_counts: HashMap<String, f64> = HashMap::new();
119 let mut failure_counts: HashMap<String, f64> = HashMap::new();
120
121 for attempt in &model.attempts {
122 for (feature, value) in &attempt.features {
123 if attempt.success {
124 *success_sums.entry(feature.clone()).or_default() += value;
125 *success_counts.entry(feature.clone()).or_default() += 1.0;
126 } else {
127 *failure_sums.entry(feature.clone()).or_default() += value;
128 *failure_counts.entry(feature.clone()).or_default() += 1.0;
129 }
130 }
131 }
132
133 let mut seen: HashSet<&String> = HashSet::new();
134 for feature in success_sums.keys().chain(failure_sums.keys()) {
135 if !seen.insert(feature) {
136 continue;
137 }
138
139 let success_sum = *success_sums.get(feature).unwrap_or(&0.0);
140 let success_count = *success_counts.get(feature).unwrap_or(&0.0);
141 let success_avg = if success_count > f64::EPSILON {
142 success_sum / success_count
143 } else {
144 0.0
145 };
146
147 let failure_sum = *failure_sums.get(feature).unwrap_or(&0.0);
148 let failure_count = *failure_counts.get(feature).unwrap_or(&0.0);
149 let failure_avg = if failure_count > f64::EPSILON {
150 failure_sum / failure_count
151 } else {
152 0.0
153 };
154
155 let weight = success_avg - failure_avg;
156 model.weights.insert(feature.clone(), weight);
157 }
158 }
159
160 pub fn recommend(&self, domain: &str) -> Option<StrategyRecommendation> {
162 let model = self.domains.get(domain)?;
163 if model.attempts.len() < self.config.min_samples {
164 return None;
165 }
166
167 let mut rng = rand::thread_rng();
168 let mut notes = Vec::new();
169 let confidence = model.success_rate;
170
171 let suggested_delay = if let Some(delay) = self.estimate_delay(model) {
172 notes.push(format!("using learned optimal delay {:.2}s", delay));
173 Some(delay)
174 } else if rng.gen_bool(self.config.exploration_chance.min(0.5)) {
175 let jitter = rng.gen_range(0.5..=1.5);
176 notes.push(format!("exploration jitter {:.2}", jitter));
177 Some(jitter)
178 } else {
179 None
180 };
181
182 Some(StrategyRecommendation {
183 domain: domain.to_string(),
184 confidence,
185 suggested_delay,
186 feature_weights: model.weights.clone(),
187 notes,
188 })
189 }
190
191 fn estimate_delay(&self, model: &DomainModel) -> Option<f64> {
192 let mut successful_delays: Vec<f64> = model
193 .attempts
194 .iter()
195 .filter_map(|attempt| {
196 if attempt.success {
197 attempt.delay_used
198 } else {
199 None
200 }
201 })
202 .collect();
203 if successful_delays.is_empty() {
204 return None;
205 }
206 successful_delays.sort_by(|a, b| a.partial_cmp(b).unwrap());
207 let median = successful_delays[successful_delays.len() / 2];
208 Some((median * 0.9).clamp(0.2, 10.0))
209 }
210
211 pub fn clear_domain(&mut self, domain: &str) {
212 self.domains.remove(domain);
213 }
214}
215
216impl Default for MLOptimizer {
217 fn default() -> Self {
218 Self::new(MLConfig::default())
219 }
220}
221
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeding a mix of successes and failures for one domain should
    /// yield a recommendation carrying learned feature weights.
    #[test]
    fn learns_feature_weights() {
        let mut optimizer = MLOptimizer::default();
        for i in 0..40 {
            let difficulty = if i % 2 == 0 { 0.5 } else { 1.5 };
            let features: FeatureVector = [
                ("timing".to_string(), 1.0),
                ("difficulty".to_string(), difficulty),
            ]
            .into_iter()
            .collect();
            optimizer.record_attempt("example.com", features, i % 3 != 0, Some(1.0));
        }

        let rec = optimizer
            .recommend("example.com")
            .expect("40 samples exceed min_samples, so a recommendation exists");
        assert!(rec.feature_weights.contains_key("timing"));
    }
}