1use once_cell::sync::Lazy;
7use regex::Regex;
8use std::collections::{HashMap, VecDeque};
9use std::time::SystemTime;
10
11use crate::challenges::core::{ChallengeResponse, is_cloudflare_response};
12
/// Kinds of Cloudflare challenge or block page this module can report.
///
/// Every built-in entry of `KNOWN_PATTERNS` is tagged with one of these
/// variants, and callers pick one when registering an adaptive pattern.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChallengeType {
    /// Tag of the built-in "Cloudflare IUAM v1" pattern.
    JavaScriptV1,
    /// Tag of the built-in "Cloudflare IUAM v2" pattern.
    JavaScriptV2,
    /// Tag of the built-in "Cloudflare Managed Challenge v3" pattern.
    ManagedV3,
    /// Tag of the built-in "Cloudflare Turnstile" pattern.
    Turnstile,
    /// Tag of the built-in "Cloudflare Rate Limit" pattern (error code 1015).
    RateLimit,
    /// Tag of the built-in "Cloudflare Access Denied" pattern (error code 1020).
    AccessDenied,
    /// Tag of the built-in "Cloudflare Bot Management" pattern (error code 1010).
    BotManagement,
    /// Not assigned by any built-in pattern in this file; available to callers.
    Unknown,
}
25
/// How a caller is advised to react to a detected challenge.
///
/// Each built-in entry of `KNOWN_PATTERNS` carries one of these values, and it
/// is copied verbatim into the resulting `ChallengeDetection`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ResponseStrategy {
    /// Paired with the built-in "Cloudflare IUAM v1" pattern.
    JsExecution,
    /// Paired with the built-in "Cloudflare IUAM v2" pattern.
    AdvancedJsExecution,
    /// Paired with the built-in "Cloudflare Managed Challenge v3" pattern.
    BrowserSimulation,
    /// Paired with the built-in "Cloudflare Turnstile" pattern.
    CaptchaSolving,
    /// Paired with the built-in "Cloudflare Rate Limit" pattern.
    DelayRetry,
    /// Paired with the built-in "Cloudflare Access Denied" pattern.
    ProxyRotation,
    /// Paired with the built-in "Cloudflare Bot Management" pattern.
    EnhancedEvasion,
    /// Not assigned by any built-in pattern in this file; available to callers.
    None,
}
38
39fn response_domain(response: &ChallengeResponse<'_>) -> Option<String> {
41 response.url.host_str().map(|host| host.to_lowercase())
42}
43
44#[derive(Debug, Clone)]
47struct ChallengePattern {
48 id: String,
49 name: String,
50 challenge_type: ChallengeType,
51 response_strategy: ResponseStrategy,
52 base_confidence: f32,
53 patterns: Vec<Regex>,
54 adaptive: bool,
55}
56
57impl ChallengePattern {
58 fn new(
59 id: impl Into<String>,
60 name: impl Into<String>,
61 challenge_type: ChallengeType,
62 response_strategy: ResponseStrategy,
63 base_confidence: f32,
64 raw_patterns: &[&str],
65 ) -> Self {
66 let patterns = raw_patterns
67 .iter()
68 .map(|pattern| build_regex(pattern))
69 .collect();
70
71 Self {
72 id: id.into(),
73 name: name.into(),
74 challenge_type,
75 response_strategy,
76 base_confidence,
77 patterns,
78 adaptive: false,
79 }
80 }
81
82 fn into_adaptive(mut self) -> Self {
83 self.adaptive = true;
84 self
85 }
86}
87
88static KNOWN_PATTERNS: Lazy<Vec<ChallengePattern>> = Lazy::new(|| {
90 vec![
91 ChallengePattern::new(
92 "cf_iuam_v1",
93 "Cloudflare IUAM v1",
94 ChallengeType::JavaScriptV1,
95 ResponseStrategy::JsExecution,
96 0.95,
97 &[
98 r#"<title>\s*Just a moment\.\.\.\s*</title>"#,
99 r"var s,t,o,p,b,r,e,a,k,i,n,g,f,u,l,l,y,h,a,r,d,c,o,r,e",
100 r#"setTimeout\(function\(\)\s*\{\s*var.*?\.submit\(\)"#,
101 r#"<form[^>]*id="challenge-form"[^>]*action="/[^"]*__cf_chl_f_tk="#,
102 ],
103 ),
104 ChallengePattern::new(
105 "cf_iuam_v2",
106 "Cloudflare IUAM v2",
107 ChallengeType::JavaScriptV2,
108 ResponseStrategy::AdvancedJsExecution,
109 0.90,
110 &[
111 r#"cpo\.src\s*=\s*['"]/cdn-cgi/challenge-platform/.*?orchestrate/jsch/v1"#,
112 r"window\._cf_chl_opt\s*=",
113 r#"<form[^>]*id="challenge-form"[^>]*action="/[^"]*__cf_chl_rt_tk="#,
114 ],
115 ),
116 ChallengePattern::new(
117 "cf_managed_v3",
118 "Cloudflare Managed Challenge v3",
119 ChallengeType::ManagedV3,
120 ResponseStrategy::BrowserSimulation,
121 0.92,
122 &[
123 r#"cpo\.src\s*=\s*['"]/cdn-cgi/challenge-platform/.*?orchestrate/(?:captcha|managed)/v1"#,
124 r"window\._cf_chl_ctx\s*=",
125 r#"data-ray="[A-Fa-f0-9]+""#,
126 r#"<div[^>]*class="cf-browser-verification"#,
127 ],
128 ),
129 ChallengePattern::new(
130 "cf_turnstile",
131 "Cloudflare Turnstile",
132 ChallengeType::Turnstile,
133 ResponseStrategy::CaptchaSolving,
134 0.98,
135 &[
136 r#"class="cf-turnstile""#,
137 r#"data-sitekey="[0-9A-Za-z]{40}""#,
138 r#"src="https://challenges\.cloudflare\.com/turnstile/v0/api\.js"#,
139 r"cf-turnstile-response",
140 ],
141 ),
142 ChallengePattern::new(
143 "cf_rate_limit",
144 "Cloudflare Rate Limit",
145 ChallengeType::RateLimit,
146 ResponseStrategy::DelayRetry,
147 0.99,
148 &[
149 r#"<span[^>]*class="cf-error-code">1015<"#,
150 r"You are being rate limited",
151 r#"<title>\s*Rate Limited\s*</title>"#,
152 ],
153 ),
154 ChallengePattern::new(
155 "cf_access_denied",
156 "Cloudflare Access Denied",
157 ChallengeType::AccessDenied,
158 ResponseStrategy::ProxyRotation,
159 0.99,
160 &[
161 r#"<span[^>]*class="cf-error-code">1020<"#,
162 r"Access denied",
163 r"The owner of this website has banned your access",
164 ],
165 ),
166 ChallengePattern::new(
167 "cf_bot_management",
168 "Cloudflare Bot Management",
169 ChallengeType::BotManagement,
170 ResponseStrategy::EnhancedEvasion,
171 0.95,
172 &[
173 r#"<span[^>]*class="cf-error-code">1010<"#,
174 r"Bot management",
175 r"has banned you temporarily",
176 ],
177 ),
178 ]
179});
180
181#[derive(Debug, Clone)]
183pub struct ChallengeDetection {
184 pub pattern_id: String,
185 pub pattern_name: String,
186 pub challenge_type: ChallengeType,
187 pub response_strategy: ResponseStrategy,
188 pub confidence: f32,
189 pub is_adaptive: bool,
190 pub status_code: u16,
191 pub url: String,
192 pub matched_indicators: Vec<String>,
193}
194
/// Running tally of reported outcomes for a single pattern id.
#[derive(Debug, Clone)]
struct PatternStats {
    /// Number of outcomes recorded so far.
    attempts: u32,
    /// Number of those outcomes that were reported as successful.
    successes: u32,
}
200
201impl PatternStats {
202 fn record(&mut self, success: bool) {
203 self.attempts = self.attempts.saturating_add(1);
204 if success {
205 self.successes = self.successes.saturating_add(1);
206 }
207 }
208
209 fn success_rate(&self) -> f32 {
210 if self.attempts == 0 {
211 0.0
212 } else {
213 self.successes as f32 / self.attempts as f32
214 }
215 }
216}
217
/// Internal history entry stored by the detector for each reported detection.
#[derive(Debug, Clone)]
struct DetectionRecord {
    /// Wall-clock time at which the detection was recorded.
    timestamp: SystemTime,
    /// Identifier of the pattern that won the detection.
    pattern_id: String,
    /// Confidence of the recorded detection.
    confidence: f32,
    /// URL of the response the detection was made on.
    url: String,
}
225
/// Public, owned view of one entry of the detector's detection history.
#[derive(Debug, Clone)]
pub struct DetectionLogEntry {
    /// Wall-clock time at which the detection was recorded.
    pub timestamp: SystemTime,
    /// Identifier of the pattern that won the detection.
    pub pattern_id: String,
    /// Confidence of the recorded detection.
    pub confidence: f32,
    /// URL of the response the detection was made on.
    pub url: String,
}
234
235impl From<&DetectionRecord> for DetectionLogEntry {
236 fn from(record: &DetectionRecord) -> Self {
237 Self {
238 timestamp: record.timestamp,
239 pattern_id: record.pattern_id.clone(),
240 confidence: record.confidence,
241 url: record.url.clone(),
242 }
243 }
244}
245
246#[derive(Debug)]
248pub struct ChallengeDetector {
249 known_patterns: Vec<ChallengePattern>,
250 adaptive_patterns: HashMap<String, Vec<ChallengePattern>>, stats: HashMap<String, PatternStats>,
252 history: VecDeque<DetectionRecord>,
253 max_history: usize,
254}
255
256impl Default for ChallengeDetector {
257 fn default() -> Self {
258 Self::new()
259 }
260}
261
262impl ChallengeDetector {
263 pub fn new() -> Self {
264 Self {
265 known_patterns: KNOWN_PATTERNS.clone(),
266 adaptive_patterns: HashMap::new(),
267 stats: HashMap::new(),
268 history: VecDeque::with_capacity(128),
269 max_history: 1000,
270 }
271 }
272
273 pub fn detect(&mut self, response: &ChallengeResponse<'_>) -> Option<ChallengeDetection> {
275 if !self.is_cloudflare_challenge(response) {
276 return None;
277 }
278
279 let mut best: Option<(ChallengeDetection, f32)> = None;
280
281 for pattern in &self.known_patterns {
282 if let Some((confidence, matched)) = self.evaluate_pattern(pattern, response)
283 && best
284 .as_ref()
285 .is_none_or(|(_, current)| confidence > *current)
286 {
287 best = Some((
288 ChallengeDetection {
289 pattern_id: pattern.id.clone(),
290 pattern_name: pattern.name.clone(),
291 challenge_type: pattern.challenge_type,
292 response_strategy: pattern.response_strategy,
293 confidence,
294 is_adaptive: pattern.adaptive,
295 status_code: response.status,
296 url: response.url.as_str().to_string(),
297 matched_indicators: matched,
298 },
299 confidence,
300 ));
301 }
302 }
303
304 if let Some(domain) = response_domain(response)
305 && let Some(patterns) = self.adaptive_patterns.get(&domain)
306 {
307 for pattern in patterns {
308 if let Some((confidence, matched)) = self.evaluate_pattern(pattern, response)
309 && best
310 .as_ref()
311 .is_none_or(|(_, current)| confidence > *current)
312 {
313 best = Some((
314 ChallengeDetection {
315 pattern_id: pattern.id.clone(),
316 pattern_name: pattern.name.clone(),
317 challenge_type: pattern.challenge_type,
318 response_strategy: pattern.response_strategy,
319 confidence,
320 is_adaptive: true,
321 status_code: response.status,
322 url: response.url.as_str().to_string(),
323 matched_indicators: matched,
324 },
325 confidence,
326 ));
327 }
328 }
329 }
330
331 let result = best.map(|(detection, _)| detection);
332
333 if let Some(ref detection) = result {
334 self.record_detection(detection.clone());
335 }
336
337 result
338 }
339
340 fn evaluate_pattern(
341 &self,
342 pattern: &ChallengePattern,
343 response: &ChallengeResponse<'_>,
344 ) -> Option<(f32, Vec<String>)> {
345 let matches: Vec<_> = pattern
346 .patterns
347 .iter()
348 .filter(|regex| regex.is_match(response.body))
349 .map(|regex| regex.as_str().to_string())
350 .collect();
351
352 if matches.is_empty() {
353 return None;
354 }
355
356 let total = pattern.patterns.len() as f32;
357 let mut confidence = (matches.len() as f32 / total) * pattern.base_confidence;
358
359 if let Some(stats) = self.stats.get(&pattern.id) {
360 confidence += stats.success_rate() * 0.1;
361 }
362
363 confidence = confidence.min(1.0);
364
365 if confidence < 0.5 {
366 return None;
367 }
368
369 Some((confidence, matches))
370 }
371
372 fn is_cloudflare_challenge(&self, response: &ChallengeResponse<'_>) -> bool {
373 is_cloudflare_response(response) && matches!(response.status, 403 | 429 | 503)
374 }
375
376 fn record_detection(&mut self, detection: ChallengeDetection) {
377 if self.history.len() == self.max_history {
378 self.history.pop_front();
379 }
380 self.history.push_back(DetectionRecord {
381 timestamp: SystemTime::now(),
382 pattern_id: detection.pattern_id,
383 confidence: detection.confidence,
384 url: detection.url,
385 });
386 }
387
388 pub fn detection_history(&self) -> impl Iterator<Item = DetectionLogEntry> + '_ {
390 self.history.iter().map(DetectionLogEntry::from)
391 }
392
393 pub fn learn_from_outcome(&mut self, pattern_id: &str, success: bool) {
395 let entry = self
396 .stats
397 .entry(pattern_id.to_string())
398 .or_insert(PatternStats {
399 attempts: 0,
400 successes: 0,
401 });
402 entry.record(success);
403 }
404
405 pub fn add_adaptive_pattern(
407 &mut self,
408 domain: &str,
409 pattern_name: &str,
410 raw_patterns: Vec<&str>,
411 challenge_type: ChallengeType,
412 response_strategy: ResponseStrategy,
413 ) {
414 let pattern = ChallengePattern::new(
415 format!("adaptive_{}_{}", domain, raw_patterns.len()),
416 pattern_name,
417 challenge_type,
418 response_strategy,
419 0.8,
420 &raw_patterns,
421 )
422 .into_adaptive();
423
424 self.adaptive_patterns
425 .entry(domain.to_lowercase())
426 .or_default()
427 .push(pattern);
428 }
429}
430
431fn build_regex(pattern: &str) -> Regex {
432 regex::RegexBuilder::new(pattern)
433 .case_insensitive(true)
434 .multi_line(true)
435 .dot_matches_new_line(true)
436 .build()
437 .unwrap_or_else(|err| panic!("invalid challenge detection regex `{}`: {}", pattern, err))
438}
439
#[cfg(test)]
mod tests {
    use super::*;
    use http::header::SERVER;
    use http::{HeaderMap, Method};
    use url::Url;

    /// Owns everything a `ChallengeResponse` borrows, so tests can build a
    /// response view on demand.
    struct ResponseFixture {
        url: Url,
        headers: HeaderMap,
        method: Method,
        body: String,
        status: u16,
    }

    impl ResponseFixture {
        /// Fixture for `https://example.com/` served with a `Server: cloudflare`
        /// header, the given body and the given status.
        fn new(body: &str, status: u16) -> Self {
            let url = Url::parse("https://example.com/").unwrap();

            let mut headers = HeaderMap::new();
            headers.insert(SERVER, "cloudflare".parse().unwrap());

            Self {
                url,
                headers,
                method: Method::GET,
                body: body.to_string(),
                status,
            }
        }

        /// Borrows the fixture as a `ChallengeResponse`.
        fn response(&self) -> ChallengeResponse<'_> {
            ChallengeResponse {
                url: &self.url,
                status: self.status,
                headers: &self.headers,
                body: &self.body,
                request_method: &self.method,
            }
        }
    }

    #[test]
    fn detects_turnstile() {
        let html = r#"
            <html><head><title>Test</title></head>
            <body>
            <div class="cf-turnstile" data-sitekey="0123456789ABCDEFGHIJ0123456789ABCDEFGHIJ"></div>
            <script src="https://challenges.cloudflare.com/turnstile/v0/api.js"></script>
            </body>
            </html>
        "#;

        let fixture = ResponseFixture::new(html, 403);
        let mut detector = ChallengeDetector::new();

        let detection = detector
            .detect(&fixture.response())
            .expect("should detect");

        assert_eq!(detection.challenge_type, ChallengeType::Turnstile);
        assert_eq!(
            detection.response_strategy,
            ResponseStrategy::CaptchaSolving
        );
    }
}