cloudscraper_rs/challenges/solvers/
managed_v3.rs1use std::collections::HashMap;
7use std::sync::Arc;
8use std::time::Duration;
9
10use html_escape::decode_html_entities;
11use once_cell::sync::Lazy;
12use rand::Rng;
13use regex::{Regex, RegexBuilder};
14use serde::{Deserialize, Serialize};
15use thiserror::Error;
16
17use crate::challenges::core::{
18 ChallengeExecutionError, ChallengeHttpClient, ChallengeHttpResponse, ChallengeResponse,
19 ChallengeSubmission, OriginalRequest, execute_challenge_submission, is_cloudflare_response,
20 origin_from_url,
21};
22use crate::external_deps::interpreters::{InterpreterError, JavascriptInterpreter};
23
24use super::ChallengeSolver;
25
/// Lower bound (seconds) of the delay range installed by `ManagedV3Solver::new`.
const DEFAULT_DELAY_MIN_SECS: f32 = 1.0;
/// Upper bound (seconds) of the delay range installed by `ManagedV3Solver::new`.
const DEFAULT_DELAY_MAX_SECS: f32 = 5.0;
28
29pub struct ManagedV3Solver {
31 interpreter: Arc<dyn JavascriptInterpreter>,
32 delay_min: Duration,
33 delay_max: Duration,
34}
35
36impl ManagedV3Solver {
37 pub fn new(interpreter: Arc<dyn JavascriptInterpreter>) -> Self {
38 Self {
39 interpreter,
40 delay_min: Duration::from_secs_f32(DEFAULT_DELAY_MIN_SECS),
41 delay_max: Duration::from_secs_f32(DEFAULT_DELAY_MAX_SECS),
42 }
43 }
44
45 pub fn with_delay_range(mut self, min: Duration, max: Duration) -> Self {
46 self.delay_min = min;
47 self.delay_max = if max < min { min } else { max };
48 self
49 }
50
51 pub fn is_challenge(response: &ChallengeResponse<'_>) -> bool {
52 is_cloudflare_response(response)
53 && matches!(response.status, 403 | 429 | 503)
54 && (V3_PLATFORM_RE.is_match(response.body)
55 || V3_CONTEXT_RE.is_match(response.body)
56 || V3_FORM_RE.is_match(response.body))
57 }
58
59 pub fn solve(
60 &self,
61 response: &ChallengeResponse<'_>,
62 ) -> Result<ChallengeSubmission, ManagedV3Error> {
63 if !Self::is_challenge(response) {
64 return Err(ManagedV3Error::NotV3Challenge);
65 }
66
67 let info = Self::extract_challenge_info(response.body)?;
68 let host = response
69 .url
70 .host_str()
71 .ok_or(ManagedV3Error::MissingHost)?
72 .to_string();
73
74 let challenge_answer = match info.vm_script {
75 Some(ref script) => self.execute_vm(&info, script, &host).unwrap_or_else(|err| {
76 log::warn!("Managed v3 VM execution failed: {err}; using fallback");
77 Self::fallback_answer(&info)
78 }),
79 None => Self::fallback_answer(&info),
80 };
81
82 let payload = Self::generate_payload(response.body, &challenge_answer)?;
83 self.build_submission(response, &info.form_action, payload)
84 }
85
86 pub async fn solve_and_submit(
87 &self,
88 client: Arc<dyn ChallengeHttpClient>,
89 response: &ChallengeResponse<'_>,
90 original_request: OriginalRequest,
91 ) -> Result<ChallengeHttpResponse, ManagedV3Error> {
92 let submission = self.solve(response)?;
93 execute_challenge_submission(client, submission, original_request)
94 .await
95 .map_err(ManagedV3Error::Submission)
96 }
97
98 fn execute_vm(
99 &self,
100 info: &ChallengeInfo,
101 vm_script: &str,
102 host: &str,
103 ) -> Result<String, ManagedV3Error> {
104 let ctx_json = serde_json::to_string(&info.ctx_data).unwrap_or_else(|_| "{}".into());
105 let opt_json = serde_json::to_string(&info.opt_data).unwrap_or_else(|_| "{}".into());
106
107 let script = format!(
108 r#"
109 var window = {{
110 location: {{
111 href: 'https://{host}/',
112 hostname: '{host}',
113 protocol: 'https:',
114 pathname: '/'
115 }},
116 navigator: {{
117 userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
118 platform: 'Win32',
119 language: 'en-US'
120 }},
121 document: {{
122 getElementById: function() {{ return {{ value: '', style: {{}} }}; }},
123 createElement: function() {{ return {{ firstChild: {{ href: 'https://{host}/' }}, style: {{}} }}; }}
124 }},
125 _cf_chl_ctx: {ctx},
126 _cf_chl_opt: {opt},
127 _cf_chl_enter: function() {{ return true; }}
128 }};
129 window.self = window;
130 window.top = window;
131 window.parent = window;
132 window.setTimeout = window.setTimeout || function(fn) {{ return fn(); }};
133 window.clearTimeout = window.clearTimeout || function() {{ return true; }};
134 window.addEventListener = window.addEventListener || function() {{ return true; }};
135 var document = window.document;
136 var navigator = window.navigator;
137 var location = window.location;
138 var _cf_chl_ctx = window._cf_chl_ctx;
139 var _cf_chl_opt = window._cf_chl_opt;
140 {vm_script}
141 if (typeof window._cf_chl_answer !== 'undefined') {{
142 window._cf_chl_answer;
143 }} else if (typeof _cf_chl_answer !== 'undefined') {{
144 _cf_chl_answer;
145 }} else {{
146 Math.random().toString(36).substring(2, 15);
147 }}
148 "#,
149 host = host,
150 ctx = ctx_json,
151 opt = opt_json,
152 vm_script = vm_script
153 );
154
155 self.interpreter
156 .execute(&script, host)
157 .map_err(ManagedV3Error::Interpreter)
158 .map(|answer| answer.trim().to_string())
159 }
160
161 fn fallback_answer(info: &ChallengeInfo) -> String {
162 if let Some(page_data) = info.opt_data.chl_page_data.as_ref() {
163 return (hash_str(page_data) % 1_000_000).to_string();
164 }
165 if let Some(cv_id) = info.ctx_data.cv_id.as_ref() {
166 return (hash_str(cv_id) % 1_000_000).to_string();
167 }
168 rand::thread_rng().gen_range(100_000..=999_999).to_string()
169 }
170
171 fn build_submission(
172 &self,
173 response: &ChallengeResponse<'_>,
174 form_action: &str,
175 mut payload: HashMap<String, String>,
176 ) -> Result<ChallengeSubmission, ManagedV3Error> {
177 let form_action = decode_html_entities(form_action).into_owned();
178 let target_url = response
179 .url
180 .join(&form_action)
181 .map_err(|err| ManagedV3Error::InvalidFormAction(form_action.clone(), err))?;
182
183 let mut headers = HashMap::new();
184 headers.insert(
185 "Content-Type".into(),
186 "application/x-www-form-urlencoded".into(),
187 );
188 headers.insert("Referer".into(), response.url.as_str().to_string());
189 headers.insert("Origin".into(), origin_from_url(response.url));
190
191 let wait = self.random_delay();
192 payload.entry("jschl_answer".into()).or_default();
193 payload.entry("cf_captcha_token".into()).or_default();
194
195 Ok(ChallengeSubmission::new(
196 http::Method::POST,
197 target_url,
198 payload,
199 headers,
200 wait,
201 ))
202 }
203
204 fn random_delay(&self) -> Duration {
205 if self.delay_max <= self.delay_min {
206 return self.delay_min;
207 }
208 let mut rng = rand::thread_rng();
209 let min = self.delay_min.as_secs_f32();
210 let max = self.delay_max.as_secs_f32();
211 Duration::from_secs_f32(rng.gen_range(min..max))
212 }
213
214 fn extract_challenge_info(body: &str) -> Result<ChallengeInfo, ManagedV3Error> {
215 let ctx_data = Self::extract_json_block(body, "window._cf_chl_ctx")?
216 .map(|json| serde_json::from_str::<ChallengeJson>(&json))
217 .transpose()
218 .map_err(ManagedV3Error::JsonParse)?
219 .unwrap_or_default();
220 let opt_data = Self::extract_json_block(body, "window._cf_chl_opt")?
221 .map(|json| serde_json::from_str::<ChallengeJson>(&json))
222 .transpose()
223 .map_err(ManagedV3Error::JsonParse)?
224 .unwrap_or_default();
225 let form_action = V3_FORM_RE
226 .captures(body)
227 .and_then(|caps| caps.get(1))
228 .map(|m| m.as_str().to_string())
229 .ok_or(ManagedV3Error::FormActionMissing)?;
230 let vm_script = Self::extract_vm_script(body);
231
232 Ok(ChallengeInfo {
233 ctx_data,
234 opt_data,
235 form_action,
236 vm_script,
237 })
238 }
239
240 fn extract_json_block(body: &str, marker: &str) -> Result<Option<String>, ManagedV3Error> {
241 let start = match body.find(marker) {
242 Some(idx) => idx,
243 None => return Ok(None),
244 };
245
246 let brace_start = match body[start..].find('{') {
247 Some(offset) => start + offset,
248 None => return Ok(None),
249 };
250
251 let mut depth = 0_i32;
252 let mut in_string = false;
253 let mut escape = false;
254
255 for (offset, ch) in body[brace_start..].char_indices() {
256 if in_string {
257 if escape {
258 escape = false;
259 continue;
260 }
261
262 match ch {
263 '\\' => {
264 escape = true;
265 }
266 '"' => {
267 in_string = false;
268 }
269 _ => {}
270 }
271 continue;
272 }
273
274 match ch {
275 '{' => {
276 depth += 1;
277 }
278 '}' => {
279 depth -= 1;
280 if depth == 0 {
281 let end = brace_start + offset;
282 return Ok(Some(body[brace_start..=end].to_string()));
283 }
284 }
285 '"' => {
286 in_string = true;
287 }
288 _ => {}
289 }
290 }
291
292 Err(ManagedV3Error::JsonExtractionFailed(marker.to_string()))
293 }
294
/// Returns the trimmed contents of the `<script>` element that contains
/// the first occurrence of `window._cf_chl_enter`.
///
/// Yields `None` when the marker is absent, no `<script` opens before it,
/// no `</script>` follows it, or the markup is malformed so that the
/// opening tag is only closed at or after the closing tag.
fn extract_vm_script(body: &str) -> Option<String> {
    let enter_idx = body.find("window._cf_chl_enter")?;
    let script_open = body[..enter_idx].rfind("<script")?;
    let content_start = body[script_open..].find('>')? + script_open + 1;
    let script_close = body[enter_idx..].find("</script>")? + enter_idx;
    // If the opening tag has no `>` before the marker, the first `>` found may
    // belong to `</script>` itself, putting `content_start` past
    // `script_close`. Checked slicing turns that into `None` instead of a panic.
    let content = body.get(content_start..script_close)?;
    Some(content.trim().to_string())
}
302
303 fn generate_payload(
304 body: &str,
305 answer: &str,
306 ) -> Result<HashMap<String, String>, ManagedV3Error> {
307 let r_token = R_TOKEN_RE
308 .captures(body)
309 .and_then(|caps| caps.get(1))
310 .map(|m| m.as_str().to_string())
311 .ok_or(ManagedV3Error::MissingToken("r"))?;
312
313 let mut payload = HashMap::new();
314 payload.insert("r".into(), r_token);
315 payload.insert("jschl_answer".into(), answer.to_string());
316
317 for caps in INPUT_FIELD_RE.captures_iter(body) {
318 if let (Some(name), Some(value)) = (caps.get(1), caps.get(2)) {
319 let key = name.as_str();
320 if key != "jschl_answer" && !payload.contains_key(key) {
321 payload.insert(key.to_string(), value.as_str().to_string());
322 }
323 }
324 }
325
326 Ok(payload)
327 }
328}
329
330impl ChallengeSolver for ManagedV3Solver {
331 fn name(&self) -> &'static str {
332 "managed_v3"
333 }
334}
335
336#[derive(Debug, Default, Deserialize, Serialize)]
337struct ChallengeJson {
338 #[serde(rename = "cvId")]
339 cv_id: Option<String>,
340 #[serde(rename = "chlPageData")]
341 chl_page_data: Option<String>,
342 #[serde(flatten)]
343 extra: serde_json::Value,
344}
345
346struct ChallengeInfo {
347 ctx_data: ChallengeJson,
348 opt_data: ChallengeJson,
349 form_action: String,
350 vm_script: Option<String>,
351}
352
353#[derive(Debug, Error)]
354pub enum ManagedV3Error {
355 #[error("response is not a Cloudflare v3 challenge")]
356 NotV3Challenge,
357 #[error("missing host in challenge URL")]
358 MissingHost,
359 #[error("challenge form action missing")]
360 FormActionMissing,
361 #[error("missing token '{0}' in challenge page")]
362 MissingToken(&'static str),
363 #[error("invalid form action '{0}': {1}")]
364 InvalidFormAction(String, url::ParseError),
365 #[error("javascript interpreter error: {0}")]
366 Interpreter(#[source] InterpreterError),
367 #[error("challenge submission failed: {0}")]
368 Submission(#[source] ChallengeExecutionError),
369 #[error("json parse error: {0}")]
370 JsonParse(#[from] serde_json::Error),
371 #[error("failed to extract JSON block for marker '{0}'")]
372 JsonExtractionFailed(String),
373}
374
375static V3_PLATFORM_RE: Lazy<Regex> = Lazy::new(|| {
376 RegexBuilder::new(r#"cpo\.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/jsch/v3"#)
377 .case_insensitive(true)
378 .dot_matches_new_line(true)
379 .build()
380 .expect("invalid v3 platform regex")
381});
382
383static V3_CONTEXT_RE: Lazy<Regex> = Lazy::new(|| {
384 RegexBuilder::new(r"window\._cf_chl_ctx\s*=")
385 .case_insensitive(true)
386 .dot_matches_new_line(true)
387 .build()
388 .expect("invalid v3 context regex")
389});
390
391static V3_FORM_RE: Lazy<Regex> = Lazy::new(|| {
392 RegexBuilder::new(
393 r#"<form[^>]*id=['"]challenge-form['"][^>]*action=['"]([^'"]*__cf_chl_rt_tk=[^'"]*)['"]"#,
394 )
395 .case_insensitive(true)
396 .dot_matches_new_line(true)
397 .build()
398 .expect("invalid v3 form regex")
399});
400
401static R_TOKEN_RE: Lazy<Regex> = Lazy::new(|| {
402 RegexBuilder::new(r#"name=['"]r['"]\s+value=['"]([^'"]+)['"]"#)
403 .case_insensitive(true)
404 .dot_matches_new_line(true)
405 .build()
406 .expect("invalid v3 r token regex")
407});
408
409static INPUT_FIELD_RE: Lazy<Regex> = Lazy::new(|| {
410 RegexBuilder::new(r#"<input[^>]*name=['"]([^'"]+)['"][^>]*value=['"]([^'"]*)['"]"#)
411 .case_insensitive(true)
412 .dot_matches_new_line(true)
413 .build()
414 .expect("invalid v3 input regex")
415});
416
/// Hashes `input` with a freshly constructed `DefaultHasher`.
///
/// Because the hasher always starts from the same state, equal inputs give
/// equal results within one build of the program.
fn hash_str(input: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::default();
    Hash::hash(input, &mut state);
    state.finish()
}
425
426#[cfg(test)]
427mod tests {
428 use super::*;
429 use http::{HeaderMap, Method, header::SERVER};
430 use url::Url;
431
432 struct ResponseFixture {
433 url: Url,
434 headers: HeaderMap,
435 method: Method,
436 body: String,
437 status: u16,
438 }
439
440 impl ResponseFixture {
441 fn new(body: &str, status: u16) -> Self {
442 let mut headers = HeaderMap::new();
443 headers.insert(SERVER, "cloudflare".parse().unwrap());
444 Self {
445 url: Url::parse("https://example.com/").unwrap(),
446 headers,
447 method: Method::GET,
448 body: body.to_string(),
449 status,
450 }
451 }
452
453 fn response(&self) -> ChallengeResponse<'_> {
454 ChallengeResponse {
455 url: &self.url,
456 status: self.status,
457 headers: &self.headers,
458 body: &self.body,
459 request_method: &self.method,
460 }
461 }
462 }
463
464 struct StubInterpreter;
465
466 impl JavascriptInterpreter for StubInterpreter {
467 fn solve_challenge(
468 &self,
469 _page_html: &str,
470 _host: &str,
471 ) -> Result<String, InterpreterError> {
472 Ok("stub".into())
473 }
474
475 fn execute(&self, script: &str, _host: &str) -> Result<String, InterpreterError> {
476 if script.contains("_cf_chl_answer") {
477 Ok("987654".into())
478 } else {
479 Err(InterpreterError::Execution("missing answer".into()))
480 }
481 }
482 }
483
/// Renders a minimal v3 challenge page.
///
/// The page always contains the ctx/opt JSON blocks, the platform script,
/// and the challenge form. With `with_vm`, it also carries a script that
/// defines `window._cf_chl_enter` and `window._cf_chl_answer`.
fn sample_html(with_vm: bool) -> String {
    let vm = match with_vm {
        true => "<script>window._cf_chl_enter=function(){return true;};window._cf_chl_answer='123456';</script>",
        false => "",
    };

    format!(
        r#"
        <html>
          <head>
            <script>window._cf_chl_ctx={{"cvId":"cv123"}};</script>
            <script>window._cf_chl_opt={{"chlPageData":"page-data"}};</script>
          </head>
          <body>
            <script>var cpo={{}};cpo.src="/cdn-cgi/challenge-platform/h/b/orchestrate/jsch/v3";</script>
            <form id="challenge-form" action="/cdn-cgi/challenge-platform/h/b/orchestrate/form?__cf_chl_rt_tk=foo" method="POST">
              <input type="hidden" name="r" value="token-r"/>
              <input type="hidden" name="cf_chl_seq_i" value="1"/>
            </form>
            {vm}
          </body>
        </html>
    "#
    )
}
511
/// With a VM script on the page, the interpreter's result becomes the
/// submitted `jschl_answer`.
#[test]
fn solve_uses_vm_answer() {
    let page = sample_html(true);
    let fixture = ResponseFixture::new(&page, 403);
    assert!(ManagedV3Solver::is_challenge(&fixture.response()));

    let solver = ManagedV3Solver::new(Arc::new(StubInterpreter));
    let submission = solver.solve(&fixture.response()).expect("should solve");

    let answer = submission.form_fields.get("jschl_answer").map(String::as_str);
    assert_eq!(answer, Some("987654"));
}
524
/// Without a VM script, the answer is derived from the page's
/// `chlPageData` value by the hash-based fallback.
#[test]
fn fallback_when_no_vm() {
    let html = sample_html(false);
    let fixture = ResponseFixture::new(&html, 403);
    let solver = ManagedV3Solver::new(Arc::new(StubInterpreter));
    let submission = solver.solve(&fixture.response()).expect("fallback works");

    // `build_submission` always inserts a (possibly empty) `jschl_answer`, so
    // checking only for the key's presence could never fail. Pin the
    // deterministic fallback value instead.
    let expected = (hash_str("page-data") % 1_000_000).to_string();
    assert_eq!(submission.form_fields.get("jschl_answer"), Some(&expected));
}
533}