1use std::collections::HashMap;
12use std::sync::Arc;
13use std::time::Duration;
14
15use html_escape::decode_html_entities;
16use once_cell::sync::Lazy;
17use rand::Rng;
18use regex::{Regex, RegexBuilder};
19use serde::{Deserialize, Serialize};
20use thiserror::Error;
21
22use crate::challenges::core::{
23 ChallengeExecutionError, ChallengeHttpClient, ChallengeHttpResponse, ChallengeResponse,
24 ChallengeSubmission, OriginalRequest, execute_challenge_submission, is_cloudflare_response,
25 origin_from_url,
26};
27use crate::external_deps::interpreters::{InterpreterError, JavascriptInterpreter};
28
29use super::ChallengeSolver;
30
/// Default lower bound, in seconds, of the randomized wait used by `ManagedV3Solver::new`.
const DEFAULT_DELAY_MIN_SECS: f32 = 1.0;
/// Default upper bound, in seconds, of the randomized wait used by `ManagedV3Solver::new`.
const DEFAULT_DELAY_MAX_SECS: f32 = 5.0;
33
/// Solver for Cloudflare "managed v3" challenge pages.
///
/// Holds the JavaScript interpreter used to run the page's VM script and the
/// bounds of the randomized wait that is attached to every submission.
pub struct ManagedV3Solver {
    /// Interpreter that executes the generated challenge script.
    interpreter: Arc<dyn JavascriptInterpreter>,
    /// Lower bound of the randomized wait.
    delay_min: Duration,
    /// Upper bound of the randomized wait; `with_delay_range` keeps it `>= delay_min`.
    delay_max: Duration,
}
40
41impl ManagedV3Solver {
42 pub fn new(interpreter: Arc<dyn JavascriptInterpreter>) -> Self {
43 Self {
44 interpreter,
45 delay_min: Duration::from_secs_f32(DEFAULT_DELAY_MIN_SECS),
46 delay_max: Duration::from_secs_f32(DEFAULT_DELAY_MAX_SECS),
47 }
48 }
49
50 pub fn with_delay_range(mut self, min: Duration, max: Duration) -> Self {
51 self.delay_min = min;
52 self.delay_max = if max < min { min } else { max };
53 self
54 }
55
56 pub fn is_challenge(response: &ChallengeResponse<'_>) -> bool {
57 is_cloudflare_response(response)
58 && matches!(response.status, 403 | 429 | 503)
59 && (V3_PLATFORM_RE.is_match(response.body)
60 || V3_CONTEXT_RE.is_match(response.body)
61 || V3_FORM_RE.is_match(response.body))
62 }
63
64 pub fn solve(
65 &self,
66 response: &ChallengeResponse<'_>,
67 ) -> Result<ChallengeSubmission, ManagedV3Error> {
68 if !Self::is_challenge(response) {
69 return Err(ManagedV3Error::NotV3Challenge);
70 }
71
72 let info = Self::extract_challenge_info(response.body)?;
73 let host = response
74 .url
75 .host_str()
76 .ok_or(ManagedV3Error::MissingHost)?
77 .to_string();
78
79 let challenge_answer = match info.vm_script {
80 Some(ref script) => self.execute_vm(&info, script, &host).unwrap_or_else(|err| {
81 log::warn!("Managed v3 VM execution failed: {err}; using fallback");
82 Self::fallback_answer(&info)
83 }),
84 None => Self::fallback_answer(&info),
85 };
86
87 let payload = Self::generate_payload(response.body, &challenge_answer)?;
88 self.build_submission(response, &info.form_action, payload)
89 }
90
91 pub async fn solve_and_submit(
92 &self,
93 client: Arc<dyn ChallengeHttpClient>,
94 response: &ChallengeResponse<'_>,
95 original_request: OriginalRequest,
96 ) -> Result<ChallengeHttpResponse, ManagedV3Error> {
97 let submission = self.solve(response)?;
98 let result = execute_challenge_submission(client, submission, original_request)
99 .await
100 .map_err(ManagedV3Error::Submission)?;
101
102 if result.status == 403 {
104 return Err(ManagedV3Error::ChallengeSolveFailed);
105 }
106
107 Ok(result)
108 }
109
110 fn execute_vm(
111 &self,
112 info: &ChallengeInfo,
113 vm_script: &str,
114 host: &str,
115 ) -> Result<String, ManagedV3Error> {
116 let ctx_json = serde_json::to_string(&info.ctx_data).unwrap_or_else(|_| "{}".into());
117 let opt_json = serde_json::to_string(&info.opt_data).unwrap_or_else(|_| "{}".into());
118
119 let script = format!(
120 r#"
121 var window = {{
122 location: {{
123 href: 'https://{host}/',
124 hostname: '{host}',
125 protocol: 'https:',
126 pathname: '/'
127 }},
128 navigator: {{
129 userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
130 platform: 'Win32',
131 language: 'en-US'
132 }},
133 document: {{
134 getElementById: function() {{ return {{ value: '', style: {{}} }}; }},
135 createElement: function() {{ return {{ firstChild: {{ href: 'https://{host}/' }}, style: {{}} }}; }}
136 }},
137 _cf_chl_ctx: {ctx},
138 _cf_chl_opt: {opt},
139 _cf_chl_enter: function() {{ return true; }}
140 }};
141 window.self = window;
142 window.top = window;
143 window.parent = window;
144 window.frames = window;
145 window.setTimeout = window.setTimeout || function(fn) {{ return fn(); }};
146 window.clearTimeout = window.clearTimeout || function() {{ return true; }};
147 window.addEventListener = window.addEventListener || function() {{ return true; }};
148 var document = window.document;
149 var navigator = window.navigator;
150 var location = window.location;
151 var _cf_chl_ctx = window._cf_chl_ctx;
152 var _cf_chl_opt = window._cf_chl_opt;
153 {vm_script}
154 if (typeof window._cf_chl_answer !== 'undefined') {{
155 window._cf_chl_answer;
156 }} else if (typeof _cf_chl_answer !== 'undefined') {{
157 _cf_chl_answer;
158 }} else {{
159 Math.random().toString(36).substring(2, 15);
160 }}
161 "#,
162 host = host,
163 ctx = ctx_json,
164 opt = opt_json,
165 vm_script = vm_script
166 );
167
168 self.interpreter
169 .execute(&script, host)
170 .map_err(ManagedV3Error::Interpreter)
171 .map(|answer| answer.trim().to_string())
172 }
173
174 fn fallback_answer(info: &ChallengeInfo) -> String {
175 if let Some(page_data) = info.opt_data.chl_page_data.as_ref() {
176 return (hash_str(page_data) % 1_000_000).to_string();
177 }
178 if let Some(cv_id) = info.ctx_data.cv_id.as_ref() {
179 return (hash_str(cv_id) % 1_000_000).to_string();
180 }
181 rand::thread_rng().gen_range(100_000..=999_999).to_string()
182 }
183
184 fn build_submission(
185 &self,
186 response: &ChallengeResponse<'_>,
187 form_action: &str,
188 mut payload: HashMap<String, String>,
189 ) -> Result<ChallengeSubmission, ManagedV3Error> {
190 let form_action = decode_html_entities(form_action).into_owned();
191 let target_url = response
192 .url
193 .join(&form_action)
194 .map_err(|err| ManagedV3Error::InvalidFormAction(form_action.clone(), err))?;
195
196 let mut headers = HashMap::new();
197 headers.insert(
198 "Content-Type".into(),
199 "application/x-www-form-urlencoded".into(),
200 );
201 headers.insert("Referer".into(), response.url.as_str().to_string());
202 headers.insert("Origin".into(), origin_from_url(response.url));
203
204 let wait = self.random_delay();
205 payload.entry("jschl_answer".into()).or_default();
206 payload.entry("cf_captcha_token".into()).or_default();
207
208 Ok(ChallengeSubmission::new(
209 http::Method::POST,
210 target_url,
211 payload,
212 headers,
213 wait,
214 ))
215 }
216
217 fn random_delay(&self) -> Duration {
218 if self.delay_max <= self.delay_min {
219 return self.delay_min;
220 }
221 let mut rng = rand::thread_rng();
222 let min = self.delay_min.as_secs_f32();
223 let max = self.delay_max.as_secs_f32();
224 Duration::from_secs_f32(rng.gen_range(min..max))
225 }
226
227 fn extract_challenge_info(body: &str) -> Result<ChallengeInfo, ManagedV3Error> {
228 let ctx_data = Self::extract_json_block(body, "window._cf_chl_ctx")?
229 .map(|json| serde_json::from_str::<ChallengeJson>(&json))
230 .transpose()
231 .map_err(ManagedV3Error::JsonParse)?
232 .unwrap_or_default();
233 let opt_data = Self::extract_json_block(body, "window._cf_chl_opt")?
234 .map(|json| serde_json::from_str::<ChallengeJson>(&json))
235 .transpose()
236 .map_err(ManagedV3Error::JsonParse)?
237 .unwrap_or_default();
238 let form_action = V3_FORM_RE
239 .captures(body)
240 .and_then(|caps| caps.get(1))
241 .map(|m| m.as_str().to_string())
242 .ok_or(ManagedV3Error::FormActionMissing)?;
243 let vm_script = Self::extract_vm_script(body);
244
245 Ok(ChallengeInfo {
246 ctx_data,
247 opt_data,
248 form_action,
249 vm_script,
250 })
251 }
252
253 fn extract_json_block(body: &str, marker: &str) -> Result<Option<String>, ManagedV3Error> {
254 let start = match body.find(marker) {
255 Some(idx) => idx,
256 None => return Ok(None),
257 };
258
259 let brace_start = match body[start..].find('{') {
260 Some(offset) => start + offset,
261 None => return Ok(None),
262 };
263
264 let mut depth = 0_i32;
265 let mut in_string = false;
266 let mut escape = false;
267
268 for (offset, ch) in body[brace_start..].char_indices() {
269 if in_string {
270 if escape {
271 escape = false;
272 continue;
273 }
274
275 match ch {
276 '\\' => {
277 escape = true;
278 }
279 '"' => {
280 in_string = false;
281 }
282 _ => {}
283 }
284 continue;
285 }
286
287 match ch {
288 '{' => {
289 depth += 1;
290 }
291 '}' => {
292 depth -= 1;
293 if depth == 0 {
294 let end = brace_start + offset;
295 return Ok(Some(body[brace_start..=end].to_string()));
296 }
297 }
298 '"' => {
299 in_string = true;
300 }
301 _ => {}
302 }
303 }
304
305 Err(ManagedV3Error::JsonExtractionFailed(marker.to_string()))
306 }
307
308 fn extract_vm_script(body: &str) -> Option<String> {
309 static VM_SCRIPT_RE: Lazy<Regex> = Lazy::new(|| {
311 RegexBuilder::new(r"<script[^>]*>\s*(.*?window\._cf_chl_enter.*?)</script>")
312 .case_insensitive(true)
313 .dot_matches_new_line(true)
314 .build()
315 .expect("invalid vm script regex")
316 });
317
318 VM_SCRIPT_RE
319 .captures(body)
320 .and_then(|caps| caps.get(1))
321 .map(|m| m.as_str().trim().to_string())
322 }
323
324 fn generate_payload(
325 body: &str,
326 answer: &str,
327 ) -> Result<HashMap<String, String>, ManagedV3Error> {
328 let r_token = R_TOKEN_RE
329 .captures(body)
330 .and_then(|caps| caps.get(1))
331 .map(|m| m.as_str().to_string())
332 .ok_or(ManagedV3Error::MissingToken("r"))?;
333
334 let mut payload = HashMap::new();
335 payload.insert("r".into(), r_token);
336 payload.insert("jschl_answer".into(), answer.to_string());
337
338 for caps in INPUT_FIELD_RE.captures_iter(body) {
339 if let (Some(name), Some(value)) = (caps.get(1), caps.get(2)) {
340 let key = name.as_str();
341 if key != "jschl_answer" && !payload.contains_key(key) {
342 payload.insert(key.to_string(), value.as_str().to_string());
343 }
344 }
345 }
346
347 Ok(payload)
348 }
349}
350
impl ChallengeSolver for ManagedV3Solver {
    /// Returns the fixed identifier of this solver, `"managed_v3"`.
    fn name(&self) -> &'static str {
        "managed_v3"
    }
}
356
357#[derive(Debug, Default, Deserialize, Serialize)]
358struct ChallengeJson {
359 #[serde(rename = "cvId")]
360 cv_id: Option<String>,
361 #[serde(rename = "chlPageData")]
362 chl_page_data: Option<String>,
363 #[serde(flatten)]
364 extra: serde_json::Value,
365}
366
/// Data that `extract_challenge_info` pulls out of a challenge page.
struct ChallengeInfo {
    /// Parsed `window._cf_chl_ctx` object; default-initialized when the marker is absent.
    ctx_data: ChallengeJson,
    /// Parsed `window._cf_chl_opt` object; default-initialized when the marker is absent.
    opt_data: ChallengeJson,
    /// Challenge form `action` as captured from the page (entities are decoded in
    /// `build_submission`).
    form_action: String,
    /// Script text returned by `extract_vm_script`, if the page contains one.
    vm_script: Option<String>,
}
373
/// Errors produced while detecting, solving or submitting a managed v3 challenge.
#[derive(Debug, Error)]
pub enum ManagedV3Error {
    /// `solve` was given a response that `is_challenge` rejects.
    #[error("response is not a Cloudflare v3 challenge")]
    NotV3Challenge,
    /// The response URL has no host component.
    #[error("missing host in challenge URL")]
    MissingHost,
    /// The challenge form pattern did not match the page body.
    #[error("challenge form action missing")]
    FormActionMissing,
    /// A required hidden input (named by the payload) was not found in the page.
    #[error("missing token '{0}' in challenge page")]
    MissingToken(&'static str),
    /// The decoded form action could not be joined onto the response URL.
    #[error("invalid form action '{0}': {1}")]
    InvalidFormAction(String, url::ParseError),
    /// The JavaScript interpreter failed; `solve` logs this and falls back instead of
    /// returning it.
    #[error("javascript interpreter error: {0}")]
    Interpreter(#[source] InterpreterError),
    /// The submission was answered with status 403.
    #[error("failed to solve Cloudflare v3 challenge - received 403 status")]
    ChallengeSolveFailed,
    /// `execute_challenge_submission` returned an error.
    #[error("challenge submission failed: {0}")]
    Submission(#[source] ChallengeExecutionError),
    /// An extracted context/options object was not valid JSON.
    #[error("json parse error: {0}")]
    JsonParse(#[from] serde_json::Error),
    /// The braces after the given marker never balanced.
    #[error("failed to extract JSON block for marker '{0}'")]
    JsonExtractionFailed(String),
}
397
398static V3_PLATFORM_RE: Lazy<Regex> = Lazy::new(|| {
399 RegexBuilder::new(r#"cpo\.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/jsch/v3"#)
400 .case_insensitive(true)
401 .dot_matches_new_line(true)
402 .build()
403 .expect("invalid v3 platform regex")
404});
405
406static V3_CONTEXT_RE: Lazy<Regex> = Lazy::new(|| {
407 RegexBuilder::new(r"window\._cf_chl_ctx\s*=")
408 .case_insensitive(true)
409 .dot_matches_new_line(true)
410 .build()
411 .expect("invalid v3 context regex")
412});
413
414static V3_FORM_RE: Lazy<Regex> = Lazy::new(|| {
415 RegexBuilder::new(
416 r#"<form[^>]*id=['"]challenge-form['"][^>]*action=['"]([^'"]*__cf_chl_rt_tk=[^'"]*)['"]"#,
417 )
418 .case_insensitive(true)
419 .dot_matches_new_line(true)
420 .build()
421 .expect("invalid v3 form regex")
422});
423
424static R_TOKEN_RE: Lazy<Regex> = Lazy::new(|| {
425 RegexBuilder::new(r#"name=['"]r['"]\s+value=['"]([^'"]+)['"]"#)
426 .case_insensitive(true)
427 .dot_matches_new_line(true)
428 .build()
429 .expect("invalid v3 r token regex")
430});
431
432static INPUT_FIELD_RE: Lazy<Regex> = Lazy::new(|| {
433 RegexBuilder::new(r#"<input[^>]*name=['"]([^'"]+)['"][^>]*value=['"]([^'"]*)['"]"#)
434 .case_insensitive(true)
435 .dot_matches_new_line(true)
436 .build()
437 .expect("invalid v3 input regex")
438});
439
/// Hashes `input` with a freshly created standard-library `DefaultHasher`.
fn hash_str(input: &str) -> u64 {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    std::hash::Hash::hash(input, &mut state);
    std::hash::Hasher::finish(&state)
}
448
#[cfg(test)]
mod tests {
    use super::*;
    use http::{header::SERVER, HeaderMap, Method};
    use url::Url;

    /// Owns the data that a borrowed `ChallengeResponse` points into.
    struct ResponseFixture {
        url: Url,
        headers: HeaderMap,
        method: Method,
        body: String,
        status: u16,
    }

    impl ResponseFixture {
        /// Builds a GET response for `https://example.com/` with a
        /// `Server: cloudflare` header and the given body and status.
        fn new(body: &str, status: u16) -> Self {
            let url = Url::parse("https://example.com/").unwrap();
            let mut headers = HeaderMap::new();
            headers.insert(SERVER, "cloudflare".parse().unwrap());

            Self {
                url,
                headers,
                method: Method::GET,
                body: body.to_string(),
                status,
            }
        }

        /// Borrows the fixture as the view type the solver consumes.
        fn response(&self) -> ChallengeResponse<'_> {
            ChallengeResponse {
                status: self.status,
                request_method: &self.method,
                url: &self.url,
                headers: &self.headers,
                body: &self.body,
            }
        }
    }

    /// Interpreter double: answers `987654` for any script that mentions
    /// `_cf_chl_answer` and fails otherwise.
    struct StubInterpreter;

    impl JavascriptInterpreter for StubInterpreter {
        fn solve_challenge(
            &self,
            _page_html: &str,
            _host: &str,
        ) -> Result<String, InterpreterError> {
            Ok("stub".into())
        }

        fn execute(&self, script: &str, _host: &str) -> Result<String, InterpreterError> {
            if !script.contains("_cf_chl_answer") {
                return Err(InterpreterError::Execution("missing answer".into()));
            }
            Ok("987654".into())
        }
    }

    /// Renders a minimal challenge page, optionally including a VM script.
    fn sample_html(with_vm: bool) -> String {
        let vm = match with_vm {
            true => {
                "<script>window._cf_chl_enter=function(){return true;};window._cf_chl_answer='123456';</script>"
            }
            false => "",
        };

        format!(
            r#"
            <html>
              <head>
                <script>window._cf_chl_ctx={{"cvId":"cv123"}};</script>
                <script>window._cf_chl_opt={{"chlPageData":"page-data"}};</script>
              </head>
              <body>
                <script>var cpo={{}};cpo.src="/cdn-cgi/challenge-platform/h/b/orchestrate/jsch/v3";</script>
                <form id="challenge-form" action="/cdn-cgi/challenge-platform/h/b/orchestrate/form?__cf_chl_rt_tk=foo" method="POST">
                  <input type="hidden" name="r" value="token-r"/>
                  <input type="hidden" name="cf_chl_seq_i" value="1"/>
                </form>
                {vm}
              </body>
            </html>
            "#
        )
    }

    #[test]
    fn solve_uses_vm_answer() {
        let page = sample_html(true);
        let fixture = ResponseFixture::new(&page, 403);
        let solver = ManagedV3Solver::new(Arc::new(StubInterpreter));

        assert!(ManagedV3Solver::is_challenge(&fixture.response()));

        let submission = solver.solve(&fixture.response()).expect("should solve");
        let answer = submission.form_fields.get("jschl_answer");
        assert_eq!(answer.map(String::as_str), Some("987654"));
    }

    #[test]
    fn fallback_when_no_vm() {
        let page = sample_html(false);
        let fixture = ResponseFixture::new(&page, 403);
        let solver = ManagedV3Solver::new(Arc::new(StubInterpreter));

        let submission = solver.solve(&fixture.response()).expect("fallback works");
        assert!(submission.form_fields.contains_key("jschl_answer"));
    }

    #[test]
    fn json_extraction_handles_nested_objects() {
        let html = r#"
            <script>
            window._cf_chl_ctx = {
                "cvId": "test123",
                "nested": {
                    "key": "value",
                    "inner": {"deep": "data"}
                },
                "array": [1, 2, 3]
            };
            </script>
        "#;

        let extracted = ManagedV3Solver::extract_json_block(html, "window._cf_chl_ctx");
        assert!(extracted.is_ok());
        let json = extracted.unwrap();
        assert!(json.is_some());

        let parsed: serde_json::Value = serde_json::from_str(&json.unwrap()).unwrap();
        assert_eq!(parsed["cvId"], "test123");
    }

    #[test]
    fn extracts_all_input_fields() {
        let html = r#"
            <form id="challenge-form" action="/test?__cf_chl_rt_tk=foo">
                <input type="hidden" name="r" value="r-token"/>
                <input type="hidden" name="cf_chl_seq_i" value="2"/>
                <input type="hidden" name="custom_field" value="custom_value"/>
            </form>
            <script>window._cf_chl_ctx={};</script>
            <script>window._cf_chl_opt={};</script>
        "#;

        let outcome = ManagedV3Solver::generate_payload(html, "answer123");
        assert!(outcome.is_ok());
        let payload = outcome.unwrap();

        let expected_fields = [
            ("r", "r-token"),
            ("jschl_answer", "answer123"),
            ("cf_chl_seq_i", "2"),
            ("custom_field", "custom_value"),
        ];
        for (field, expected) in expected_fields {
            assert_eq!(payload.get(field), Some(&expected.to_string()));
        }
    }
}