cloudscraper_rs/challenges/solvers/
managed_v3.rs

1//! Solver for Cloudflare Managed Challenge v3.
2//!
3//! Executes the embedded JavaScript VM payload and applies fallback strategies
4//! when full execution is not possible.
5//!
6//! This solver handles the most sophisticated Cloudflare challenges that use
7//! JavaScript VM execution with context data (_cf_chl_ctx and _cf_chl_opt).
8//! It includes randomized delays (1-5s by default) and intelligent fallback
9//! mechanisms for when VM execution fails.
10
11use std::collections::HashMap;
12use std::sync::Arc;
13use std::time::Duration;
14
15use html_escape::decode_html_entities;
16use once_cell::sync::Lazy;
17use rand::Rng;
18use regex::{Regex, RegexBuilder};
19use serde::{Deserialize, Serialize};
20use thiserror::Error;
21
22use crate::challenges::core::{
23    ChallengeExecutionError, ChallengeHttpClient, ChallengeHttpResponse, ChallengeResponse,
24    ChallengeSubmission, OriginalRequest, execute_challenge_submission, is_cloudflare_response,
25    origin_from_url,
26};
27use crate::external_deps::interpreters::{InterpreterError, JavascriptInterpreter};
28
29use super::ChallengeSolver;
30
/// Default lower bound, in seconds, of the randomized wait attached to a submission.
const DEFAULT_DELAY_MIN_SECS: f32 = 1.0;
/// Default upper bound, in seconds, of the randomized wait attached to a submission.
const DEFAULT_DELAY_MAX_SECS: f32 = 5.0;
33
/// Cloudflare Managed v3/V3 JavaScript challenge solver.
///
/// Holds the JavaScript interpreter used to run the challenge's VM script and
/// the bounds of the randomized wait that is attached to every submission.
pub struct ManagedV3Solver {
    /// Interpreter that executes the wrapped VM script.
    interpreter: Arc<dyn JavascriptInterpreter>,
    /// Lower bound of the randomized wait.
    delay_min: Duration,
    /// Upper bound of the randomized wait; `with_delay_range` keeps it at or above `delay_min`.
    delay_max: Duration,
}
40
41impl ManagedV3Solver {
42    pub fn new(interpreter: Arc<dyn JavascriptInterpreter>) -> Self {
43        Self {
44            interpreter,
45            delay_min: Duration::from_secs_f32(DEFAULT_DELAY_MIN_SECS),
46            delay_max: Duration::from_secs_f32(DEFAULT_DELAY_MAX_SECS),
47        }
48    }
49
50    pub fn with_delay_range(mut self, min: Duration, max: Duration) -> Self {
51        self.delay_min = min;
52        self.delay_max = if max < min { min } else { max };
53        self
54    }
55
56    pub fn is_challenge(response: &ChallengeResponse<'_>) -> bool {
57        is_cloudflare_response(response)
58            && matches!(response.status, 403 | 429 | 503)
59            && (V3_PLATFORM_RE.is_match(response.body)
60                || V3_CONTEXT_RE.is_match(response.body)
61                || V3_FORM_RE.is_match(response.body))
62    }
63
64    pub fn solve(
65        &self,
66        response: &ChallengeResponse<'_>,
67    ) -> Result<ChallengeSubmission, ManagedV3Error> {
68        if !Self::is_challenge(response) {
69            return Err(ManagedV3Error::NotV3Challenge);
70        }
71
72        let info = Self::extract_challenge_info(response.body)?;
73        let host = response
74            .url
75            .host_str()
76            .ok_or(ManagedV3Error::MissingHost)?
77            .to_string();
78
79        let challenge_answer = match info.vm_script {
80            Some(ref script) => self.execute_vm(&info, script, &host).unwrap_or_else(|err| {
81                log::warn!("Managed v3 VM execution failed: {err}; using fallback");
82                Self::fallback_answer(&info)
83            }),
84            None => Self::fallback_answer(&info),
85        };
86
87        let payload = Self::generate_payload(response.body, &challenge_answer)?;
88        self.build_submission(response, &info.form_action, payload)
89    }
90
91    pub async fn solve_and_submit(
92        &self,
93        client: Arc<dyn ChallengeHttpClient>,
94        response: &ChallengeResponse<'_>,
95        original_request: OriginalRequest,
96    ) -> Result<ChallengeHttpResponse, ManagedV3Error> {
97        let submission = self.solve(response)?;
98        let result = execute_challenge_submission(client, submission, original_request)
99            .await
100            .map_err(ManagedV3Error::Submission)?;
101
102        // Check if Cloudflare rejected the v3 challenge solution with 403
103        if result.status == 403 {
104            return Err(ManagedV3Error::ChallengeSolveFailed);
105        }
106
107        Ok(result)
108    }
109
110    fn execute_vm(
111        &self,
112        info: &ChallengeInfo,
113        vm_script: &str,
114        host: &str,
115    ) -> Result<String, ManagedV3Error> {
116        let ctx_json = serde_json::to_string(&info.ctx_data).unwrap_or_else(|_| "{}".into());
117        let opt_json = serde_json::to_string(&info.opt_data).unwrap_or_else(|_| "{}".into());
118
119        let script = format!(
120            r#"
121            var window = {{
122                location: {{
123                    href: 'https://{host}/',
124                    hostname: '{host}',
125                    protocol: 'https:',
126                    pathname: '/'
127                }},
128                navigator: {{
129                    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
130                    platform: 'Win32',
131                    language: 'en-US'
132                }},
133                document: {{
134                    getElementById: function() {{ return {{ value: '', style: {{}} }}; }},
135                    createElement: function() {{ return {{ firstChild: {{ href: 'https://{host}/' }}, style: {{}} }}; }}
136                }},
137                _cf_chl_ctx: {ctx},
138                _cf_chl_opt: {opt},
139                _cf_chl_enter: function() {{ return true; }}
140            }};
141            window.self = window;
142            window.top = window;
143            window.parent = window;
144            window.frames = window;
145            window.setTimeout = window.setTimeout || function(fn) {{ return fn(); }};
146            window.clearTimeout = window.clearTimeout || function() {{ return true; }};
147            window.addEventListener = window.addEventListener || function() {{ return true; }};
148            var document = window.document;
149            var navigator = window.navigator;
150            var location = window.location;
151            var _cf_chl_ctx = window._cf_chl_ctx;
152            var _cf_chl_opt = window._cf_chl_opt;
153            {vm_script}
154            if (typeof window._cf_chl_answer !== 'undefined') {{
155                window._cf_chl_answer;
156            }} else if (typeof _cf_chl_answer !== 'undefined') {{
157                _cf_chl_answer;
158            }} else {{
159                Math.random().toString(36).substring(2, 15);
160            }}
161            "#,
162            host = host,
163            ctx = ctx_json,
164            opt = opt_json,
165            vm_script = vm_script
166        );
167
168        self.interpreter
169            .execute(&script, host)
170            .map_err(ManagedV3Error::Interpreter)
171            .map(|answer| answer.trim().to_string())
172    }
173
174    fn fallback_answer(info: &ChallengeInfo) -> String {
175        if let Some(page_data) = info.opt_data.chl_page_data.as_ref() {
176            return (hash_str(page_data) % 1_000_000).to_string();
177        }
178        if let Some(cv_id) = info.ctx_data.cv_id.as_ref() {
179            return (hash_str(cv_id) % 1_000_000).to_string();
180        }
181        rand::thread_rng().gen_range(100_000..=999_999).to_string()
182    }
183
184    fn build_submission(
185        &self,
186        response: &ChallengeResponse<'_>,
187        form_action: &str,
188        mut payload: HashMap<String, String>,
189    ) -> Result<ChallengeSubmission, ManagedV3Error> {
190        let form_action = decode_html_entities(form_action).into_owned();
191        let target_url = response
192            .url
193            .join(&form_action)
194            .map_err(|err| ManagedV3Error::InvalidFormAction(form_action.clone(), err))?;
195
196        let mut headers = HashMap::new();
197        headers.insert(
198            "Content-Type".into(),
199            "application/x-www-form-urlencoded".into(),
200        );
201        headers.insert("Referer".into(), response.url.as_str().to_string());
202        headers.insert("Origin".into(), origin_from_url(response.url));
203
204        let wait = self.random_delay();
205        payload.entry("jschl_answer".into()).or_default();
206        payload.entry("cf_captcha_token".into()).or_default();
207
208        Ok(ChallengeSubmission::new(
209            http::Method::POST,
210            target_url,
211            payload,
212            headers,
213            wait,
214        ))
215    }
216
217    fn random_delay(&self) -> Duration {
218        if self.delay_max <= self.delay_min {
219            return self.delay_min;
220        }
221        let mut rng = rand::thread_rng();
222        let min = self.delay_min.as_secs_f32();
223        let max = self.delay_max.as_secs_f32();
224        Duration::from_secs_f32(rng.gen_range(min..max))
225    }
226
227    fn extract_challenge_info(body: &str) -> Result<ChallengeInfo, ManagedV3Error> {
228        let ctx_data = Self::extract_json_block(body, "window._cf_chl_ctx")?
229            .map(|json| serde_json::from_str::<ChallengeJson>(&json))
230            .transpose()
231            .map_err(ManagedV3Error::JsonParse)?
232            .unwrap_or_default();
233        let opt_data = Self::extract_json_block(body, "window._cf_chl_opt")?
234            .map(|json| serde_json::from_str::<ChallengeJson>(&json))
235            .transpose()
236            .map_err(ManagedV3Error::JsonParse)?
237            .unwrap_or_default();
238        let form_action = V3_FORM_RE
239            .captures(body)
240            .and_then(|caps| caps.get(1))
241            .map(|m| m.as_str().to_string())
242            .ok_or(ManagedV3Error::FormActionMissing)?;
243        let vm_script = Self::extract_vm_script(body);
244
245        Ok(ChallengeInfo {
246            ctx_data,
247            opt_data,
248            form_action,
249            vm_script,
250        })
251    }
252
253    fn extract_json_block(body: &str, marker: &str) -> Result<Option<String>, ManagedV3Error> {
254        let start = match body.find(marker) {
255            Some(idx) => idx,
256            None => return Ok(None),
257        };
258
259        let brace_start = match body[start..].find('{') {
260            Some(offset) => start + offset,
261            None => return Ok(None),
262        };
263
264        let mut depth = 0_i32;
265        let mut in_string = false;
266        let mut escape = false;
267
268        for (offset, ch) in body[brace_start..].char_indices() {
269            if in_string {
270                if escape {
271                    escape = false;
272                    continue;
273                }
274
275                match ch {
276                    '\\' => {
277                        escape = true;
278                    }
279                    '"' => {
280                        in_string = false;
281                    }
282                    _ => {}
283                }
284                continue;
285            }
286
287            match ch {
288                '{' => {
289                    depth += 1;
290                }
291                '}' => {
292                    depth -= 1;
293                    if depth == 0 {
294                        let end = brace_start + offset;
295                        return Ok(Some(body[brace_start..=end].to_string()));
296                    }
297                }
298                '"' => {
299                    in_string = true;
300                }
301                _ => {}
302            }
303        }
304
305        Err(ManagedV3Error::JsonExtractionFailed(marker.to_string()))
306    }
307
308    fn extract_vm_script(body: &str) -> Option<String> {
309        // Try to find script containing window._cf_chl_enter
310        static VM_SCRIPT_RE: Lazy<Regex> = Lazy::new(|| {
311            RegexBuilder::new(r"<script[^>]*>\s*(.*?window\._cf_chl_enter.*?)</script>")
312                .case_insensitive(true)
313                .dot_matches_new_line(true)
314                .build()
315                .expect("invalid vm script regex")
316        });
317
318        VM_SCRIPT_RE
319            .captures(body)
320            .and_then(|caps| caps.get(1))
321            .map(|m| m.as_str().trim().to_string())
322    }
323
324    fn generate_payload(
325        body: &str,
326        answer: &str,
327    ) -> Result<HashMap<String, String>, ManagedV3Error> {
328        let r_token = R_TOKEN_RE
329            .captures(body)
330            .and_then(|caps| caps.get(1))
331            .map(|m| m.as_str().to_string())
332            .ok_or(ManagedV3Error::MissingToken("r"))?;
333
334        let mut payload = HashMap::new();
335        payload.insert("r".into(), r_token);
336        payload.insert("jschl_answer".into(), answer.to_string());
337
338        for caps in INPUT_FIELD_RE.captures_iter(body) {
339            if let (Some(name), Some(value)) = (caps.get(1), caps.get(2)) {
340                let key = name.as_str();
341                if key != "jschl_answer" && !payload.contains_key(key) {
342                    payload.insert(key.to_string(), value.as_str().to_string());
343                }
344            }
345        }
346
347        Ok(payload)
348    }
349}
350
/// Registers the solver under its identifier.
impl ChallengeSolver for ManagedV3Solver {
    /// Returns the static identifier of this solver, `"managed_v3"`.
    fn name(&self) -> &'static str {
        "managed_v3"
    }
}
356
/// Data parsed from a `window._cf_chl_ctx` or `window._cf_chl_opt` block.
///
/// Only the two keys the fallback logic reads are typed; all remaining keys
/// are kept in `extra` so they are re-emitted when the data is serialized
/// back into the VM scaffold.
#[derive(Debug, Default, Deserialize, Serialize)]
struct ChallengeJson {
    /// Value of the `cvId` key, if present.
    #[serde(rename = "cvId")]
    cv_id: Option<String>,
    /// Value of the `chlPageData` key, if present.
    #[serde(rename = "chlPageData")]
    chl_page_data: Option<String>,
    /// Every other key of the block, flattened into this value.
    #[serde(flatten)]
    extra: serde_json::Value,
}
366
/// Everything extracted from a challenge page that is needed to answer it.
struct ChallengeInfo {
    /// Parsed `window._cf_chl_ctx` block (default when the page has none).
    ctx_data: ChallengeJson,
    /// Parsed `window._cf_chl_opt` block (default when the page has none).
    opt_data: ChallengeJson,
    /// Raw `action` attribute of the challenge form, not yet entity-decoded.
    form_action: String,
    /// Script referencing `window._cf_chl_enter`, when the page contains one.
    vm_script: Option<String>,
}
373
/// Errors produced while solving or submitting a Managed v3 challenge.
#[derive(Debug, Error)]
pub enum ManagedV3Error {
    /// The response did not pass `ManagedV3Solver::is_challenge`.
    #[error("response is not a Cloudflare v3 challenge")]
    NotV3Challenge,
    /// The response URL has no host component.
    #[error("missing host in challenge URL")]
    MissingHost,
    /// The challenge form (and its action) was not found in the page.
    #[error("challenge form action missing")]
    FormActionMissing,
    /// A required hidden input (named by the payload) was not found.
    #[error("missing token '{0}' in challenge page")]
    MissingToken(&'static str),
    /// The decoded form action could not be joined onto the response URL.
    #[error("invalid form action '{0}': {1}")]
    InvalidFormAction(String, url::ParseError),
    /// The JavaScript interpreter failed while executing the VM script.
    #[error("javascript interpreter error: {0}")]
    Interpreter(#[source] InterpreterError),
    /// The submission was answered with status 403.
    #[error("failed to solve Cloudflare v3 challenge - received 403 status")]
    ChallengeSolveFailed,
    /// Sending the submission failed.
    #[error("challenge submission failed: {0}")]
    Submission(#[source] ChallengeExecutionError),
    /// A ctx/opt block was found but is not valid JSON for `ChallengeJson`.
    #[error("json parse error: {0}")]
    JsonParse(#[from] serde_json::Error),
    /// A block started after the given marker but its braces never balanced.
    #[error("failed to extract JSON block for marker '{0}'")]
    JsonExtractionFailed(String),
}
397
398static V3_PLATFORM_RE: Lazy<Regex> = Lazy::new(|| {
399    RegexBuilder::new(r#"cpo\.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/jsch/v3"#)
400        .case_insensitive(true)
401        .dot_matches_new_line(true)
402        .build()
403        .expect("invalid v3 platform regex")
404});
405
406static V3_CONTEXT_RE: Lazy<Regex> = Lazy::new(|| {
407    RegexBuilder::new(r"window\._cf_chl_ctx\s*=")
408        .case_insensitive(true)
409        .dot_matches_new_line(true)
410        .build()
411        .expect("invalid v3 context regex")
412});
413
414static V3_FORM_RE: Lazy<Regex> = Lazy::new(|| {
415    RegexBuilder::new(
416        r#"<form[^>]*id=['"]challenge-form['"][^>]*action=['"]([^'"]*__cf_chl_rt_tk=[^'"]*)['"]"#,
417    )
418    .case_insensitive(true)
419    .dot_matches_new_line(true)
420    .build()
421    .expect("invalid v3 form regex")
422});
423
424static R_TOKEN_RE: Lazy<Regex> = Lazy::new(|| {
425    RegexBuilder::new(r#"name=['"]r['"]\s+value=['"]([^'"]+)['"]"#)
426        .case_insensitive(true)
427        .dot_matches_new_line(true)
428        .build()
429        .expect("invalid v3 r token regex")
430});
431
432static INPUT_FIELD_RE: Lazy<Regex> = Lazy::new(|| {
433    RegexBuilder::new(r#"<input[^>]*name=['"]([^'"]+)['"][^>]*value=['"]([^'"]*)['"]"#)
434        .case_insensitive(true)
435        .dot_matches_new_line(true)
436        .build()
437        .expect("invalid v3 input regex")
438});
439
/// Hashes `input` with the standard library's `DefaultHasher` and returns the
/// 64-bit digest.
fn hash_str(input: &str) -> u64 {
    use std::hash::{Hash, Hasher};

    let mut state = std::collections::hash_map::DefaultHasher::new();
    input.hash(&mut state);
    state.finish()
}
448
449#[cfg(test)]
450mod tests {
451    use super::*;
452    use http::{HeaderMap, Method, header::SERVER};
453    use url::Url;
454
455    struct ResponseFixture {
456        url: Url,
457        headers: HeaderMap,
458        method: Method,
459        body: String,
460        status: u16,
461    }
462
463    impl ResponseFixture {
464        fn new(body: &str, status: u16) -> Self {
465            let mut headers = HeaderMap::new();
466            headers.insert(SERVER, "cloudflare".parse().unwrap());
467            Self {
468                url: Url::parse("https://example.com/").unwrap(),
469                headers,
470                method: Method::GET,
471                body: body.to_string(),
472                status,
473            }
474        }
475
476        fn response(&self) -> ChallengeResponse<'_> {
477            ChallengeResponse {
478                url: &self.url,
479                status: self.status,
480                headers: &self.headers,
481                body: &self.body,
482                request_method: &self.method,
483            }
484        }
485    }
486
487    struct StubInterpreter;
488
489    impl JavascriptInterpreter for StubInterpreter {
490        fn solve_challenge(
491            &self,
492            _page_html: &str,
493            _host: &str,
494        ) -> Result<String, InterpreterError> {
495            Ok("stub".into())
496        }
497
498        fn execute(&self, script: &str, _host: &str) -> Result<String, InterpreterError> {
499            if script.contains("_cf_chl_answer") {
500                Ok("987654".into())
501            } else {
502                Err(InterpreterError::Execution("missing answer".into()))
503            }
504        }
505    }
506
507    fn sample_html(with_vm: bool) -> String {
508        let vm = if with_vm {
509            "<script>window._cf_chl_enter=function(){return true;};window._cf_chl_answer='123456';</script>"
510        } else {
511            ""
512        };
513
514        format!(
515            r#"
516            <html>
517              <head>
518                <script>window._cf_chl_ctx={{"cvId":"cv123"}};</script>
519                <script>window._cf_chl_opt={{"chlPageData":"page-data"}};</script>
520              </head>
521              <body>
522                <script>var cpo={{}};cpo.src="/cdn-cgi/challenge-platform/h/b/orchestrate/jsch/v3";</script>
523                <form id="challenge-form" action="/cdn-cgi/challenge-platform/h/b/orchestrate/form?__cf_chl_rt_tk=foo" method="POST">
524                  <input type="hidden" name="r" value="token-r"/>
525                  <input type="hidden" name="cf_chl_seq_i" value="1"/>
526                </form>
527                {vm}
528              </body>
529            </html>
530        "#,
531            vm = vm
532        )
533    }
534
535    #[test]
536    fn solve_uses_vm_answer() {
537        let html = sample_html(true);
538        let fixture = ResponseFixture::new(&html, 403);
539        let solver = ManagedV3Solver::new(Arc::new(StubInterpreter));
540        assert!(ManagedV3Solver::is_challenge(&fixture.response()));
541        let submission = solver.solve(&fixture.response()).expect("should solve");
542        assert_eq!(
543            submission.form_fields.get("jschl_answer"),
544            Some(&"987654".to_string())
545        );
546    }
547
548    #[test]
549    fn fallback_when_no_vm() {
550        let html = sample_html(false);
551        let fixture = ResponseFixture::new(&html, 403);
552        let solver = ManagedV3Solver::new(Arc::new(StubInterpreter));
553        let submission = solver.solve(&fixture.response()).expect("fallback works");
554        assert!(submission.form_fields.contains_key("jschl_answer"));
555    }
556
557    #[test]
558    fn json_extraction_handles_nested_objects() {
559        let html = r#"
560            <script>
561            window._cf_chl_ctx = {
562                "cvId": "test123",
563                "nested": {
564                    "key": "value",
565                    "inner": {"deep": "data"}
566                },
567                "array": [1, 2, 3]
568            };
569            </script>
570        "#;
571
572        let result = ManagedV3Solver::extract_json_block(html, "window._cf_chl_ctx");
573        assert!(result.is_ok());
574        let json = result.unwrap();
575        assert!(json.is_some());
576        let parsed: serde_json::Value = serde_json::from_str(&json.unwrap()).unwrap();
577        assert_eq!(parsed["cvId"], "test123");
578    }
579
580    #[test]
581    fn extracts_all_input_fields() {
582        let html = r#"
583            <form id="challenge-form" action="/test?__cf_chl_rt_tk=foo">
584                <input type="hidden" name="r" value="r-token"/>
585                <input type="hidden" name="cf_chl_seq_i" value="2"/>
586                <input type="hidden" name="custom_field" value="custom_value"/>
587            </form>
588            <script>window._cf_chl_ctx={};</script>
589            <script>window._cf_chl_opt={};</script>
590        "#;
591
592        let result = ManagedV3Solver::generate_payload(html, "answer123");
593        assert!(result.is_ok());
594        let payload = result.unwrap();
595        assert_eq!(payload.get("r"), Some(&"r-token".to_string()));
596        assert_eq!(payload.get("jschl_answer"), Some(&"answer123".to_string()));
597        assert_eq!(payload.get("cf_chl_seq_i"), Some(&"2".to_string()));
598        assert_eq!(
599            payload.get("custom_field"),
600            Some(&"custom_value".to_string())
601        );
602    }
603}