cloudscraper_rs/challenges/solvers/
managed_v3.rs

1//! Solver for Cloudflare Managed Challenge v3.
2//!
3//! Executes the embedded JavaScript VM payload and applies fallback strategies
4//! when full execution is not possible.
5
6use std::collections::HashMap;
7use std::sync::Arc;
8use std::time::Duration;
9
10use html_escape::decode_html_entities;
11use once_cell::sync::Lazy;
12use rand::Rng;
13use regex::{Regex, RegexBuilder};
14use serde::{Deserialize, Serialize};
15use thiserror::Error;
16
17use crate::challenges::core::{
18    ChallengeExecutionError, ChallengeHttpClient, ChallengeHttpResponse, ChallengeResponse,
19    ChallengeSubmission, OriginalRequest, execute_challenge_submission, is_cloudflare_response,
20    origin_from_url,
21};
22use crate::external_deps::interpreters::{InterpreterError, JavascriptInterpreter};
23
24use super::ChallengeSolver;
25
/// Default lower bound, in seconds, of the submission delay set by `ManagedV3Solver::new`.
const DEFAULT_DELAY_MIN_SECS: f32 = 1.0;
/// Default upper bound, in seconds, of the submission delay set by `ManagedV3Solver::new`.
const DEFAULT_DELAY_MAX_SECS: f32 = 5.0;
28
/// Solver for Cloudflare Managed Challenge v3 JavaScript challenges.
///
/// Holds the JavaScript interpreter used to run the challenge script and the
/// bounds of the randomized wait attached to every submission.
pub struct ManagedV3Solver {
    /// Interpreter used to execute the script extracted from the challenge page.
    interpreter: Arc<dyn JavascriptInterpreter>,
    /// Lower bound of the wait attached to a submission.
    delay_min: Duration,
    /// Upper bound of the wait attached to a submission.
    delay_max: Duration,
}
35
36impl ManagedV3Solver {
37    pub fn new(interpreter: Arc<dyn JavascriptInterpreter>) -> Self {
38        Self {
39            interpreter,
40            delay_min: Duration::from_secs_f32(DEFAULT_DELAY_MIN_SECS),
41            delay_max: Duration::from_secs_f32(DEFAULT_DELAY_MAX_SECS),
42        }
43    }
44
45    pub fn with_delay_range(mut self, min: Duration, max: Duration) -> Self {
46        self.delay_min = min;
47        self.delay_max = if max < min { min } else { max };
48        self
49    }
50
51    pub fn is_challenge(response: &ChallengeResponse<'_>) -> bool {
52        is_cloudflare_response(response)
53            && matches!(response.status, 403 | 429 | 503)
54            && (V3_PLATFORM_RE.is_match(response.body)
55                || V3_CONTEXT_RE.is_match(response.body)
56                || V3_FORM_RE.is_match(response.body))
57    }
58
59    pub fn solve(
60        &self,
61        response: &ChallengeResponse<'_>,
62    ) -> Result<ChallengeSubmission, ManagedV3Error> {
63        if !Self::is_challenge(response) {
64            return Err(ManagedV3Error::NotV3Challenge);
65        }
66
67        let info = Self::extract_challenge_info(response.body)?;
68        let host = response
69            .url
70            .host_str()
71            .ok_or(ManagedV3Error::MissingHost)?
72            .to_string();
73
74        let challenge_answer = match info.vm_script {
75            Some(ref script) => self.execute_vm(&info, script, &host).unwrap_or_else(|err| {
76                log::warn!("Managed v3 VM execution failed: {err}; using fallback");
77                Self::fallback_answer(&info)
78            }),
79            None => Self::fallback_answer(&info),
80        };
81
82        let payload = Self::generate_payload(response.body, &challenge_answer)?;
83        self.build_submission(response, &info.form_action, payload)
84    }
85
86    pub async fn solve_and_submit(
87        &self,
88        client: Arc<dyn ChallengeHttpClient>,
89        response: &ChallengeResponse<'_>,
90        original_request: OriginalRequest,
91    ) -> Result<ChallengeHttpResponse, ManagedV3Error> {
92        let submission = self.solve(response)?;
93        execute_challenge_submission(client, submission, original_request)
94            .await
95            .map_err(ManagedV3Error::Submission)
96    }
97
98    fn execute_vm(
99        &self,
100        info: &ChallengeInfo,
101        vm_script: &str,
102        host: &str,
103    ) -> Result<String, ManagedV3Error> {
104        let ctx_json = serde_json::to_string(&info.ctx_data).unwrap_or_else(|_| "{}".into());
105        let opt_json = serde_json::to_string(&info.opt_data).unwrap_or_else(|_| "{}".into());
106
107        let script = format!(
108            r#"
109            var window = {{
110                location: {{
111                    href: 'https://{host}/',
112                    hostname: '{host}',
113                    protocol: 'https:',
114                    pathname: '/'
115                }},
116                navigator: {{
117                    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
118                    platform: 'Win32',
119                    language: 'en-US'
120                }},
121                document: {{
122                    getElementById: function() {{ return {{ value: '', style: {{}} }}; }},
123                    createElement: function() {{ return {{ firstChild: {{ href: 'https://{host}/' }}, style: {{}} }}; }}
124                }},
125                _cf_chl_ctx: {ctx},
126                _cf_chl_opt: {opt},
127                _cf_chl_enter: function() {{ return true; }}
128            }};
129            window.self = window;
130            window.top = window;
131            window.parent = window;
132            window.setTimeout = window.setTimeout || function(fn) {{ return fn(); }};
133            window.clearTimeout = window.clearTimeout || function() {{ return true; }};
134            window.addEventListener = window.addEventListener || function() {{ return true; }};
135            var document = window.document;
136            var navigator = window.navigator;
137            var location = window.location;
138            var _cf_chl_ctx = window._cf_chl_ctx;
139            var _cf_chl_opt = window._cf_chl_opt;
140            {vm_script}
141            if (typeof window._cf_chl_answer !== 'undefined') {{
142                window._cf_chl_answer;
143            }} else if (typeof _cf_chl_answer !== 'undefined') {{
144                _cf_chl_answer;
145            }} else {{
146                Math.random().toString(36).substring(2, 15);
147            }}
148            "#,
149            host = host,
150            ctx = ctx_json,
151            opt = opt_json,
152            vm_script = vm_script
153        );
154
155        self.interpreter
156            .execute(&script, host)
157            .map_err(ManagedV3Error::Interpreter)
158            .map(|answer| answer.trim().to_string())
159    }
160
161    fn fallback_answer(info: &ChallengeInfo) -> String {
162        if let Some(page_data) = info.opt_data.chl_page_data.as_ref() {
163            return (hash_str(page_data) % 1_000_000).to_string();
164        }
165        if let Some(cv_id) = info.ctx_data.cv_id.as_ref() {
166            return (hash_str(cv_id) % 1_000_000).to_string();
167        }
168        rand::thread_rng().gen_range(100_000..=999_999).to_string()
169    }
170
171    fn build_submission(
172        &self,
173        response: &ChallengeResponse<'_>,
174        form_action: &str,
175        mut payload: HashMap<String, String>,
176    ) -> Result<ChallengeSubmission, ManagedV3Error> {
177        let form_action = decode_html_entities(form_action).into_owned();
178        let target_url = response
179            .url
180            .join(&form_action)
181            .map_err(|err| ManagedV3Error::InvalidFormAction(form_action.clone(), err))?;
182
183        let mut headers = HashMap::new();
184        headers.insert(
185            "Content-Type".into(),
186            "application/x-www-form-urlencoded".into(),
187        );
188        headers.insert("Referer".into(), response.url.as_str().to_string());
189        headers.insert("Origin".into(), origin_from_url(response.url));
190
191        let wait = self.random_delay();
192        payload.entry("jschl_answer".into()).or_default();
193        payload.entry("cf_captcha_token".into()).or_default();
194
195        Ok(ChallengeSubmission::new(
196            http::Method::POST,
197            target_url,
198            payload,
199            headers,
200            wait,
201        ))
202    }
203
204    fn random_delay(&self) -> Duration {
205        if self.delay_max <= self.delay_min {
206            return self.delay_min;
207        }
208        let mut rng = rand::thread_rng();
209        let min = self.delay_min.as_secs_f32();
210        let max = self.delay_max.as_secs_f32();
211        Duration::from_secs_f32(rng.gen_range(min..max))
212    }
213
214    fn extract_challenge_info(body: &str) -> Result<ChallengeInfo, ManagedV3Error> {
215        let ctx_data = Self::extract_json_block(body, "window._cf_chl_ctx")?
216            .map(|json| serde_json::from_str::<ChallengeJson>(&json))
217            .transpose()
218            .map_err(ManagedV3Error::JsonParse)?
219            .unwrap_or_default();
220        let opt_data = Self::extract_json_block(body, "window._cf_chl_opt")?
221            .map(|json| serde_json::from_str::<ChallengeJson>(&json))
222            .transpose()
223            .map_err(ManagedV3Error::JsonParse)?
224            .unwrap_or_default();
225        let form_action = V3_FORM_RE
226            .captures(body)
227            .and_then(|caps| caps.get(1))
228            .map(|m| m.as_str().to_string())
229            .ok_or(ManagedV3Error::FormActionMissing)?;
230        let vm_script = Self::extract_vm_script(body);
231
232        Ok(ChallengeInfo {
233            ctx_data,
234            opt_data,
235            form_action,
236            vm_script,
237        })
238    }
239
240    fn extract_json_block(body: &str, marker: &str) -> Result<Option<String>, ManagedV3Error> {
241        let start = match body.find(marker) {
242            Some(idx) => idx,
243            None => return Ok(None),
244        };
245
246        let brace_start = match body[start..].find('{') {
247            Some(offset) => start + offset,
248            None => return Ok(None),
249        };
250
251        let mut depth = 0_i32;
252        let mut in_string = false;
253        let mut escape = false;
254
255        for (offset, ch) in body[brace_start..].char_indices() {
256            if in_string {
257                if escape {
258                    escape = false;
259                    continue;
260                }
261
262                match ch {
263                    '\\' => {
264                        escape = true;
265                    }
266                    '"' => {
267                        in_string = false;
268                    }
269                    _ => {}
270                }
271                continue;
272            }
273
274            match ch {
275                '{' => {
276                    depth += 1;
277                }
278                '}' => {
279                    depth -= 1;
280                    if depth == 0 {
281                        let end = brace_start + offset;
282                        return Ok(Some(body[brace_start..=end].to_string()));
283                    }
284                }
285                '"' => {
286                    in_string = true;
287                }
288                _ => {}
289            }
290        }
291
292        Err(ManagedV3Error::JsonExtractionFailed(marker.to_string()))
293    }
294
295    fn extract_vm_script(body: &str) -> Option<String> {
296        let enter_idx = body.find("window._cf_chl_enter")?;
297        let script_open = body[..enter_idx].rfind("<script")?;
298        let content_start = body[script_open..].find('>')? + script_open + 1;
299        let script_close = body[enter_idx..].find("</script>")? + enter_idx;
300        Some(body[content_start..script_close].trim().to_string())
301    }
302
303    fn generate_payload(
304        body: &str,
305        answer: &str,
306    ) -> Result<HashMap<String, String>, ManagedV3Error> {
307        let r_token = R_TOKEN_RE
308            .captures(body)
309            .and_then(|caps| caps.get(1))
310            .map(|m| m.as_str().to_string())
311            .ok_or(ManagedV3Error::MissingToken("r"))?;
312
313        let mut payload = HashMap::new();
314        payload.insert("r".into(), r_token);
315        payload.insert("jschl_answer".into(), answer.to_string());
316
317        for caps in INPUT_FIELD_RE.captures_iter(body) {
318            if let (Some(name), Some(value)) = (caps.get(1), caps.get(2)) {
319                let key = name.as_str();
320                if key != "jschl_answer" && !payload.contains_key(key) {
321                    payload.insert(key.to_string(), value.as_str().to_string());
322                }
323            }
324        }
325
326        Ok(payload)
327    }
328}
329
/// Registers the managed v3 solver under the shared `ChallengeSolver` trait.
impl ChallengeSolver for ManagedV3Solver {
    /// Returns the fixed identifier of this solver, `"managed_v3"`.
    fn name(&self) -> &'static str {
        "managed_v3"
    }
}
335
/// Data parsed from a `window._cf_chl_ctx` or `window._cf_chl_opt` block.
///
/// Only the fields the solver reads are named; everything else is kept in
/// `extra` so it is written back out when the block is re-serialized.
#[derive(Debug, Default, Deserialize, Serialize)]
struct ChallengeJson {
    /// The block's `cvId` entry, if present.
    #[serde(rename = "cvId")]
    cv_id: Option<String>,
    /// The block's `chlPageData` entry, if present.
    #[serde(rename = "chlPageData")]
    chl_page_data: Option<String>,
    /// All remaining entries of the block, flattened into one JSON value.
    #[serde(flatten)]
    extra: serde_json::Value,
}
345
/// Everything extracted from a challenge page that is needed to solve it.
struct ChallengeInfo {
    /// Parsed `window._cf_chl_ctx` block (default value when the page has none).
    ctx_data: ChallengeJson,
    /// Parsed `window._cf_chl_opt` block (default value when the page has none).
    opt_data: ChallengeJson,
    /// Raw `action` attribute of the challenge form, as captured from the HTML.
    form_action: String,
    /// Contents of the script containing `window._cf_chl_enter`, if found.
    vm_script: Option<String>,
}
352
/// Errors produced while detecting, solving, or submitting a managed v3 challenge.
#[derive(Debug, Error)]
pub enum ManagedV3Error {
    /// The response was not recognised as a v3 challenge page.
    #[error("response is not a Cloudflare v3 challenge")]
    NotV3Challenge,
    /// The response URL has no host component.
    #[error("missing host in challenge URL")]
    MissingHost,
    /// No challenge form with a usable action was found in the page.
    #[error("challenge form action missing")]
    FormActionMissing,
    /// A required hidden form field (named by the payload) is absent from the page.
    #[error("missing token '{0}' in challenge page")]
    MissingToken(&'static str),
    /// The form action could not be resolved against the response URL.
    #[error("invalid form action '{0}': {1}")]
    InvalidFormAction(String, url::ParseError),
    /// The JavaScript interpreter failed while running the challenge script.
    #[error("javascript interpreter error: {0}")]
    Interpreter(#[source] InterpreterError),
    /// Sending the solved challenge back failed.
    #[error("challenge submission failed: {0}")]
    Submission(#[source] ChallengeExecutionError),
    /// An extracted data block was not valid JSON.
    #[error("json parse error: {0}")]
    JsonParse(#[from] serde_json::Error),
    /// The block following the given marker was opened but never closed.
    #[error("failed to extract JSON block for marker '{0}'")]
    JsonExtractionFailed(String),
}
374
/// Matches the assignment of `cpo.src` to a challenge-platform `orchestrate/jsch/v3` URL.
static V3_PLATFORM_RE: Lazy<Regex> = Lazy::new(|| {
    RegexBuilder::new(r#"cpo\.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/jsch/v3"#)
        .case_insensitive(true)
        .dot_matches_new_line(true)
        .build()
        .expect("invalid v3 platform regex")
});

/// Matches an assignment to `window._cf_chl_ctx`.
static V3_CONTEXT_RE: Lazy<Regex> = Lazy::new(|| {
    RegexBuilder::new(r"window\._cf_chl_ctx\s*=")
        .case_insensitive(true)
        .dot_matches_new_line(true)
        .build()
        .expect("invalid v3 context regex")
});

/// Matches the `challenge-form` element and captures its `action` attribute
/// (group 1), which must contain `__cf_chl_rt_tk=`. The `id` attribute has to
/// appear before `action` for the pattern to match.
static V3_FORM_RE: Lazy<Regex> = Lazy::new(|| {
    RegexBuilder::new(
        r#"<form[^>]*id=['"]challenge-form['"][^>]*action=['"]([^'"]*__cf_chl_rt_tk=[^'"]*)['"]"#,
    )
    .case_insensitive(true)
    .dot_matches_new_line(true)
    .build()
    .expect("invalid v3 form regex")
});

/// Captures (group 1) the non-empty value of the field named `r`; `name` must be
/// followed directly by whitespace and then `value`.
static R_TOKEN_RE: Lazy<Regex> = Lazy::new(|| {
    RegexBuilder::new(r#"name=['"]r['"]\s+value=['"]([^'"]+)['"]"#)
        .case_insensitive(true)
        .dot_matches_new_line(true)
        .build()
        .expect("invalid v3 r token regex")
});

/// Captures the name (group 1) and value (group 2) of an `<input>` element whose
/// `name` attribute precedes its `value` attribute.
static INPUT_FIELD_RE: Lazy<Regex> = Lazy::new(|| {
    RegexBuilder::new(r#"<input[^>]*name=['"]([^'"]+)['"][^>]*value=['"]([^'"]*)['"]"#)
        .case_insensitive(true)
        .dot_matches_new_line(true)
        .build()
        .expect("invalid v3 input regex")
});
416
/// Hashes `input` with the standard library's `DefaultHasher` in its initial
/// state and returns the 64-bit digest.
fn hash_str(input: &str) -> u64 {
    use std::hash::{Hash, Hasher};

    let mut state = std::collections::hash_map::DefaultHasher::new();
    Hash::hash(input, &mut state);
    Hasher::finish(&state)
}
425
#[cfg(test)]
mod tests {
    use super::*;
    use http::{HeaderMap, Method, header::SERVER};
    use url::Url;

    /// Owns the pieces a `ChallengeResponse` borrows, so tests can build one
    /// from a plain HTML string.
    struct ResponseFixture {
        url: Url,
        headers: HeaderMap,
        method: Method,
        body: String,
        status: u16,
    }

    impl ResponseFixture {
        /// Creates a GET response from `https://example.com/` with a
        /// `Server: cloudflare` header, the given body, and the given status.
        fn new(body: &str, status: u16) -> Self {
            let mut headers = HeaderMap::new();
            headers.insert(SERVER, "cloudflare".parse().unwrap());
            Self {
                url: Url::parse("https://example.com/").unwrap(),
                headers,
                method: Method::GET,
                body: body.to_string(),
                status,
            }
        }

        /// Borrows the fixture as a `ChallengeResponse`.
        fn response(&self) -> ChallengeResponse<'_> {
            ChallengeResponse {
                url: &self.url,
                status: self.status,
                headers: &self.headers,
                body: &self.body,
                request_method: &self.method,
            }
        }
    }

    /// Interpreter double: answers `987654` for any script mentioning
    /// `_cf_chl_answer` and fails otherwise.
    struct StubInterpreter;

    impl JavascriptInterpreter for StubInterpreter {
        fn solve_challenge(
            &self,
            _page_html: &str,
            _host: &str,
        ) -> Result<String, InterpreterError> {
            Ok("stub".into())
        }

        fn execute(&self, script: &str, _host: &str) -> Result<String, InterpreterError> {
            if script.contains("_cf_chl_answer") {
                Ok("987654".into())
            } else {
                Err(InterpreterError::Execution("missing answer".into()))
            }
        }
    }

    /// Builds a challenge page; `with_vm` controls whether the script that
    /// defines `window._cf_chl_enter` is included.
    fn sample_html(with_vm: bool) -> String {
        let vm = if with_vm {
            "<script>window._cf_chl_enter=function(){return true;};window._cf_chl_answer='123456';</script>"
        } else {
            ""
        };

        format!(
            r#"
            <html>
              <head>
                <script>window._cf_chl_ctx={{"cvId":"cv123"}};</script>
                <script>window._cf_chl_opt={{"chlPageData":"page-data"}};</script>
              </head>
              <body>
                <script>var cpo={{}};cpo.src="/cdn-cgi/challenge-platform/h/b/orchestrate/jsch/v3";</script>
                <form id="challenge-form" action="/cdn-cgi/challenge-platform/h/b/orchestrate/form?__cf_chl_rt_tk=foo" method="POST">
                  <input type="hidden" name="r" value="token-r"/>
                  <input type="hidden" name="cf_chl_seq_i" value="1"/>
                </form>
                {vm}
              </body>
            </html>
        "#,
            vm = vm
        )
    }

    #[test]
    fn solve_uses_vm_answer() {
        let html = sample_html(true);
        let fixture = ResponseFixture::new(&html, 403);
        let solver = ManagedV3Solver::new(Arc::new(StubInterpreter));
        assert!(ManagedV3Solver::is_challenge(&fixture.response()));
        let submission = solver.solve(&fixture.response()).expect("should solve");
        assert_eq!(
            submission.form_fields.get("jschl_answer"),
            Some(&"987654".to_string())
        );
    }

    #[test]
    fn fallback_when_no_vm() {
        let html = sample_html(false);
        let fixture = ResponseFixture::new(&html, 403);
        let solver = ManagedV3Solver::new(Arc::new(StubInterpreter));
        let submission = solver.solve(&fixture.response()).expect("fallback works");
        // `build_submission` always inserts a `jschl_answer` key, so checking
        // for its presence proves nothing. Pin the value the fallback derives
        // from the page's `chlPageData` instead.
        let expected = (hash_str("page-data") % 1_000_000).to_string();
        assert_eq!(submission.form_fields.get("jschl_answer"), Some(&expected));
    }
}