cloudscraper_rs/challenges/solvers/
javascript_v2.rs

1//! Solver for Cloudflare JavaScript VM challenge v2.
2//!
3//! Extracts orchestration metadata embedded in the challenge page, prepares the
4//! expected payload (including optional hCaptcha tokens), and relies on the
5//! shared executor to perform the delayed submission.
6//!
7//! This solver implements the challenge-platform orchestrate flow used by modern
8//! Cloudflare protections. It includes randomized delays (1-5s by default) to
9//! mimic browser-like behavior and reduce detection risk.
10
11use std::collections::HashMap;
12use std::sync::Arc;
13use std::time::Duration;
14
15use html_escape::decode_html_entities;
16use once_cell::sync::Lazy;
17use rand::Rng;
18use regex::{Regex, RegexBuilder};
19use serde::Deserialize;
20use thiserror::Error;
21
22use crate::challenges::core::{
23    ChallengeExecutionError, ChallengeHttpClient, ChallengeHttpResponse, ChallengeResponse,
24    ChallengeSubmission, OriginalRequest, execute_challenge_submission, is_cloudflare_response,
25    origin_from_url,
26};
27use crate::external_deps::captcha::{CaptchaError, CaptchaProvider, CaptchaTask};
28
/// Lower bound, in seconds, of the default randomized wait applied before the
/// challenge response is submitted.
const DEFAULT_DELAY_MIN_SECS: f32 = 1.0;
/// Upper bound, in seconds, of the default randomized wait applied before the
/// challenge response is submitted.
const DEFAULT_DELAY_MAX_SECS: f32 = 5.0;
33
34/// Solver capable of handling Cloudflare VM (v2) JavaScript challenges.
35pub struct JavascriptV2Solver {
36    delay_min: Duration,
37    delay_max: Duration,
38    captcha_provider: Option<Arc<dyn CaptchaProvider>>, // optional hCaptcha provider
39}
40
41impl JavascriptV2Solver {
42    /// Create a solver with default delay range (1-5 seconds) and no captcha provider.
43    pub fn new() -> Self {
44        Self {
45            delay_min: Duration::from_secs_f32(DEFAULT_DELAY_MIN_SECS),
46            delay_max: Duration::from_secs_f32(DEFAULT_DELAY_MAX_SECS),
47            captcha_provider: None,
48        }
49    }
50
51    /// Configure the random delay range applied before challenge submission.
52    pub fn with_delay_range(mut self, min: Duration, max: Duration) -> Self {
53        self.delay_min = min;
54        self.delay_max = if max < min { min } else { max };
55        self
56    }
57
58    /// Attach an hCaptcha provider that will be used when captcha challenges are detected.
59    pub fn with_captcha_provider(mut self, provider: Arc<dyn CaptchaProvider>) -> Self {
60        self.captcha_provider = Some(provider);
61        self
62    }
63
64    /// Set (or replace) the captcha provider after construction.
65    pub fn set_captcha_provider(&mut self, provider: Arc<dyn CaptchaProvider>) {
66        self.captcha_provider = Some(provider);
67    }
68
69    /// Remove any configured captcha provider.
70    pub fn clear_captcha_provider(&mut self) {
71        self.captcha_provider = None;
72    }
73
74    /// Returns `true` when the response matches the Cloudflare v2 JavaScript challenge signature.
75    pub fn is_js_challenge(response: &ChallengeResponse<'_>) -> bool {
76        is_cloudflare_response(response)
77            && matches!(response.status, 403 | 429 | 503)
78            && JS_CHALLENGE_RE.is_match(response.body)
79    }
80
81    /// Returns `true` when the response corresponds to the Cloudflare v2 hCaptcha flow.
82    pub fn is_captcha_challenge(response: &ChallengeResponse<'_>) -> bool {
83        is_cloudflare_response(response)
84            && response.status == 403
85            && CAPTCHA_CHALLENGE_RE.is_match(response.body)
86    }
87
88    /// Build the challenge submission payload for non-captcha VM challenges.
89    pub fn solve(
90        &self,
91        response: &ChallengeResponse<'_>,
92    ) -> Result<ChallengeSubmission, JavascriptV2Error> {
93        if !Self::is_js_challenge(response) {
94            return Err(JavascriptV2Error::NotV2Challenge);
95        }
96
97        let info = Self::extract_challenge_info(response.body)?;
98        let payload = Self::generate_payload(response.body, &info.options)?;
99        self.build_submission(response, &info.form_action, payload)
100    }
101
102    /// Build the challenge submission payload for captcha-protected VM challenges.
103    pub async fn solve_with_captcha(
104        &self,
105        response: &ChallengeResponse<'_>,
106    ) -> Result<ChallengeSubmission, JavascriptV2Error> {
107        if !Self::is_captcha_challenge(response) {
108            return Err(JavascriptV2Error::NotCaptchaChallenge);
109        }
110
111        let provider = self
112            .captcha_provider
113            .as_ref()
114            .ok_or(JavascriptV2Error::CaptchaProviderMissing)?;
115
116        let info = Self::extract_challenge_info(response.body)?;
117        let mut payload = Self::generate_payload(response.body, &info.options)?;
118
119        let site_key = Self::extract_site_key(response.body)
120            .ok_or(JavascriptV2Error::MissingToken("data-sitekey"))?;
121
122        let mut task = CaptchaTask::new(site_key, response.url.clone());
123        // Preserve challenge-specific context for providers that can use it.
124        if let Some(cv_id) = info.options.cv_id.as_ref() {
125            task = task.insert_metadata("cv_id", cv_id.clone());
126        }
127
128        let solution = provider
129            .solve(&task)
130            .await
131            .map_err(JavascriptV2Error::Captcha)?;
132        payload.insert("h-captcha-response".into(), solution.token);
133        for (key, value) in solution.metadata {
134            payload.insert(key, value);
135        }
136
137        self.build_submission(response, &info.form_action, payload)
138    }
139
140    /// Execute the full challenge flow, including waiting and submission.
141    pub async fn solve_and_submit(
142        &self,
143        client: Arc<dyn ChallengeHttpClient>,
144        response: &ChallengeResponse<'_>,
145        original_request: OriginalRequest,
146    ) -> Result<ChallengeHttpResponse, JavascriptV2Error> {
147        let submission = if Self::is_captcha_challenge(response) {
148            self.solve_with_captcha(response).await?
149        } else {
150            self.solve(response)?
151        };
152
153        let result = execute_challenge_submission(client, submission, original_request)
154            .await
155            .map_err(JavascriptV2Error::Submission)?;
156
157        // Check if Cloudflare rejected the challenge solution with 403
158        if result.status == 403 {
159            return Err(JavascriptV2Error::ChallengeSolveFailed);
160        }
161
162        Ok(result)
163    }
164
165    fn build_submission(
166        &self,
167        response: &ChallengeResponse<'_>,
168        form_action: &str,
169        mut payload: HashMap<String, String>,
170    ) -> Result<ChallengeSubmission, JavascriptV2Error> {
171        let action = decode_html_entities(form_action).into_owned();
172        let target_url = response
173            .url
174            .join(&action)
175            .map_err(|err| JavascriptV2Error::InvalidFormAction(action.clone(), err))?;
176
177        // Ensure required fields exist even if the upstream payload omitted them.
178        payload
179            .entry("cf_ch_verify".into())
180            .or_insert_with(|| "plat".into());
181        payload.entry("vc".into()).or_default();
182        payload.entry("captcha_vc".into()).or_default();
183        payload
184            .entry("cf_captcha_kind".into())
185            .or_insert_with(|| "h".into());
186        payload.entry("h-captcha-response".into()).or_default();
187
188        let mut headers = HashMap::new();
189        headers.insert(
190            "Content-Type".into(),
191            "application/x-www-form-urlencoded".into(),
192        );
193        headers.insert("Referer".into(), response.url.as_str().to_string());
194        headers.insert("Origin".into(), origin_from_url(response.url));
195
196        let wait = self.random_delay();
197        let submission =
198            ChallengeSubmission::new(http::Method::POST, target_url, payload, headers, wait);
199        Ok(submission)
200    }
201
202    fn random_delay(&self) -> Duration {
203        if self.delay_max <= self.delay_min {
204            return self.delay_min;
205        }
206        let mut rng = rand::thread_rng();
207        let min = self.delay_min.as_secs_f32();
208        let max = self.delay_max.as_secs_f32();
209        let secs = rng.gen_range(min..=max);
210        Duration::from_secs_f32(secs)
211    }
212
213    fn extract_challenge_info(body: &str) -> Result<ChallengeInfo, JavascriptV2Error> {
214        let options = Self::extract_challenge_options(body)?;
215        let form_action = Self::extract_form_action(body)?;
216        Ok(ChallengeInfo {
217            options,
218            form_action,
219        })
220    }
221
222    fn extract_challenge_options(body: &str) -> Result<ChallengeOptions, JavascriptV2Error> {
223        let captures = CHL_OPT_RE
224            .captures(body)
225            .and_then(|caps| caps.get(1))
226            .ok_or(JavascriptV2Error::ChallengeDataMissing)?;
227        let json = captures.as_str();
228        let options: ChallengeOptions = serde_json::from_str(json)?;
229        Ok(options)
230    }
231
232    fn extract_form_action(body: &str) -> Result<String, JavascriptV2Error> {
233        let action = FORM_ACTION_RE
234            .captures(body)
235            .and_then(|caps| caps.get(1))
236            .map(|m| m.as_str().to_string())
237            .ok_or(JavascriptV2Error::FormActionMissing)?;
238        Ok(action)
239    }
240
241    fn generate_payload(
242        body: &str,
243        options: &ChallengeOptions,
244    ) -> Result<HashMap<String, String>, JavascriptV2Error> {
245        let r_token = R_TOKEN_RE
246            .captures(body)
247            .and_then(|caps| caps.get(1))
248            .map(|m| m.as_str().to_string())
249            .ok_or(JavascriptV2Error::MissingToken("r"))?;
250
251        let mut payload = HashMap::new();
252        payload.insert("r".into(), r_token);
253        if let Some(cv_id) = options.cv_id.as_ref() {
254            payload.insert("cv_chal_id".into(), cv_id.clone());
255        }
256        if let Some(page_data) = options.chl_page_data.as_ref() {
257            payload.insert("cf_chl_page_data".into(), page_data.clone());
258        }
259        Ok(payload)
260    }
261
262    fn extract_site_key(body: &str) -> Option<String> {
263        SITE_KEY_RE
264            .captures(body)
265            .and_then(|caps| caps.get(1))
266            .map(|m| m.as_str().to_string())
267    }
268}
269
270impl Default for JavascriptV2Solver {
271    fn default() -> Self {
272        Self::new()
273    }
274}
275
276impl super::ChallengeSolver for JavascriptV2Solver {
277    fn name(&self) -> &'static str {
278        "javascript_v2"
279    }
280}
281
282#[derive(Debug, Deserialize)]
283struct ChallengeOptions {
284    #[serde(rename = "cvId")]
285    cv_id: Option<String>,
286    #[serde(rename = "chlPageData")]
287    chl_page_data: Option<String>,
288    #[serde(flatten)]
289    _extra: serde_json::Value,
290}
291
292struct ChallengeInfo {
293    options: ChallengeOptions,
294    form_action: String,
295}
296
297#[derive(Debug, Error)]
298pub enum JavascriptV2Error {
299    #[error("response is not a Cloudflare v2 challenge")]
300    NotV2Challenge,
301    #[error("response is not a Cloudflare v2 captcha challenge")]
302    NotCaptchaChallenge,
303    #[error("required challenge data missing")]
304    ChallengeDataMissing,
305    #[error("challenge form action missing")]
306    FormActionMissing,
307    #[error("missing token '{0}' in challenge page")]
308    MissingToken(&'static str),
309    #[error("challenge data could not be parsed: {0}")]
310    ChallengeDataParse(#[from] serde_json::Error),
311    #[error("invalid form action '{0}': {1}")]
312    InvalidFormAction(String, url::ParseError),
313    #[error("captcha provider not configured")]
314    CaptchaProviderMissing,
315    #[error("captcha solving failed: {0}")]
316    Captcha(#[source] CaptchaError),
317    #[error("failed to solve Cloudflare v2 challenge - received 403 status")]
318    ChallengeSolveFailed,
319    #[error("challenge submission failed: {0}")]
320    Submission(#[source] ChallengeExecutionError),
321}
322
323// Regular expressions reused across the solver.
324static JS_CHALLENGE_RE: Lazy<Regex> = Lazy::new(|| {
325    RegexBuilder::new(r#"cpo\.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/jsch/v1"#)
326        .case_insensitive(true)
327        .dot_matches_new_line(true)
328        .build()
329        .expect("invalid JS challenge regex")
330});
331
332static CAPTCHA_CHALLENGE_RE: Lazy<Regex> = Lazy::new(|| {
333    RegexBuilder::new(
334        r#"cpo\.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/(captcha|managed)/v1"#,
335    )
336    .case_insensitive(true)
337    .dot_matches_new_line(true)
338    .build()
339    .expect("invalid captcha challenge regex")
340});
341
342static CHL_OPT_RE: Lazy<Regex> = Lazy::new(|| {
343    RegexBuilder::new(r#"window\._cf_chl_opt=\(?(\{.*?\})\)?;"#)
344        .dot_matches_new_line(true)
345        .build()
346        .expect("invalid _cf_chl_opt regex")
347});
348
349static FORM_ACTION_RE: Lazy<Regex> = Lazy::new(|| {
350    RegexBuilder::new(r#"<form[^>]+id=['"]challenge-form['"][^>]*action=['"]([^'"]+)['"]"#)
351        .case_insensitive(true)
352        .dot_matches_new_line(true)
353        .build()
354        .expect("invalid form action regex")
355});
356
357static R_TOKEN_RE: Lazy<Regex> = Lazy::new(|| {
358    RegexBuilder::new(r#"name=['"]r['"]\s+value=['"]([^'"]+)['"]"#)
359        .case_insensitive(true)
360        .dot_matches_new_line(true)
361        .build()
362        .expect("invalid r token regex")
363});
364
365static SITE_KEY_RE: Lazy<Regex> = Lazy::new(|| {
366    RegexBuilder::new(r#"data-sitekey=['"]([^'"]+)['"]"#)
367        .case_insensitive(true)
368        .dot_matches_new_line(true)
369        .build()
370        .expect("invalid site key regex")
371});
372
373#[cfg(test)]
374mod tests {
375    use super::*;
376    use async_trait::async_trait;
377    use http::{HeaderMap, Method, header::SERVER};
378    use url::Url;
379
380    use crate::external_deps::captcha::{CaptchaResult, CaptchaSolution};
381
382    struct ResponseFixture {
383        url: Url,
384        headers: HeaderMap,
385        method: Method,
386        body: String,
387        status: u16,
388    }
389
390    impl ResponseFixture {
391        fn new(body: &str, status: u16) -> Self {
392            let mut headers = HeaderMap::new();
393            headers.insert(SERVER, "cloudflare".parse().unwrap());
394            Self {
395                url: Url::parse("https://example.com/").unwrap(),
396                headers,
397                method: Method::GET,
398                body: body.to_string(),
399                status,
400            }
401        }
402
403        fn response(&self) -> ChallengeResponse<'_> {
404            ChallengeResponse {
405                url: &self.url,
406                status: self.status,
407                headers: &self.headers,
408                body: &self.body,
409                request_method: &self.method,
410            }
411        }
412    }
413
414    struct StubCaptchaProvider;
415
416    #[async_trait]
417    impl CaptchaProvider for StubCaptchaProvider {
418        fn name(&self) -> &'static str {
419            "stub"
420        }
421
422        async fn solve(&self, _task: &CaptchaTask) -> CaptchaResult {
423            Ok(CaptchaSolution::new("captcha-token"))
424        }
425    }
426
427    fn sample_html(include_captcha: bool) -> String {
428        let orchestrate_path = if include_captcha {
429            "/cdn-cgi/challenge-platform/h/b/orchestrate/captcha/v1"
430        } else {
431            "/cdn-cgi/challenge-platform/h/b/orchestrate/jsch/v1"
432        };
433        let captcha_snippet = if include_captcha {
434            "<div class='cf-turnstile' data-sitekey='site-key-123'></div>"
435        } else {
436            ""
437        };
438
439        format!(
440            r#"
441            <html>
442              <head>
443                                <script>window._cf_chl_opt=({{"cvId":"cv123","chlPageData":"page-data"}});</script>
444              </head>
445              <body>
446                                <script>var cpo={{}};cpo.src="{orchestrate_path}";</script>
447                <form id="challenge-form" action="/cdn-cgi/challenge-platform/h/b/orchestrate/form" method="POST">
448                  <input type="hidden" name="r" value="token-r"/>
449                </form>
450                {captcha_snippet}
451              </body>
452            </html>
453        "#
454        )
455    }
456
457    #[test]
458    fn solve_builds_submission() {
459        let html = sample_html(false);
460        let fixture = ResponseFixture::new(&html, 403);
461        let solver = JavascriptV2Solver::new();
462        assert!(JavascriptV2Solver::is_js_challenge(&fixture.response()));
463
464        let submission = solver.solve(&fixture.response()).expect("should solve");
465        assert_eq!(submission.method, Method::POST);
466        assert_eq!(
467            submission.url.as_str(),
468            "https://example.com/cdn-cgi/challenge-platform/h/b/orchestrate/form"
469        );
470        assert_eq!(
471            submission.form_fields.get("r"),
472            Some(&"token-r".to_string())
473        );
474        assert_eq!(
475            submission.form_fields.get("cv_chal_id"),
476            Some(&"cv123".to_string())
477        );
478        assert!(submission.wait >= Duration::from_secs(1));
479        assert!(submission.wait <= Duration::from_secs(5));
480        assert_eq!(
481            submission.headers.get("Content-Type"),
482            Some(&"application/x-www-form-urlencoded".to_string())
483        );
484        assert_eq!(
485            submission.headers.get("Referer"),
486            Some(&"https://example.com/".to_string())
487        );
488    }
489
490    #[tokio::test]
491    async fn solve_with_captcha_uses_provider() {
492        let html = sample_html(true);
493        let fixture = ResponseFixture::new(&html, 403);
494        let solver = JavascriptV2Solver::new().with_captcha_provider(Arc::new(StubCaptchaProvider));
495        let submission = solver
496            .solve_with_captcha(&fixture.response())
497            .await
498            .expect("captcha challenge solved");
499        assert_eq!(
500            submission.form_fields.get("h-captcha-response"),
501            Some(&"captcha-token".to_string())
502        );
503    }
504
505    #[tokio::test]
506    async fn solve_with_captcha_requires_provider() {
507        let html = sample_html(true);
508        let fixture = ResponseFixture::new(&html, 403);
509        let solver = JavascriptV2Solver::new();
510        let err = solver
511            .solve_with_captcha(&fixture.response())
512            .await
513            .expect_err("missing provider should fail");
514        matches!(err, JavascriptV2Error::CaptchaProviderMissing);
515    }
516
517    #[test]
518    fn challenge_opt_regex_handles_optional_parens() {
519        // Test with parentheses (old format)
520        let html_with_parens = r#"
521            <script>window._cf_chl_opt=({"cvId":"test123","chlPageData":"data"});</script>
522        "#;
523        assert!(CHL_OPT_RE.is_match(html_with_parens));
524
525        // Test without parentheses (newer format)
526        let html_without_parens = r#"
527            <script>window._cf_chl_opt={"cvId":"test123","chlPageData":"data"};</script>
528        "#;
529        assert!(CHL_OPT_RE.is_match(html_without_parens));
530    }
531}