cloudscraper_rs/challenges/solvers/
javascript_v2.rs

1//! Solver for Cloudflare JavaScript VM challenge v2.
2//!
3//! Extracts orchestration metadata embedded in the challenge page, prepares the
4//! expected payload (including optional hCaptcha tokens), and relies on the
5//! shared executor to perform the delayed submission.
6
7use std::collections::HashMap;
8use std::sync::Arc;
9use std::time::Duration;
10
11use html_escape::decode_html_entities;
12use once_cell::sync::Lazy;
13use rand::Rng;
14use regex::{Regex, RegexBuilder};
15use serde::Deserialize;
16use thiserror::Error;
17
18use crate::challenges::core::{
19    ChallengeExecutionError, ChallengeHttpClient, ChallengeHttpResponse, ChallengeResponse,
20    ChallengeSubmission, OriginalRequest, execute_challenge_submission, is_cloudflare_response,
21    origin_from_url,
22};
23use crate::external_deps::captcha::{CaptchaError, CaptchaProvider, CaptchaTask};
24
/// Default lower bound, in seconds, of the random wait applied before the
/// challenge response is submitted. Used by `JavascriptV2Solver::new`.
const DEFAULT_DELAY_MIN_SECS: f32 = 1.0;
/// Default upper bound, in seconds, of the random wait applied before the
/// challenge response is submitted. Used by `JavascriptV2Solver::new`.
const DEFAULT_DELAY_MAX_SECS: f32 = 5.0;
29
/// Solver capable of handling Cloudflare VM (v2) JavaScript challenges.
///
/// Holds the bounds of the random pre-submission delay and an optional
/// captcha provider used for the captcha variant of the challenge.
pub struct JavascriptV2Solver {
    /// Lower bound of the random wait attached to each submission.
    delay_min: Duration,
    /// Upper bound of the random wait attached to each submission.
    delay_max: Duration,
    /// Optional hCaptcha provider; `None` until one is configured.
    captcha_provider: Option<Arc<dyn CaptchaProvider>>,
}
36
37impl JavascriptV2Solver {
38    /// Create a solver with default delay range (1-5 seconds) and no captcha provider.
39    pub fn new() -> Self {
40        Self {
41            delay_min: Duration::from_secs_f32(DEFAULT_DELAY_MIN_SECS),
42            delay_max: Duration::from_secs_f32(DEFAULT_DELAY_MAX_SECS),
43            captcha_provider: None,
44        }
45    }
46
47    /// Configure the random delay range applied before challenge submission.
48    pub fn with_delay_range(mut self, min: Duration, max: Duration) -> Self {
49        self.delay_min = min;
50        self.delay_max = if max < min { min } else { max };
51        self
52    }
53
54    /// Attach an hCaptcha provider that will be used when captcha challenges are detected.
55    pub fn with_captcha_provider(mut self, provider: Arc<dyn CaptchaProvider>) -> Self {
56        self.captcha_provider = Some(provider);
57        self
58    }
59
60    /// Set (or replace) the captcha provider after construction.
61    pub fn set_captcha_provider(&mut self, provider: Arc<dyn CaptchaProvider>) {
62        self.captcha_provider = Some(provider);
63    }
64
65    /// Remove any configured captcha provider.
66    pub fn clear_captcha_provider(&mut self) {
67        self.captcha_provider = None;
68    }
69
70    /// Returns `true` when the response matches the Cloudflare v2 JavaScript challenge signature.
71    pub fn is_js_challenge(response: &ChallengeResponse<'_>) -> bool {
72        is_cloudflare_response(response)
73            && matches!(response.status, 403 | 429 | 503)
74            && JS_CHALLENGE_RE.is_match(response.body)
75    }
76
77    /// Returns `true` when the response corresponds to the Cloudflare v2 hCaptcha flow.
78    pub fn is_captcha_challenge(response: &ChallengeResponse<'_>) -> bool {
79        is_cloudflare_response(response)
80            && response.status == 403
81            && CAPTCHA_CHALLENGE_RE.is_match(response.body)
82    }
83
84    /// Build the challenge submission payload for non-captcha VM challenges.
85    pub fn solve(
86        &self,
87        response: &ChallengeResponse<'_>,
88    ) -> Result<ChallengeSubmission, JavascriptV2Error> {
89        if !Self::is_js_challenge(response) {
90            return Err(JavascriptV2Error::NotV2Challenge);
91        }
92
93        let info = Self::extract_challenge_info(response.body)?;
94        let payload = Self::generate_payload(response.body, &info.options)?;
95        self.build_submission(response, &info.form_action, payload)
96    }
97
98    /// Build the challenge submission payload for captcha-protected VM challenges.
99    pub async fn solve_with_captcha(
100        &self,
101        response: &ChallengeResponse<'_>,
102    ) -> Result<ChallengeSubmission, JavascriptV2Error> {
103        if !Self::is_captcha_challenge(response) {
104            return Err(JavascriptV2Error::NotCaptchaChallenge);
105        }
106
107        let provider = self
108            .captcha_provider
109            .as_ref()
110            .ok_or(JavascriptV2Error::CaptchaProviderMissing)?;
111
112        let info = Self::extract_challenge_info(response.body)?;
113        let mut payload = Self::generate_payload(response.body, &info.options)?;
114
115        let site_key = Self::extract_site_key(response.body)
116            .ok_or(JavascriptV2Error::MissingToken("data-sitekey"))?;
117
118        let mut task = CaptchaTask::new(site_key, response.url.clone());
119        // Preserve challenge-specific context for providers that can use it.
120        if let Some(cv_id) = info.options.cv_id.as_ref() {
121            task = task.insert_metadata("cv_id", cv_id.clone());
122        }
123
124        let solution = provider
125            .solve(&task)
126            .await
127            .map_err(JavascriptV2Error::Captcha)?;
128        payload.insert("h-captcha-response".into(), solution.token);
129        for (key, value) in solution.metadata {
130            payload.insert(key, value);
131        }
132
133        self.build_submission(response, &info.form_action, payload)
134    }
135
136    /// Execute the full challenge flow, including waiting and submission.
137    pub async fn solve_and_submit(
138        &self,
139        client: Arc<dyn ChallengeHttpClient>,
140        response: &ChallengeResponse<'_>,
141        original_request: OriginalRequest,
142    ) -> Result<ChallengeHttpResponse, JavascriptV2Error> {
143        let submission = if Self::is_captcha_challenge(response) {
144            self.solve_with_captcha(response).await?
145        } else {
146            self.solve(response)?
147        };
148
149        execute_challenge_submission(client, submission, original_request)
150            .await
151            .map_err(JavascriptV2Error::Submission)
152    }
153
154    fn build_submission(
155        &self,
156        response: &ChallengeResponse<'_>,
157        form_action: &str,
158        mut payload: HashMap<String, String>,
159    ) -> Result<ChallengeSubmission, JavascriptV2Error> {
160        let action = decode_html_entities(form_action).into_owned();
161        let target_url = response
162            .url
163            .join(&action)
164            .map_err(|err| JavascriptV2Error::InvalidFormAction(action.clone(), err))?;
165
166        // Ensure required fields exist even if the upstream payload omitted them.
167        payload
168            .entry("cf_ch_verify".into())
169            .or_insert_with(|| "plat".into());
170        payload.entry("vc".into()).or_default();
171        payload.entry("captcha_vc".into()).or_default();
172        payload
173            .entry("cf_captcha_kind".into())
174            .or_insert_with(|| "h".into());
175        payload.entry("h-captcha-response".into()).or_default();
176
177        let mut headers = HashMap::new();
178        headers.insert(
179            "Content-Type".into(),
180            "application/x-www-form-urlencoded".into(),
181        );
182        headers.insert("Referer".into(), response.url.as_str().to_string());
183        headers.insert("Origin".into(), origin_from_url(response.url));
184
185        let wait = self.random_delay();
186        let submission =
187            ChallengeSubmission::new(http::Method::POST, target_url, payload, headers, wait);
188        Ok(submission)
189    }
190
191    fn random_delay(&self) -> Duration {
192        if self.delay_max <= self.delay_min {
193            return self.delay_min;
194        }
195        let mut rng = rand::thread_rng();
196        let min = self.delay_min.as_secs_f32();
197        let max = self.delay_max.as_secs_f32();
198        let secs = rng.gen_range(min..=max);
199        Duration::from_secs_f32(secs)
200    }
201
202    fn extract_challenge_info(body: &str) -> Result<ChallengeInfo, JavascriptV2Error> {
203        let options = Self::extract_challenge_options(body)?;
204        let form_action = Self::extract_form_action(body)?;
205        Ok(ChallengeInfo {
206            options,
207            form_action,
208        })
209    }
210
211    fn extract_challenge_options(body: &str) -> Result<ChallengeOptions, JavascriptV2Error> {
212        let captures = CHL_OPT_RE
213            .captures(body)
214            .and_then(|caps| caps.get(1))
215            .ok_or(JavascriptV2Error::ChallengeDataMissing)?;
216        let json = captures.as_str();
217        let options: ChallengeOptions = serde_json::from_str(json)?;
218        Ok(options)
219    }
220
221    fn extract_form_action(body: &str) -> Result<String, JavascriptV2Error> {
222        let action = FORM_ACTION_RE
223            .captures(body)
224            .and_then(|caps| caps.get(1))
225            .map(|m| m.as_str().to_string())
226            .ok_or(JavascriptV2Error::FormActionMissing)?;
227        Ok(action)
228    }
229
230    fn generate_payload(
231        body: &str,
232        options: &ChallengeOptions,
233    ) -> Result<HashMap<String, String>, JavascriptV2Error> {
234        let r_token = R_TOKEN_RE
235            .captures(body)
236            .and_then(|caps| caps.get(1))
237            .map(|m| m.as_str().to_string())
238            .ok_or(JavascriptV2Error::MissingToken("r"))?;
239
240        let mut payload = HashMap::new();
241        payload.insert("r".into(), r_token);
242        if let Some(cv_id) = options.cv_id.as_ref() {
243            payload.insert("cv_chal_id".into(), cv_id.clone());
244        }
245        if let Some(page_data) = options.chl_page_data.as_ref() {
246            payload.insert("cf_chl_page_data".into(), page_data.clone());
247        }
248        Ok(payload)
249    }
250
251    fn extract_site_key(body: &str) -> Option<String> {
252        SITE_KEY_RE
253            .captures(body)
254            .and_then(|caps| caps.get(1))
255            .map(|m| m.as_str().to_string())
256    }
257}
258
259impl Default for JavascriptV2Solver {
260    fn default() -> Self {
261        Self::new()
262    }
263}
264
// Implements the sibling-module `ChallengeSolver` trait for this solver.
impl super::ChallengeSolver for JavascriptV2Solver {
    /// Static identifier reported for this solver.
    fn name(&self) -> &'static str {
        "javascript_v2"
    }
}
270
/// Fields read from the JSON object captured by `CHL_OPT_RE`.
#[derive(Debug, Deserialize)]
struct ChallengeOptions {
    /// Value of the `cvId` key, when present.
    #[serde(rename = "cvId")]
    cv_id: Option<String>,
    /// Value of the `chlPageData` key, when present.
    #[serde(rename = "chlPageData")]
    chl_page_data: Option<String>,
    /// Flattened catch-all for the remaining keys; not read anywhere in this file.
    #[serde(flatten)]
    _extra: serde_json::Value,
}
280
/// Data pulled out of a challenge page by `extract_challenge_info`.
struct ChallengeInfo {
    /// Parsed `window._cf_chl_opt` options.
    options: ChallengeOptions,
    /// Raw `action` attribute of the challenge form (entities not yet decoded).
    form_action: String,
}
285
/// Errors produced while detecting, solving, or submitting a v2 challenge.
#[derive(Debug, Error)]
pub enum JavascriptV2Error {
    /// The response did not pass `JavascriptV2Solver::is_js_challenge`.
    #[error("response is not a Cloudflare v2 challenge")]
    NotV2Challenge,
    /// The response did not pass `JavascriptV2Solver::is_captcha_challenge`.
    #[error("response is not a Cloudflare v2 captcha challenge")]
    NotCaptchaChallenge,
    /// The `window._cf_chl_opt` assignment was not found in the page body.
    #[error("required challenge data missing")]
    ChallengeDataMissing,
    /// No challenge form with an `action` attribute was found in the page body.
    #[error("challenge form action missing")]
    FormActionMissing,
    /// A required value (named by the payload) was not found in the page body.
    #[error("missing token '{0}' in challenge page")]
    MissingToken(&'static str),
    /// The captured challenge options were not valid JSON for the expected shape.
    #[error("challenge data could not be parsed: {0}")]
    ChallengeDataParse(#[from] serde_json::Error),
    /// The decoded form action could not be joined onto the response URL.
    #[error("invalid form action '{0}': {1}")]
    InvalidFormAction(String, url::ParseError),
    /// A captcha challenge was detected but no provider is configured.
    #[error("captcha provider not configured")]
    CaptchaProviderMissing,
    /// The captcha provider returned an error.
    #[error("captcha solving failed: {0}")]
    Captcha(#[source] CaptchaError),
    /// `execute_challenge_submission` returned an error.
    #[error("challenge submission failed: {0}")]
    Submission(#[source] ChallengeExecutionError),
}
309
310// Regular expressions reused across the solver.
311static JS_CHALLENGE_RE: Lazy<Regex> = Lazy::new(|| {
312    RegexBuilder::new(r#"cpo\.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/jsch/v1"#)
313        .case_insensitive(true)
314        .dot_matches_new_line(true)
315        .build()
316        .expect("invalid JS challenge regex")
317});
318
319static CAPTCHA_CHALLENGE_RE: Lazy<Regex> = Lazy::new(|| {
320    RegexBuilder::new(
321        r#"cpo\.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/(captcha|managed)/v1"#,
322    )
323    .case_insensitive(true)
324    .dot_matches_new_line(true)
325    .build()
326    .expect("invalid captcha challenge regex")
327});
328
329static CHL_OPT_RE: Lazy<Regex> = Lazy::new(|| {
330    RegexBuilder::new(r#"window\._cf_chl_opt=\((\{[^;]+\})\);"#)
331        .dot_matches_new_line(true)
332        .build()
333        .expect("invalid _cf_chl_opt regex")
334});
335
336static FORM_ACTION_RE: Lazy<Regex> = Lazy::new(|| {
337    RegexBuilder::new(r#"<form[^>]+id=['"]challenge-form['"][^>]*action=['"]([^'"]+)['"]"#)
338        .case_insensitive(true)
339        .dot_matches_new_line(true)
340        .build()
341        .expect("invalid form action regex")
342});
343
344static R_TOKEN_RE: Lazy<Regex> = Lazy::new(|| {
345    RegexBuilder::new(r#"name=['"]r['"]\s+value=['"]([^'"]+)['"]"#)
346        .case_insensitive(true)
347        .dot_matches_new_line(true)
348        .build()
349        .expect("invalid r token regex")
350});
351
352static SITE_KEY_RE: Lazy<Regex> = Lazy::new(|| {
353    RegexBuilder::new(r#"data-sitekey=['"]([^'"]+)['"]"#)
354        .case_insensitive(true)
355        .dot_matches_new_line(true)
356        .build()
357        .expect("invalid site key regex")
358});
359
360#[cfg(test)]
361mod tests {
362    use super::*;
363    use async_trait::async_trait;
364    use http::{HeaderMap, Method, header::SERVER};
365    use url::Url;
366
367    use crate::external_deps::captcha::{CaptchaResult, CaptchaSolution};
368
    /// Owns the data a `ChallengeResponse` borrows, so tests can build
    /// responses from it via `ResponseFixture::response`.
    struct ResponseFixture {
        /// URL reported as the response URL.
        url: Url,
        /// Response headers.
        headers: HeaderMap,
        /// HTTP method of the request that produced the response.
        method: Method,
        /// Response body (the challenge page HTML).
        body: String,
        /// HTTP status code.
        status: u16,
    }
376
377    impl ResponseFixture {
378        fn new(body: &str, status: u16) -> Self {
379            let mut headers = HeaderMap::new();
380            headers.insert(SERVER, "cloudflare".parse().unwrap());
381            Self {
382                url: Url::parse("https://example.com/").unwrap(),
383                headers,
384                method: Method::GET,
385                body: body.to_string(),
386                status,
387            }
388        }
389
390        fn response(&self) -> ChallengeResponse<'_> {
391            ChallengeResponse {
392                url: &self.url,
393                status: self.status,
394                headers: &self.headers,
395                body: &self.body,
396                request_method: &self.method,
397            }
398        }
399    }
400
    /// Captcha provider stand-in used by the tests; it holds no state.
    struct StubCaptchaProvider;
402
    #[async_trait]
    impl CaptchaProvider for StubCaptchaProvider {
        /// Fixed provider name.
        fn name(&self) -> &'static str {
            "stub"
        }

        /// Ignores the task and always succeeds with the token `captcha-token`.
        async fn solve(&self, _task: &CaptchaTask) -> CaptchaResult {
            Ok(CaptchaSolution::new("captcha-token"))
        }
    }
413
414    fn sample_html(include_captcha: bool) -> String {
415        let orchestrate_path = if include_captcha {
416            "/cdn-cgi/challenge-platform/h/b/orchestrate/captcha/v1"
417        } else {
418            "/cdn-cgi/challenge-platform/h/b/orchestrate/jsch/v1"
419        };
420        let captcha_snippet = if include_captcha {
421            "<div class='cf-turnstile' data-sitekey='site-key-123'></div>"
422        } else {
423            ""
424        };
425
426        format!(
427            r#"
428            <html>
429              <head>
430                                <script>window._cf_chl_opt=({{"cvId":"cv123","chlPageData":"page-data"}});</script>
431              </head>
432              <body>
433                                <script>var cpo={{}};cpo.src="{orchestrate_path}";</script>
434                <form id="challenge-form" action="/cdn-cgi/challenge-platform/h/b/orchestrate/form" method="POST">
435                  <input type="hidden" name="r" value="token-r"/>
436                </form>
437                {captcha_snippet}
438              </body>
439            </html>
440        "#
441        )
442    }
443
444    #[test]
445    fn solve_builds_submission() {
446        let html = sample_html(false);
447        let fixture = ResponseFixture::new(&html, 403);
448        let solver = JavascriptV2Solver::new();
449        assert!(JavascriptV2Solver::is_js_challenge(&fixture.response()));
450
451        let submission = solver.solve(&fixture.response()).expect("should solve");
452        assert_eq!(submission.method, Method::POST);
453        assert_eq!(
454            submission.url.as_str(),
455            "https://example.com/cdn-cgi/challenge-platform/h/b/orchestrate/form"
456        );
457        assert_eq!(
458            submission.form_fields.get("r"),
459            Some(&"token-r".to_string())
460        );
461        assert_eq!(
462            submission.form_fields.get("cv_chal_id"),
463            Some(&"cv123".to_string())
464        );
465        assert!(submission.wait >= Duration::from_secs(1));
466        assert!(submission.wait <= Duration::from_secs(5));
467        assert_eq!(
468            submission.headers.get("Content-Type"),
469            Some(&"application/x-www-form-urlencoded".to_string())
470        );
471        assert_eq!(
472            submission.headers.get("Referer"),
473            Some(&"https://example.com/".to_string())
474        );
475    }
476
477    #[tokio::test]
478    async fn solve_with_captcha_uses_provider() {
479        let html = sample_html(true);
480        let fixture = ResponseFixture::new(&html, 403);
481        let solver = JavascriptV2Solver::new().with_captcha_provider(Arc::new(StubCaptchaProvider));
482        let submission = solver
483            .solve_with_captcha(&fixture.response())
484            .await
485            .expect("captcha challenge solved");
486        assert_eq!(
487            submission.form_fields.get("h-captcha-response"),
488            Some(&"captcha-token".to_string())
489        );
490    }
491
492    #[tokio::test]
493    async fn solve_with_captcha_requires_provider() {
494        let html = sample_html(true);
495        let fixture = ResponseFixture::new(&html, 403);
496        let solver = JavascriptV2Solver::new();
497        let err = solver
498            .solve_with_captcha(&fixture.response())
499            .await
500            .expect_err("missing provider should fail");
501        matches!(err, JavascriptV2Error::CaptchaProviderMissing);
502    }
503}