cloudscraper_rs/challenges/solvers/
turnstile.rs

1//! Solver for Cloudflare Turnstile captcha challenges.
2//!
3//! Detects the Turnstile widget, delegates solving to a configurable captcha
4//! provider, and prepares the submission payload consumed by the shared
5//! executor.
6
7use std::collections::HashMap;
8use std::sync::Arc;
9use std::time::Duration;
10
11use html_escape::decode_html_entities;
12use once_cell::sync::Lazy;
13use rand::Rng;
14use regex::{Regex, RegexBuilder};
15use thiserror::Error;
16
17use crate::challenges::core::{
18    ChallengeExecutionError, ChallengeHttpClient, ChallengeHttpResponse, ChallengeResponse,
19    ChallengeSubmission, OriginalRequest, execute_challenge_submission, is_cloudflare_response,
20    origin_from_url,
21};
22use crate::external_deps::captcha::{CaptchaError, CaptchaProvider, CaptchaTask};
23
24use super::ChallengeSolver;
25
/// Lower bound, in seconds, of the delay window used by `TurnstileSolver::new`.
const DEFAULT_DELAY_MIN_SECS: f32 = 1.0;
/// Upper bound, in seconds, of the delay window used by `TurnstileSolver::new`.
const DEFAULT_DELAY_MAX_SECS: f32 = 5.0;
28
29/// Solver capable of handling Cloudflare Turnstile challenges.
30pub struct TurnstileSolver {
31    delay_min: Duration,
32    delay_max: Duration,
33    captcha_provider: Option<Arc<dyn CaptchaProvider>>,
34}
35
36impl TurnstileSolver {
37    /// Create a solver with the default random delay and no captcha provider.
38    pub fn new() -> Self {
39        Self {
40            delay_min: Duration::from_secs_f32(DEFAULT_DELAY_MIN_SECS),
41            delay_max: Duration::from_secs_f32(DEFAULT_DELAY_MAX_SECS),
42            captcha_provider: None,
43        }
44    }
45
46    /// Configure a custom delay range used before posting the solution.
47    pub fn with_delay_range(mut self, min: Duration, max: Duration) -> Self {
48        self.delay_min = min;
49        self.delay_max = if max < min { min } else { max };
50        self
51    }
52
53    /// Attach a captcha provider used to solve Turnstile tokens.
54    pub fn with_captcha_provider(mut self, provider: Arc<dyn CaptchaProvider>) -> Self {
55        self.captcha_provider = Some(provider);
56        self
57    }
58
59    /// Replace or set the captcha provider after construction.
60    pub fn set_captcha_provider(&mut self, provider: Arc<dyn CaptchaProvider>) {
61        self.captcha_provider = Some(provider);
62    }
63
64    /// Remove the configured captcha provider.
65    pub fn clear_captcha_provider(&mut self) {
66        self.captcha_provider = None;
67    }
68
69    /// Returns `true` when the response resembles a Turnstile challenge page.
70    pub fn is_turnstile_challenge(response: &ChallengeResponse<'_>) -> bool {
71        is_cloudflare_response(response)
72            && matches!(response.status, 403 | 429 | 503)
73            && (TURNSTILE_WIDGET_RE.is_match(response.body)
74                || TURNSTILE_SCRIPT_RE.is_match(response.body)
75                || TURNSTILE_SITEKEY_RE.is_match(response.body))
76    }
77
78    /// Solve the Turnstile page and return the planned challenge submission.
79    pub async fn solve(
80        &self,
81        response: &ChallengeResponse<'_>,
82    ) -> Result<ChallengeSubmission, TurnstileError> {
83        if !Self::is_turnstile_challenge(response) {
84            return Err(TurnstileError::NotTurnstileChallenge);
85        }
86
87        let provider = self
88            .captcha_provider
89            .as_ref()
90            .ok_or(TurnstileError::CaptchaProviderMissing)?;
91
92        let info = Self::extract_turnstile_info(response)?;
93        let task =
94            CaptchaTask::new(info.site_key.clone(), response.url.clone()).with_action("turnstile");
95        let solution = provider
96            .solve(&task)
97            .await
98            .map_err(TurnstileError::Captcha)?;
99
100        let payload = Self::build_payload(response.body, solution.token);
101        self.build_submission(response, &info.form_action, payload)
102    }
103
104    /// Solve and submit the challenge using the supplied HTTP client.
105    pub async fn solve_and_submit(
106        &self,
107        client: Arc<dyn ChallengeHttpClient>,
108        response: &ChallengeResponse<'_>,
109        original_request: OriginalRequest,
110    ) -> Result<ChallengeHttpResponse, TurnstileError> {
111        let submission = self.solve(response).await?;
112        execute_challenge_submission(client, submission, original_request)
113            .await
114            .map_err(TurnstileError::Submission)
115    }
116
117    fn build_submission(
118        &self,
119        response: &ChallengeResponse<'_>,
120        form_action: &str,
121        mut payload: HashMap<String, String>,
122    ) -> Result<ChallengeSubmission, TurnstileError> {
123        let form_action = decode_html_entities(form_action).into_owned();
124        let target_url = response
125            .url
126            .join(&form_action)
127            .map_err(|err| TurnstileError::InvalidFormAction(form_action.clone(), err))?;
128
129        let mut headers = HashMap::new();
130        headers.insert(
131            "Content-Type".into(),
132            "application/x-www-form-urlencoded".into(),
133        );
134        headers.insert("Referer".into(), response.url.as_str().to_string());
135        headers.insert("Origin".into(), origin_from_url(response.url));
136
137        let wait = self.random_delay();
138        payload.entry("cf-turnstile-response".into()).or_default();
139
140        Ok(ChallengeSubmission::new(
141            http::Method::POST,
142            target_url,
143            payload,
144            headers,
145            wait,
146        ))
147    }
148
149    fn random_delay(&self) -> Duration {
150        if self.delay_max <= self.delay_min {
151            return self.delay_min;
152        }
153        let mut rng = rand::thread_rng();
154        let min = self.delay_min.as_secs_f32();
155        let max = self.delay_max.as_secs_f32();
156        Duration::from_secs_f32(rng.gen_range(min..max))
157    }
158
159    fn extract_turnstile_info(
160        response: &ChallengeResponse<'_>,
161    ) -> Result<TurnstileInfo, TurnstileError> {
162        let body = response.body;
163        let site_key = TURNSTILE_SITEKEY_RE
164            .captures(body)
165            .and_then(|caps| caps.get(1))
166            .map(|m| m.as_str().to_string())
167            .ok_or(TurnstileError::MissingSiteKey)?;
168
169        let form_action = FORM_ACTION_RE
170            .captures(body)
171            .and_then(|caps| caps.get(1))
172            .map(|m| m.as_str().to_string())
173            .unwrap_or_else(|| response.url.as_str().to_string());
174
175        Ok(TurnstileInfo {
176            site_key,
177            form_action,
178        })
179    }
180
181    fn build_payload(body: &str, token: String) -> HashMap<String, String> {
182        let mut payload = HashMap::new();
183        payload.insert("cf-turnstile-response".into(), token);
184
185        for caps in INPUT_FIELD_RE.captures_iter(body) {
186            if let (Some(name), Some(value)) = (caps.get(1), caps.get(2)) {
187                let key = name.as_str();
188                if key != "cf-turnstile-response" && !payload.contains_key(key) {
189                    payload.insert(key.to_string(), value.as_str().to_string());
190                }
191            }
192        }
193
194        payload
195    }
196}
197
198impl Default for TurnstileSolver {
199    fn default() -> Self {
200        Self::new()
201    }
202}
203
204impl ChallengeSolver for TurnstileSolver {
205    fn name(&self) -> &'static str {
206        "turnstile"
207    }
208}
209
/// Values scraped from a Turnstile challenge page.
struct TurnstileInfo {
    /// Value captured from the widget's `data-sitekey` attribute.
    site_key: String,
    /// Raw form `action` value, or the response URL when the page has none.
    form_action: String,
}
214
215#[derive(Debug, Error)]
216pub enum TurnstileError {
217    #[error("response is not a Cloudflare Turnstile challenge")]
218    NotTurnstileChallenge,
219    #[error("captcha provider missing for Turnstile challenge")]
220    CaptchaProviderMissing,
221    #[error("missing Turnstile site key")]
222    MissingSiteKey,
223    #[error("invalid form action '{0}': {1}")]
224    InvalidFormAction(String, url::ParseError),
225    #[error("captcha provider error: {0}")]
226    Captcha(#[source] CaptchaError),
227    #[error("challenge submission failed: {0}")]
228    Submission(#[source] ChallengeExecutionError),
229}
230
231static TURNSTILE_WIDGET_RE: Lazy<Regex> = Lazy::new(|| {
232    RegexBuilder::new(r#"class=['"][^'"]*cf-turnstile[^'"]*['"]"#)
233        .case_insensitive(true)
234        .dot_matches_new_line(true)
235        .build()
236        .expect("invalid turnstile widget regex")
237});
238
239static TURNSTILE_SCRIPT_RE: Lazy<Regex> = Lazy::new(|| {
240    RegexBuilder::new(r#"src=['"]https://challenges\.cloudflare\.com/turnstile/v0/api\.js"#)
241        .case_insensitive(true)
242        .dot_matches_new_line(true)
243        .build()
244        .expect("invalid turnstile script regex")
245});
246
247static TURNSTILE_SITEKEY_RE: Lazy<Regex> = Lazy::new(|| {
248    RegexBuilder::new(r#"data-sitekey=['"]([0-9A-Za-z]{40})['"]"#)
249        .case_insensitive(true)
250        .dot_matches_new_line(true)
251        .build()
252        .expect("invalid turnstile site key regex")
253});
254
255static FORM_ACTION_RE: Lazy<Regex> = Lazy::new(|| {
256    RegexBuilder::new(r#"<form[^>]*action=['"]([^'"]+)['"]"#)
257        .case_insensitive(true)
258        .dot_matches_new_line(true)
259        .build()
260        .expect("invalid turnstile form action regex")
261});
262
263static INPUT_FIELD_RE: Lazy<Regex> = Lazy::new(|| {
264    RegexBuilder::new(r#"<input[^>]*name=['"]([^'"]+)['"][^>]*value=['"]([^'"]*)['"]"#)
265        .case_insensitive(true)
266        .dot_matches_new_line(true)
267        .build()
268        .expect("invalid input field regex")
269});
270
#[cfg(test)]
mod tests {
    use super::*;
    use async_trait::async_trait;
    use http::{HeaderMap, Method, header::SERVER};
    use url::Url;

    use crate::external_deps::captcha::{CaptchaResult, CaptchaSolution};

    /// Owns the data that a borrowed `ChallengeResponse` points into.
    struct ResponseFixture {
        url: Url,
        headers: HeaderMap,
        method: Method,
        body: String,
        status: u16,
    }

    impl ResponseFixture {
        /// Build a GET response from `https://example.com/turnstile` that
        /// carries a `Server: cloudflare` header.
        fn new(body: &str, status: u16) -> Self {
            let url = Url::parse("https://example.com/turnstile").unwrap();
            let mut headers = HeaderMap::new();
            headers.insert(SERVER, "cloudflare".parse().unwrap());

            Self {
                url,
                headers,
                method: Method::GET,
                body: body.to_string(),
                status,
            }
        }

        /// Borrow the fixture as the response view consumed by the solver.
        fn response(&self) -> ChallengeResponse<'_> {
            ChallengeResponse {
                url: &self.url,
                status: self.status,
                headers: &self.headers,
                body: &self.body,
                request_method: &self.method,
            }
        }
    }

    /// Provider that always succeeds with a fixed token.
    struct StubCaptchaProvider;

    #[async_trait]
    impl CaptchaProvider for StubCaptchaProvider {
        fn name(&self) -> &'static str {
            "stub"
        }

        async fn solve(&self, _task: &CaptchaTask) -> CaptchaResult {
            Ok(CaptchaSolution::new("turnstile-token"))
        }
    }

    /// Default solver wired to the stub provider.
    fn solver_with_stub() -> TurnstileSolver {
        TurnstileSolver::new().with_captcha_provider(Arc::new(StubCaptchaProvider))
    }

    /// Challenge page markup, optionally with an explicit form action.
    fn sample_html(with_form_action: bool) -> String {
        let form_attr = if with_form_action {
            r#"action="/submit/turnstile""#
        } else {
            ""
        };

        format!(
            r#"
            <html>
              <body>
                <form id="challenge-form" {form_attr} method="POST">
                  <input type="hidden" name="foo" value="bar" />
                  <input type="hidden" name="cf-turnstile-response" value="existing" />
                </form>
                <div class="cf-turnstile" data-sitekey="ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcd"></div>
                <script src="https://challenges.cloudflare.com/turnstile/v0/api.js"></script>
              </body>
            </html>
        "#
        )
    }

    #[tokio::test]
    async fn solve_turnstile_builds_submission() {
        let page = sample_html(true);
        let fixture = ResponseFixture::new(&page, 403);
        let solver = solver_with_stub();
        assert!(TurnstileSolver::is_turnstile_challenge(&fixture.response()));

        let planned = solver
            .solve(&fixture.response())
            .await
            .expect("should solve");

        assert_eq!(planned.method, Method::POST);
        assert_eq!(planned.url.as_str(), "https://example.com/submit/turnstile");
        // The solved token must win over the value already present in the page.
        assert_eq!(
            planned.form_fields.get("cf-turnstile-response"),
            Some(&"turnstile-token".to_string())
        );
        assert_eq!(planned.form_fields.get("foo"), Some(&"bar".to_string()));
        assert!(planned.wait >= Duration::from_secs(1));
        assert!(planned.wait <= Duration::from_secs(5));
    }

    #[tokio::test]
    async fn solve_uses_current_url_when_form_absent() {
        let page = sample_html(false);
        let fixture = ResponseFixture::new(&page, 403);

        let planned = solver_with_stub()
            .solve(&fixture.response())
            .await
            .expect("should solve");

        assert_eq!(planned.url.as_str(), "https://example.com/turnstile");
    }

    #[tokio::test]
    async fn solve_requires_provider() {
        let page = sample_html(true);
        let fixture = ResponseFixture::new(&page, 403);
        let solver = TurnstileSolver::new();

        let failure = solver
            .solve(&fixture.response())
            .await
            .expect_err("should fail");

        assert!(matches!(failure, TurnstileError::CaptchaProviderMissing));
    }
}