cloudscraper_rs/challenges/solvers/
turnstile.rs1use std::collections::HashMap;
8use std::sync::Arc;
9use std::time::Duration;
10
11use html_escape::decode_html_entities;
12use once_cell::sync::Lazy;
13use rand::Rng;
14use regex::{Regex, RegexBuilder};
15use thiserror::Error;
16
17use crate::challenges::core::{
18 ChallengeExecutionError, ChallengeHttpClient, ChallengeHttpResponse, ChallengeResponse,
19 ChallengeSubmission, OriginalRequest, execute_challenge_submission, is_cloudflare_response,
20 origin_from_url,
21};
22use crate::external_deps::captcha::{CaptchaError, CaptchaProvider, CaptchaTask};
23
24use super::ChallengeSolver;
25
/// Lower bound, in seconds, of the delay window used by `TurnstileSolver::new`.
const DEFAULT_DELAY_MIN_SECS: f32 = 1.0;
/// Upper bound, in seconds, of the delay window used by `TurnstileSolver::new`.
const DEFAULT_DELAY_MAX_SECS: f32 = 5.0;
28
29pub struct TurnstileSolver {
31 delay_min: Duration,
32 delay_max: Duration,
33 captcha_provider: Option<Arc<dyn CaptchaProvider>>,
34}
35
36impl TurnstileSolver {
37 pub fn new() -> Self {
39 Self {
40 delay_min: Duration::from_secs_f32(DEFAULT_DELAY_MIN_SECS),
41 delay_max: Duration::from_secs_f32(DEFAULT_DELAY_MAX_SECS),
42 captcha_provider: None,
43 }
44 }
45
46 pub fn with_delay_range(mut self, min: Duration, max: Duration) -> Self {
48 self.delay_min = min;
49 self.delay_max = if max < min { min } else { max };
50 self
51 }
52
53 pub fn with_captcha_provider(mut self, provider: Arc<dyn CaptchaProvider>) -> Self {
55 self.captcha_provider = Some(provider);
56 self
57 }
58
59 pub fn set_captcha_provider(&mut self, provider: Arc<dyn CaptchaProvider>) {
61 self.captcha_provider = Some(provider);
62 }
63
64 pub fn clear_captcha_provider(&mut self) {
66 self.captcha_provider = None;
67 }
68
69 pub fn is_turnstile_challenge(response: &ChallengeResponse<'_>) -> bool {
71 is_cloudflare_response(response)
72 && matches!(response.status, 403 | 429 | 503)
73 && (TURNSTILE_WIDGET_RE.is_match(response.body)
74 || TURNSTILE_SCRIPT_RE.is_match(response.body)
75 || TURNSTILE_SITEKEY_RE.is_match(response.body))
76 }
77
78 pub async fn solve(
80 &self,
81 response: &ChallengeResponse<'_>,
82 ) -> Result<ChallengeSubmission, TurnstileError> {
83 if !Self::is_turnstile_challenge(response) {
84 return Err(TurnstileError::NotTurnstileChallenge);
85 }
86
87 let provider = self
88 .captcha_provider
89 .as_ref()
90 .ok_or(TurnstileError::CaptchaProviderMissing)?;
91
92 let info = Self::extract_turnstile_info(response)?;
93 let task =
94 CaptchaTask::new(info.site_key.clone(), response.url.clone()).with_action("turnstile");
95 let solution = provider
96 .solve(&task)
97 .await
98 .map_err(TurnstileError::Captcha)?;
99
100 let payload = Self::build_payload(response.body, solution.token);
101 self.build_submission(response, &info.form_action, payload)
102 }
103
104 pub async fn solve_and_submit(
106 &self,
107 client: Arc<dyn ChallengeHttpClient>,
108 response: &ChallengeResponse<'_>,
109 original_request: OriginalRequest,
110 ) -> Result<ChallengeHttpResponse, TurnstileError> {
111 let submission = self.solve(response).await?;
112 execute_challenge_submission(client, submission, original_request)
113 .await
114 .map_err(TurnstileError::Submission)
115 }
116
117 fn build_submission(
118 &self,
119 response: &ChallengeResponse<'_>,
120 form_action: &str,
121 mut payload: HashMap<String, String>,
122 ) -> Result<ChallengeSubmission, TurnstileError> {
123 let form_action = decode_html_entities(form_action).into_owned();
124 let target_url = response
125 .url
126 .join(&form_action)
127 .map_err(|err| TurnstileError::InvalidFormAction(form_action.clone(), err))?;
128
129 let mut headers = HashMap::new();
130 headers.insert(
131 "Content-Type".into(),
132 "application/x-www-form-urlencoded".into(),
133 );
134 headers.insert("Referer".into(), response.url.as_str().to_string());
135 headers.insert("Origin".into(), origin_from_url(response.url));
136
137 let wait = self.random_delay();
138 payload.entry("cf-turnstile-response".into()).or_default();
139
140 Ok(ChallengeSubmission::new(
141 http::Method::POST,
142 target_url,
143 payload,
144 headers,
145 wait,
146 ))
147 }
148
149 fn random_delay(&self) -> Duration {
150 if self.delay_max <= self.delay_min {
151 return self.delay_min;
152 }
153 let mut rng = rand::thread_rng();
154 let min = self.delay_min.as_secs_f32();
155 let max = self.delay_max.as_secs_f32();
156 Duration::from_secs_f32(rng.gen_range(min..max))
157 }
158
159 fn extract_turnstile_info(
160 response: &ChallengeResponse<'_>,
161 ) -> Result<TurnstileInfo, TurnstileError> {
162 let body = response.body;
163 let site_key = TURNSTILE_SITEKEY_RE
164 .captures(body)
165 .and_then(|caps| caps.get(1))
166 .map(|m| m.as_str().to_string())
167 .ok_or(TurnstileError::MissingSiteKey)?;
168
169 let form_action = FORM_ACTION_RE
170 .captures(body)
171 .and_then(|caps| caps.get(1))
172 .map(|m| m.as_str().to_string())
173 .unwrap_or_else(|| response.url.as_str().to_string());
174
175 Ok(TurnstileInfo {
176 site_key,
177 form_action,
178 })
179 }
180
181 fn build_payload(body: &str, token: String) -> HashMap<String, String> {
182 let mut payload = HashMap::new();
183 payload.insert("cf-turnstile-response".into(), token);
184
185 for caps in INPUT_FIELD_RE.captures_iter(body) {
186 if let (Some(name), Some(value)) = (caps.get(1), caps.get(2)) {
187 let key = name.as_str();
188 if key != "cf-turnstile-response" && !payload.contains_key(key) {
189 payload.insert(key.to_string(), value.as_str().to_string());
190 }
191 }
192 }
193
194 payload
195 }
196}
197
198impl Default for TurnstileSolver {
199 fn default() -> Self {
200 Self::new()
201 }
202}
203
204impl ChallengeSolver for TurnstileSolver {
205 fn name(&self) -> &'static str {
206 "turnstile"
207 }
208}
209
/// Values scraped from a Turnstile challenge page.
struct TurnstileInfo {
    /// Value of the `data-sitekey` attribute.
    site_key: String,
    /// Raw form `action` value, or the response URL when no form action was found.
    form_action: String,
}
214
215#[derive(Debug, Error)]
216pub enum TurnstileError {
217 #[error("response is not a Cloudflare Turnstile challenge")]
218 NotTurnstileChallenge,
219 #[error("captcha provider missing for Turnstile challenge")]
220 CaptchaProviderMissing,
221 #[error("missing Turnstile site key")]
222 MissingSiteKey,
223 #[error("invalid form action '{0}': {1}")]
224 InvalidFormAction(String, url::ParseError),
225 #[error("captcha provider error: {0}")]
226 Captcha(#[source] CaptchaError),
227 #[error("challenge submission failed: {0}")]
228 Submission(#[source] ChallengeExecutionError),
229}
230
231static TURNSTILE_WIDGET_RE: Lazy<Regex> = Lazy::new(|| {
232 RegexBuilder::new(r#"class=['"][^'"]*cf-turnstile[^'"]*['"]"#)
233 .case_insensitive(true)
234 .dot_matches_new_line(true)
235 .build()
236 .expect("invalid turnstile widget regex")
237});
238
239static TURNSTILE_SCRIPT_RE: Lazy<Regex> = Lazy::new(|| {
240 RegexBuilder::new(r#"src=['"]https://challenges\.cloudflare\.com/turnstile/v0/api\.js"#)
241 .case_insensitive(true)
242 .dot_matches_new_line(true)
243 .build()
244 .expect("invalid turnstile script regex")
245});
246
247static TURNSTILE_SITEKEY_RE: Lazy<Regex> = Lazy::new(|| {
248 RegexBuilder::new(r#"data-sitekey=['"]([0-9A-Za-z]{40})['"]"#)
249 .case_insensitive(true)
250 .dot_matches_new_line(true)
251 .build()
252 .expect("invalid turnstile site key regex")
253});
254
255static FORM_ACTION_RE: Lazy<Regex> = Lazy::new(|| {
256 RegexBuilder::new(r#"<form[^>]*action=['"]([^'"]+)['"]"#)
257 .case_insensitive(true)
258 .dot_matches_new_line(true)
259 .build()
260 .expect("invalid turnstile form action regex")
261});
262
263static INPUT_FIELD_RE: Lazy<Regex> = Lazy::new(|| {
264 RegexBuilder::new(r#"<input[^>]*name=['"]([^'"]+)['"][^>]*value=['"]([^'"]*)['"]"#)
265 .case_insensitive(true)
266 .dot_matches_new_line(true)
267 .build()
268 .expect("invalid input field regex")
269});
270
#[cfg(test)]
mod tests {
    use super::*;
    use async_trait::async_trait;
    use http::{HeaderMap, Method, header::SERVER};
    use url::Url;

    use crate::external_deps::captcha::{CaptchaResult, CaptchaSolution};

    /// Owns the data that a borrowed `ChallengeResponse` points into.
    struct ResponseFixture {
        url: Url,
        headers: HeaderMap,
        method: Method,
        body: String,
        status: u16,
    }

    impl ResponseFixture {
        /// Builds a GET response from `https://example.com/turnstile` with a
        /// `Server: cloudflare` header.
        fn new(body: &str, status: u16) -> Self {
            let url = Url::parse("https://example.com/turnstile").unwrap();
            let mut headers = HeaderMap::new();
            headers.insert(SERVER, "cloudflare".parse().unwrap());

            Self {
                url,
                headers,
                method: Method::GET,
                body: body.to_string(),
                status,
            }
        }

        /// Borrows the fixture as a `ChallengeResponse`.
        fn response(&self) -> ChallengeResponse<'_> {
            ChallengeResponse {
                url: &self.url,
                status: self.status,
                headers: &self.headers,
                body: &self.body,
                request_method: &self.method,
            }
        }
    }

    /// Captcha provider that always succeeds with a fixed token.
    struct StubCaptchaProvider;

    #[async_trait]
    impl CaptchaProvider for StubCaptchaProvider {
        fn name(&self) -> &'static str {
            "stub"
        }

        async fn solve(&self, _task: &CaptchaTask) -> CaptchaResult {
            Ok(CaptchaSolution::new("turnstile-token"))
        }
    }

    /// Solver wired to the stub provider.
    fn stub_solver() -> TurnstileSolver {
        TurnstileSolver::new().with_captcha_provider(Arc::new(StubCaptchaProvider))
    }

    /// Challenge page with a Turnstile widget; the form carries an `action`
    /// attribute only when `with_form_action` is set.
    fn sample_html(with_form_action: bool) -> String {
        let form_attr = if with_form_action {
            r#"action="/submit/turnstile""#
        } else {
            ""
        };

        format!(
            r#"
            <html>
              <body>
                <form id="challenge-form" {form_attr} method="POST">
                  <input type="hidden" name="foo" value="bar" />
                  <input type="hidden" name="cf-turnstile-response" value="existing" />
                </form>
                <div class="cf-turnstile" data-sitekey="ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcd"></div>
                <script src="https://challenges.cloudflare.com/turnstile/v0/api.js"></script>
              </body>
            </html>
            "#
        )
    }

    #[tokio::test]
    async fn solve_turnstile_builds_submission() {
        let page = sample_html(true);
        let fixture = ResponseFixture::new(&page, 403);
        let solver = stub_solver();
        assert!(TurnstileSolver::is_turnstile_challenge(&fixture.response()));

        let built = solver
            .solve(&fixture.response())
            .await
            .expect("should solve");

        assert_eq!(built.method, Method::POST);
        assert_eq!(built.url.as_str(), "https://example.com/submit/turnstile");
        assert_eq!(
            built.form_fields.get("cf-turnstile-response"),
            Some(&"turnstile-token".to_string())
        );
        assert_eq!(built.form_fields.get("foo"), Some(&"bar".to_string()));
        assert!(built.wait >= Duration::from_secs(1));
        assert!(built.wait <= Duration::from_secs(5));
    }

    #[tokio::test]
    async fn solve_uses_current_url_when_form_absent() {
        let page = sample_html(false);
        let fixture = ResponseFixture::new(&page, 403);

        let built = stub_solver()
            .solve(&fixture.response())
            .await
            .expect("should solve");

        assert_eq!(built.url.as_str(), "https://example.com/turnstile");
    }

    #[tokio::test]
    async fn solve_requires_provider() {
        let page = sample_html(true);
        let fixture = ResponseFixture::new(&page, 403);
        let bare_solver = TurnstileSolver::new();

        let failure = bare_solver
            .solve(&fixture.response())
            .await
            .expect_err("should fail");

        assert!(matches!(failure, TurnstileError::CaptchaProviderMissing));
    }
}