cloudscraper_rs/challenges/solvers/
rate_limit.rs1use std::time::Duration;
7
8use chrono::{DateTime, Utc};
9use once_cell::sync::Lazy;
10use rand::Rng;
11use regex::{Regex, RegexBuilder};
12use thiserror::Error;
13
14use crate::challenges::core::{ChallengeResponse, is_cloudflare_response};
15
16use super::{ChallengeSolver, FailureRecorder, MitigationPlan};
17
/// Lower bound, in seconds, of the fallback delay range installed by `RateLimitHandler::new`.
const DEFAULT_DELAY_MIN_SECS: f32 = 60.0;
/// Upper bound, in seconds, of the fallback delay range installed by `RateLimitHandler::new`.
const DEFAULT_DELAY_MAX_SECS: f32 = 180.0;
20
/// Plans how long to wait before retrying after a Cloudflare rate-limit response.
///
/// The two fields bound the random fallback delay that is used when the response itself
/// does not say how long to wait.
#[derive(Debug, Clone)]
pub struct RateLimitHandler {
    /// Smallest fallback delay.
    delay_min: Duration,
    /// Largest fallback delay; `with_delay_range` keeps it at or above `delay_min`.
    delay_max: Duration,
}
26
27impl RateLimitHandler {
28 pub fn new() -> Self {
29 Self {
30 delay_min: Duration::from_secs_f32(DEFAULT_DELAY_MIN_SECS),
31 delay_max: Duration::from_secs_f32(DEFAULT_DELAY_MAX_SECS),
32 }
33 }
34
35 pub fn with_delay_range(mut self, min: Duration, max: Duration) -> Self {
36 self.delay_min = min;
37 self.delay_max = if max < min { min } else { max };
38 self
39 }
40
41 pub fn is_rate_limited(response: &ChallengeResponse<'_>) -> bool {
42 is_cloudflare_response(response)
43 && response.status == 429
44 && RATE_LIMIT_RE.is_match(response.body)
45 }
46
47 pub fn plan(
48 &self,
49 response: &ChallengeResponse<'_>,
50 state_recorder: Option<&dyn FailureRecorder>,
51 ) -> Result<MitigationPlan, RateLimitError> {
52 if !Self::is_rate_limited(response) {
53 return Err(RateLimitError::NotRateLimited);
54 }
55
56 if let Some(recorder) = state_recorder
57 && let Some(domain) = response.url.host_str()
58 {
59 recorder.record_failure(domain, "cf_rate_limit");
60 }
61
62 let (delay, source) = self.determine_delay(response);
63 let mut plan = MitigationPlan::retry_after(delay, "rate_limit");
64 plan.metadata.insert("delay_source".into(), source);
65 plan.metadata.insert("trigger".into(), "cf_1015".into());
66
67 Ok(plan)
68 }
69
70 fn determine_delay(&self, response: &ChallengeResponse<'_>) -> (Duration, String) {
71 if let Some(delay) = self.retry_after_header(response) {
72 return (delay, "header".into());
73 }
74
75 if let Some(delay) = self.delay_from_body(response.body) {
76 return (delay, "body".into());
77 }
78
79 (self.random_delay(), "default".into())
80 }
81
82 fn retry_after_header(&self, response: &ChallengeResponse<'_>) -> Option<Duration> {
83 use http::header::RETRY_AFTER;
84
85 let raw = response.headers.get(RETRY_AFTER)?.to_str().ok()?;
86 if let Ok(seconds) = raw.trim().parse::<f64>()
87 && seconds.is_finite()
88 && seconds >= 0.0
89 {
90 return Some(Duration::from_secs_f64(seconds));
91 }
92
93 if let Ok(date) = DateTime::parse_from_rfc2822(raw.trim())
94 .or_else(|_| DateTime::parse_from_rfc3339(raw.trim()))
95 && let Ok(duration) = (date.with_timezone(&Utc) - Utc::now()).to_std()
96 {
97 return Some(duration);
98 }
99
100 None
101 }
102
103 fn delay_from_body(&self, body: &str) -> Option<Duration> {
104 let caps = RATE_LIMIT_DELAY_RE.captures(body)?;
105 let amount: u64 = caps.get(1)?.as_str().parse().ok()?;
106 let unit = caps.get(2)?.as_str().to_lowercase();
107 let multiplier = match unit.as_str() {
108 "second" | "seconds" => 1,
109 "minute" | "minutes" => 60,
110 "hour" | "hours" => 3600,
111 _ => 1,
112 };
113 Some(Duration::from_secs(amount * multiplier))
114 }
115
116 fn random_delay(&self) -> Duration {
117 if self.delay_max <= self.delay_min {
118 return self.delay_min;
119 }
120 let mut rng = rand::thread_rng();
121 let min = self.delay_min.as_secs_f32();
122 let max = self.delay_max.as_secs_f32();
123 Duration::from_secs_f32(rng.gen_range(min..max))
124 }
125}
126
127impl Default for RateLimitHandler {
128 fn default() -> Self {
129 Self::new()
130 }
131}
132
133impl ChallengeSolver for RateLimitHandler {
134 fn name(&self) -> &'static str {
135 "rate_limit"
136 }
137}
138
139#[derive(Debug, Error)]
140pub enum RateLimitError {
141 #[error("response is not a Cloudflare rate limit challenge")]
142 NotRateLimited,
143}
144
145static RATE_LIMIT_RE: Lazy<Regex> = Lazy::new(|| {
146 RegexBuilder::new(
147 r#"(<span[^>]*class=['"]cf-error-code['"]>1015<|rate limited|You are being rate limited)"#,
148 )
149 .case_insensitive(true)
150 .dot_matches_new_line(true)
151 .build()
152 .expect("invalid rate limit regex")
153});
154
155static RATE_LIMIT_DELAY_RE: Lazy<Regex> = Lazy::new(|| {
156 RegexBuilder::new(r#"(\d+)\s*(second|seconds|minute|minutes|hour|hours)"#)
157 .case_insensitive(true)
158 .build()
159 .expect("invalid delay regex")
160});
161
#[cfg(test)]
mod tests {
    use super::*;
    use http::{
        HeaderMap, HeaderValue, Method,
        header::{HeaderName, RETRY_AFTER, SERVER},
    };
    use url::Url;

    /// Owns the data that a borrowed `ChallengeResponse` points at.
    struct ResponseFixture {
        url: Url,
        headers: HeaderMap,
        method: Method,
        body: String,
        status: u16,
    }

    impl ResponseFixture {
        /// Creates a header-less GET fixture for a fixed example URL.
        fn new(body: &str, status: u16) -> Self {
            let url = Url::parse("https://example.com/rate-limited").unwrap();
            Self {
                url,
                headers: HeaderMap::new(),
                method: Method::GET,
                body: body.to_owned(),
                status,
            }
        }

        /// Sets `name` to `value`, replacing any previous value.
        fn insert_header(&mut self, name: HeaderName, value: HeaderValue) {
            self.headers.insert(name, value);
        }

        /// Borrows the fixture as the response type under test.
        fn response(&self) -> ChallengeResponse<'_> {
            ChallengeResponse {
                url: &self.url,
                status: self.status,
                headers: &self.headers,
                body: &self.body,
                request_method: &self.method,
            }
        }
    }

    /// Builds a 429 fixture that already carries the `server: cloudflare` header.
    fn cloudflare_429(body: &str) -> ResponseFixture {
        let mut fixture = ResponseFixture::new(body, 429);
        fixture.insert_header(SERVER, HeaderValue::from_static("cloudflare"));
        fixture
    }

    #[test]
    fn detects_rate_limit() {
        let fixture =
            cloudflare_429("<span class='cf-error-code'>1015</span>You are being rate limited");
        let response = fixture.response();

        assert!(RateLimitHandler::is_rate_limited(&response));
    }

    #[test]
    fn plan_uses_retry_after_header() {
        let mut fixture = cloudflare_429("<span class='cf-error-code'>1015</span> Rate limited");
        fixture.insert_header(RETRY_AFTER, HeaderValue::from_static("120"));
        let response = fixture.response();

        let plan = RateLimitHandler::new()
            .plan(&response, None)
            .expect("plan");

        assert!(plan.should_retry);
        assert_eq!(plan.wait.unwrap(), Duration::from_secs(120));
        assert_eq!(
            plan.metadata.get("delay_source").map(String::as_str),
            Some("header")
        );
    }

    #[test]
    fn plan_extracts_delay_from_body() {
        let fixture = cloudflare_429(
            "<span class='cf-error-code'>1015</span> Please wait 10 minutes before retrying",
        );
        let response = fixture.response();

        let plan = RateLimitHandler::new()
            .plan(&response, None)
            .expect("plan");

        // "10 minutes" must translate to at least 600 seconds.
        assert!(plan.wait.unwrap() >= Duration::from_secs(600));
        assert_eq!(
            plan.metadata.get("delay_source").map(String::as_str),
            Some("body")
        );
    }
}