cloudscraper_rs/challenges/user_agents/
mod.rs1use once_cell::sync::Lazy;
9use rand::seq::SliceRandom;
10use rand::thread_rng;
11use serde::Deserialize;
12use std::borrow::Cow;
13use std::collections::HashMap;
14use std::fs;
15use std::io;
16use std::path::{Path, PathBuf};
17
18#[derive(Debug, Deserialize)]
20struct UserAgentData {
21 headers: HashMap<String, HeaderProfile>,
22 #[serde(rename = "cipherSuite")]
23 cipher_suites: HashMap<String, Vec<String>>,
24 #[serde(rename = "user_agents")]
25 user_agents: HashMap<DeviceKind, HashMap<String, HashMap<String, Vec<String>>>>,
26}
27
28#[derive(Debug, Deserialize, Clone)]
29struct HeaderProfile {
30 #[serde(rename = "User-Agent")]
31 user_agent: Option<String>,
32 #[serde(rename = "Accept")]
33 accept: String,
34 #[serde(rename = "Accept-Language")]
35 accept_language: String,
36 #[serde(rename = "Accept-Encoding")]
37 accept_encoding: String,
38}
39
40#[derive(Debug, Deserialize, Eq, PartialEq, Hash, Clone, Copy)]
41#[serde(rename_all = "lowercase")]
42enum DeviceKind {
43 Desktop,
44 Mobile,
45}
46
/// Caller-supplied constraints for choosing a user-agent profile.
#[derive(Clone, Debug)]
pub struct UserAgentOptions {
    /// Explicit user-agent string; when set, random selection is skipped.
    pub custom: Option<String>,
    /// Platform to restrict the selection to; a random available platform is used when `None`.
    pub platform: Option<String>,
    /// Browser to restrict the selection to; a random available browser is used when `None`.
    pub browser: Option<String>,
    /// Whether desktop user agents may be selected.
    pub desktop: bool,
    /// Whether mobile user agents may be selected.
    pub mobile: bool,
    /// When `false`, `br` is removed from the generated `Accept-Encoding` header.
    pub allow_brotli: bool,
}
57
58impl Default for UserAgentOptions {
59 fn default() -> Self {
60 Self {
61 custom: None,
62 platform: None,
63 browser: None,
64 desktop: true,
65 mobile: true,
66 allow_brotli: false,
67 }
68 }
69}
70
/// Result of a profile selection: request headers plus the matching cipher suites.
#[derive(Clone, Debug)]
pub struct UserAgentProfile {
    /// Header name -> header value.
    pub headers: HashMap<String, String>,
    /// Cipher-suite names associated with the chosen browser.
    pub cipher_suites: Vec<String>,
}
77
78#[derive(Debug)]
80pub struct UserAgentManager {
81 data: UserAgentData,
82}
83
84static USER_AGENT_MANAGER: Lazy<Result<UserAgentManager, UserAgentError>> = Lazy::new(|| {
86 let paths = candidate_paths();
87 let mut last_err = None;
88
89 for path in paths {
90 match fs::read_to_string(&path) {
91 Ok(contents) => {
92 let data: UserAgentData =
93 serde_json::from_str(&contents).map_err(|err| UserAgentError::InvalidJson {
94 path: path.clone(),
95 source: err,
96 })?;
97 return Ok(UserAgentManager { data });
98 }
99 Err(err) if err.kind() == io::ErrorKind::NotFound => {
100 last_err = Some(UserAgentError::FileMissing { path });
101 continue;
102 }
103 Err(err) => {
104 return Err(UserAgentError::Io { path, source: err });
105 }
106 }
107 }
108
109 Err(last_err.unwrap_or(UserAgentError::NoDataSources))
110});
111
112pub fn get_user_agent_profile(opts: UserAgentOptions) -> Result<UserAgentProfile, UserAgentError> {
114 let manager = USER_AGENT_MANAGER
115 .as_ref()
116 .map_err(|err| UserAgentError::InitializationFailure(err.to_string()))?;
117 manager.select_profile(opts)
118}
119
120fn permitted_device_kinds(opts: &UserAgentOptions) -> Vec<DeviceKind> {
121 let mut kinds = Vec::new();
122 if opts.desktop {
123 kinds.push(DeviceKind::Desktop);
124 }
125 if opts.mobile {
126 kinds.push(DeviceKind::Mobile);
127 }
128 kinds
129}
130
131impl UserAgentManager {
132 fn select_profile(&self, opts: UserAgentOptions) -> Result<UserAgentProfile, UserAgentError> {
133 if !opts.desktop && !opts.mobile {
134 return Err(UserAgentError::InvalidOptions(
135 "Desktop and mobile cannot both be disabled".into(),
136 ));
137 }
138
139 if let Some(custom) = opts.custom {
140 return self.custom_profile(custom);
141 }
142
143 let permitted_kinds = permitted_device_kinds(&opts);
144
145 let platform = self.resolve_platform(&opts, &permitted_kinds)?;
146
147 let filtered = self.collect_profiles(&permitted_kinds, &platform);
148
149 if filtered.is_empty() {
150 return Err(UserAgentError::ProfileNotFound);
151 }
152
153 let browser = match opts.browser {
154 Some(browser) => {
155 if !filtered.contains_key(&browser) {
156 return Err(UserAgentError::InvalidOptions(
157 format!("Browser '{browser}' not available for platform '{platform}'")
158 .into(),
159 ));
160 }
161 browser
162 }
163 None => {
164 let browsers: Vec<String> = filtered.keys().cloned().collect();
165 random_choice(&browsers)
166 }
167 };
168
169 let agents = filtered
170 .get(&browser)
171 .ok_or(UserAgentError::ProfileNotFound)?;
172
173 if agents.is_empty() {
174 return Err(UserAgentError::ProfileNotFound);
175 }
176
177 let user_agent = random_choice(agents);
178 let mut headers = self
179 .data
180 .headers
181 .get(&browser)
182 .cloned()
183 .ok_or(UserAgentError::ProfileNotFound)?;
184 headers.user_agent = Some(user_agent);
185
186 let mut map = header_profile_to_map(&headers);
187 if !opts.allow_brotli {
188 strip_brotli(&mut map);
189 }
190
191 let cipher_suites = self
192 .data
193 .cipher_suites
194 .get(&browser)
195 .cloned()
196 .unwrap_or_default();
197
198 Ok(UserAgentProfile {
199 headers: map,
200 cipher_suites,
201 })
202 }
203
204 fn custom_profile(&self, custom: String) -> Result<UserAgentProfile, UserAgentError> {
205 if let Some((browser, headers)) = self.try_match_custom(&custom) {
206 let mut map = header_profile_to_map(headers);
207 map.insert("User-Agent".into(), custom.clone());
208
209 let cipher_suites = self
210 .data
211 .cipher_suites
212 .get(browser)
213 .cloned()
214 .unwrap_or_else(default_cipher_suites);
215
216 Ok(UserAgentProfile {
217 headers: map,
218 cipher_suites,
219 })
220 } else {
221 Ok(UserAgentProfile {
222 headers: default_headers(&custom),
223 cipher_suites: default_cipher_suites(),
224 })
225 }
226 }
227
228 fn try_match_custom(&self, custom: &str) -> Option<(&String, &HeaderProfile)> {
229 for device_map in self.data.user_agents.values() {
230 for platform_map in device_map.values() {
231 for (browser, agents) in platform_map {
232 if agents.iter().any(|agent| agent.contains(custom))
233 && let Some(headers) = self.data.headers.get(browser)
234 {
235 return Some((browser, headers));
236 }
237 }
238 }
239 }
240 None
241 }
242
243 fn resolve_platform(
244 &self,
245 opts: &UserAgentOptions,
246 permitted_kinds: &[DeviceKind],
247 ) -> Result<String, UserAgentError> {
248 const VALID: &[&str] = &["linux", "windows", "darwin", "android", "ios"];
249
250 match opts.platform {
251 Some(ref platform) => {
252 if !VALID.contains(&platform.as_str()) {
253 return Err(UserAgentError::InvalidOptions(
254 format!("Invalid platform '{platform}'; valid: {}", VALID.join(", "))
255 .into(),
256 ));
257 }
258 if !self.platform_available(permitted_kinds, platform) {
259 return Err(UserAgentError::ProfileNotFound);
260 }
261 Ok(platform.clone())
262 }
263 None => {
264 let candidates: Vec<&str> = VALID
265 .iter()
266 .copied()
267 .filter(|platform| self.platform_available(permitted_kinds, platform))
268 .collect();
269
270 if candidates.is_empty() {
271 return Err(UserAgentError::ProfileNotFound);
272 }
273
274 Ok(random_choice(&candidates).to_string())
275 }
276 }
277 }
278
279 fn collect_profiles(
280 &self,
281 permitted_kinds: &[DeviceKind],
282 platform: &str,
283 ) -> HashMap<String, Vec<String>> {
284 let mut filtered = HashMap::new();
285
286 for device_kind in permitted_kinds {
287 if let Some(device_map) = self.data.user_agents.get(device_kind)
288 && let Some(platform_map) = device_map.get(platform)
289 {
290 for (browser, agents) in platform_map {
291 if agents.is_empty() {
292 continue;
293 }
294
295 filtered
296 .entry(browser.clone())
297 .or_insert_with(Vec::new)
298 .extend(agents.iter().cloned());
299 }
300 }
301 }
302
303 filtered
304 }
305
306 fn platform_available(&self, permitted_kinds: &[DeviceKind], platform: &str) -> bool {
307 permitted_kinds.iter().any(|kind| {
308 self.data
309 .user_agents
310 .get(kind)
311 .and_then(|device_map| device_map.get(platform))
312 .map(|platform_map| platform_map.values().any(|agents| !agents.is_empty()))
313 .unwrap_or(false)
314 })
315 }
316}
317
/// Lists the locations where `browsers.json` is looked for, in priority order.
///
/// When the `CARGO_MANIFEST_DIR` environment variable is set at run time, two locations
/// below it come first (the legacy checkout, then the copy under `src`). The current
/// working directory, when it can be determined, is appended last.
fn candidate_paths() -> Vec<PathBuf> {
    const LEGACY: [&str; 5] = [
        "cloudscraper-master (zied)",
        "cloudscraper-master",
        "cloudscraper",
        "user_agent",
        "browsers.json",
    ];
    const EMBEDDED: [&str; 4] = ["src", "challenges", "user_agents", "browsers.json"];

    // Appends each component to `root` with `Path::join`, one at a time.
    let under = |root: &Path, parts: &[&str]| -> PathBuf {
        parts
            .iter()
            .fold(root.to_path_buf(), |acc, part| acc.join(part))
    };

    let mut found = Vec::new();

    if let Ok(manifest) = std::env::var("CARGO_MANIFEST_DIR") {
        let root = Path::new(&manifest);
        found.push(under(root, &LEGACY));
        found.push(under(root, &EMBEDDED));
    }

    if let Ok(cwd) = std::env::current_dir() {
        found.push(cwd.join("browsers.json"));
    }

    found
}
346
347fn header_profile_to_map(profile: &HeaderProfile) -> HashMap<String, String> {
348 let mut map = HashMap::new();
349 if let Some(ref ua) = profile.user_agent {
350 map.insert("User-Agent".into(), ua.clone());
351 }
352 map.insert("Accept".into(), profile.accept.clone());
353 map.insert("Accept-Language".into(), profile.accept_language.clone());
354 map.insert("Accept-Encoding".into(), profile.accept_encoding.clone());
355 map
356}
357
/// Removes the brotli (`br`) content coding from the `Accept-Encoding` header, if present.
///
/// The header value is treated as a comma-separated list. An element is dropped when its
/// coding name (the part before any `;` parameters such as `;q=0.9`) equals `br`,
/// compared ASCII case-insensitively. Empty list elements are dropped as well. The
/// remaining elements are trimmed and re-joined with `", "`. The map is left untouched
/// when it has no `Accept-Encoding` entry.
fn strip_brotli(headers: &mut HashMap<String, String>) {
    if let Some(encoding) = headers.get_mut("Accept-Encoding") {
        let kept: Vec<&str> = encoding
            .split(',')
            .map(str::trim)
            .filter(|element| !element.is_empty())
            .filter(|element| {
                // Compare only the coding name so weighted forms like `br;q=0.9` match too.
                let coding = element.split(';').next().unwrap_or("").trim();
                !coding.eq_ignore_ascii_case("br")
            })
            .collect();

        let rebuilt = kept.join(", ");
        *encoding = rebuilt;
    }
}
369
370fn random_choice<T: Clone>(items: &[T]) -> T {
371 let mut rng = thread_rng();
372 items
373 .choose(&mut rng)
374 .cloned()
375 .expect("random choice on empty slice")
376}
377
/// Builds the fallback header set used for a custom user agent that matches no known browser.
///
/// `User-Agent` is set to `custom`; `Accept`, `Accept-Language` and `Accept-Encoding`
/// receive fixed values.
fn default_headers(custom: &str) -> HashMap<String, String> {
    let entries = [
        ("User-Agent", custom),
        (
            "Accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        ),
        ("Accept-Language", "en-US,en;q=0.9"),
        ("Accept-Encoding", "gzip, deflate"),
    ];

    entries
        .iter()
        .map(|&(name, value)| (name.to_string(), value.to_string()))
        .collect()
}
390
/// Returns the fallback cipher-suite list, in preference order.
fn default_cipher_suites() -> Vec<String> {
    const SUITES: [&str; 6] = [
        "TLS_AES_128_GCM_SHA256",
        "TLS_AES_256_GCM_SHA384",
        "ECDHE-ECDSA-AES128-GCM-SHA256",
        "ECDHE-RSA-AES128-GCM-SHA256",
        "ECDHE-ECDSA-AES256-GCM-SHA384",
        "ECDHE-RSA-AES256-GCM-SHA384",
    ];

    SUITES.iter().map(|suite| suite.to_string()).collect()
}
401
402#[derive(Debug, thiserror::Error)]
403pub enum UserAgentError {
404 #[error("user-agent data file missing: {path:?}")]
405 FileMissing { path: PathBuf },
406 #[error("user-agent JSON invalid at {path:?}: {source}")]
407 InvalidJson {
408 path: PathBuf,
409 source: serde_json::Error,
410 },
411 #[error("I/O error reading {path:?}: {source}")]
412 Io { path: PathBuf, source: io::Error },
413 #[error("no user-agent data sources found")]
414 NoDataSources,
415 #[error("invalid user-agent options: {0}")]
416 InvalidOptions(Cow<'static, str>),
417 #[error("no matching user-agent profile found")]
418 ProfileNotFound,
419 #[error("user-agent manager initialization failed: {0}")]
420 InitializationFailure(String),
421}
422
#[cfg(test)]
mod tests {
    use super::*;

    /// With default options a loaded manager must yield a profile carrying a `User-Agent`.
    #[test]
    fn default_selection_returns_profile() {
        // Nothing is asserted when the data file could not be loaded in this environment.
        let manager = match USER_AGENT_MANAGER.as_ref() {
            Ok(manager) => manager,
            Err(_) => return,
        };

        let profile = manager.select_profile(UserAgentOptions::default()).unwrap();
        assert!(profile.headers.contains_key("User-Agent"));
    }
}
434}