cloudscraper_rs/challenges/user_agents/
mod.rs

1//! User-Agent profile manager.
2//!
3//! Responsibilities:
4//! - Load user-agent definitions (headers + cipher suites) from `browsers.json`.
5//! - Provide filtered selections based on platform/browser/mobile flags.
6//! - Allow custom overrides while falling back to sensible defaults.
7
8use once_cell::sync::Lazy;
9use rand::seq::SliceRandom;
10use rand::thread_rng;
11use serde::Deserialize;
12use std::borrow::Cow;
13use std::collections::HashMap;
14use std::fs;
15use std::io;
16use std::path::{Path, PathBuf};
17
18/// Top level representation of `browsers.json`.
19#[derive(Debug, Deserialize)]
20struct UserAgentData {
21    headers: HashMap<String, HeaderProfile>,
22    #[serde(rename = "cipherSuite")]
23    cipher_suites: HashMap<String, Vec<String>>,
24    #[serde(rename = "user_agents")]
25    user_agents: HashMap<DeviceKind, HashMap<String, HashMap<String, Vec<String>>>>,
26}
27
/// Header template for one browser as stored in `browsers.json`.
///
/// The serde renames map each field to the HTTP header name used as its
/// JSON key.
#[derive(Debug, Deserialize, Clone)]
struct HeaderProfile {
    /// `User-Agent` value; may be absent in the JSON.
    #[serde(rename = "User-Agent")]
    user_agent: Option<String>,
    /// `Accept` header value.
    #[serde(rename = "Accept")]
    accept: String,
    /// `Accept-Language` header value.
    #[serde(rename = "Accept-Language")]
    accept_language: String,
    /// `Accept-Encoding` header value.
    #[serde(rename = "Accept-Encoding")]
    accept_encoding: String,
}
39
/// Device class used as the outermost key of the `user_agents` table.
///
/// Deserialized from the lowercase variant names (`desktop`, `mobile`).
#[derive(Debug, Deserialize, Eq, PartialEq, Hash, Clone, Copy)]
#[serde(rename_all = "lowercase")]
enum DeviceKind {
    /// Desktop user agents.
    Desktop,
    /// Mobile user agents.
    Mobile,
}
46
/// Options to filter/select a profile.
#[derive(Debug, Clone)]
pub struct UserAgentOptions {
    /// Explicit user-agent string. When set, platform/browser filtering is
    /// skipped and the profile is built around this value.
    pub custom: Option<String>,
    /// Platform to restrict to; must be one of `linux`, `windows`, `darwin`,
    /// `android` or `ios`. `None` picks an available platform at random.
    pub platform: Option<String>,
    /// Browser to restrict to; must have agents for the resolved platform.
    /// `None` picks an available browser at random.
    pub browser: Option<String>,
    /// Whether desktop user agents may be selected.
    pub desktop: bool,
    /// Whether mobile user agents may be selected.
    ///
    /// At least one of `desktop` and `mobile` must be `true`.
    pub mobile: bool,
    /// When `false`, the `br` encoding is stripped from `Accept-Encoding`
    /// during profile selection.
    pub allow_brotli: bool,
}
57
58impl Default for UserAgentOptions {
59    fn default() -> Self {
60        Self {
61            custom: None,
62            platform: None,
63            browser: None,
64            desktop: true,
65            mobile: true,
66            allow_brotli: false,
67        }
68    }
69}
70
/// Final selected profile.
#[derive(Debug, Clone)]
pub struct UserAgentProfile {
    /// Request headers keyed by header name (`User-Agent`, `Accept`,
    /// `Accept-Language`, `Accept-Encoding`).
    pub headers: HashMap<String, String>,
    /// Cipher suite names associated with the selected browser.
    pub cipher_suites: Vec<String>,
}
77
/// Provides user-agent profiles for challenge solvers.
#[derive(Debug)]
pub struct UserAgentManager {
    /// Parsed contents of `browsers.json`.
    data: UserAgentData,
}
83
84/// Global singleton loaded on demand.
85static USER_AGENT_MANAGER: Lazy<Result<UserAgentManager, UserAgentError>> = Lazy::new(|| {
86    let paths = candidate_paths();
87    let mut last_err = None;
88
89    for path in paths {
90        match fs::read_to_string(&path) {
91            Ok(contents) => {
92                let data: UserAgentData =
93                    serde_json::from_str(&contents).map_err(|err| UserAgentError::InvalidJson {
94                        path: path.clone(),
95                        source: err,
96                    })?;
97                return Ok(UserAgentManager { data });
98            }
99            Err(err) if err.kind() == io::ErrorKind::NotFound => {
100                last_err = Some(UserAgentError::FileMissing { path });
101                continue;
102            }
103            Err(err) => {
104                return Err(UserAgentError::Io { path, source: err });
105            }
106        }
107    }
108
109    Err(last_err.unwrap_or(UserAgentError::NoDataSources))
110});
111
112/// Retrieve a profile using given options.
113pub fn get_user_agent_profile(opts: UserAgentOptions) -> Result<UserAgentProfile, UserAgentError> {
114    let manager = USER_AGENT_MANAGER
115        .as_ref()
116        .map_err(|err| UserAgentError::InitializationFailure(err.to_string()))?;
117    manager.select_profile(opts)
118}
119
120fn permitted_device_kinds(opts: &UserAgentOptions) -> Vec<DeviceKind> {
121    let mut kinds = Vec::new();
122    if opts.desktop {
123        kinds.push(DeviceKind::Desktop);
124    }
125    if opts.mobile {
126        kinds.push(DeviceKind::Mobile);
127    }
128    kinds
129}
130
131impl UserAgentManager {
132    fn select_profile(&self, opts: UserAgentOptions) -> Result<UserAgentProfile, UserAgentError> {
133        if !opts.desktop && !opts.mobile {
134            return Err(UserAgentError::InvalidOptions(
135                "Desktop and mobile cannot both be disabled".into(),
136            ));
137        }
138
139        if let Some(custom) = opts.custom {
140            return self.custom_profile(custom);
141        }
142
143        let permitted_kinds = permitted_device_kinds(&opts);
144
145        let platform = self.resolve_platform(&opts, &permitted_kinds)?;
146
147        let filtered = self.collect_profiles(&permitted_kinds, &platform);
148
149        if filtered.is_empty() {
150            return Err(UserAgentError::ProfileNotFound);
151        }
152
153        let browser = match opts.browser {
154            Some(browser) => {
155                if !filtered.contains_key(&browser) {
156                    return Err(UserAgentError::InvalidOptions(
157                        format!("Browser '{browser}' not available for platform '{platform}'")
158                            .into(),
159                    ));
160                }
161                browser
162            }
163            None => {
164                let browsers: Vec<String> = filtered.keys().cloned().collect();
165                random_choice(&browsers)
166            }
167        };
168
169        let agents = filtered
170            .get(&browser)
171            .ok_or(UserAgentError::ProfileNotFound)?;
172
173        if agents.is_empty() {
174            return Err(UserAgentError::ProfileNotFound);
175        }
176
177        let user_agent = random_choice(agents);
178        let mut headers = self
179            .data
180            .headers
181            .get(&browser)
182            .cloned()
183            .ok_or(UserAgentError::ProfileNotFound)?;
184        headers.user_agent = Some(user_agent);
185
186        let mut map = header_profile_to_map(&headers);
187        if !opts.allow_brotli {
188            strip_brotli(&mut map);
189        }
190
191        let cipher_suites = self
192            .data
193            .cipher_suites
194            .get(&browser)
195            .cloned()
196            .unwrap_or_default();
197
198        Ok(UserAgentProfile {
199            headers: map,
200            cipher_suites,
201        })
202    }
203
204    fn custom_profile(&self, custom: String) -> Result<UserAgentProfile, UserAgentError> {
205        if let Some((browser, headers)) = self.try_match_custom(&custom) {
206            let mut map = header_profile_to_map(headers);
207            map.insert("User-Agent".into(), custom.clone());
208
209            let cipher_suites = self
210                .data
211                .cipher_suites
212                .get(browser)
213                .cloned()
214                .unwrap_or_else(default_cipher_suites);
215
216            Ok(UserAgentProfile {
217                headers: map,
218                cipher_suites,
219            })
220        } else {
221            Ok(UserAgentProfile {
222                headers: default_headers(&custom),
223                cipher_suites: default_cipher_suites(),
224            })
225        }
226    }
227
228    fn try_match_custom(&self, custom: &str) -> Option<(&String, &HeaderProfile)> {
229        for device_map in self.data.user_agents.values() {
230            for platform_map in device_map.values() {
231                for (browser, agents) in platform_map {
232                    if agents.iter().any(|agent| agent.contains(custom))
233                        && let Some(headers) = self.data.headers.get(browser)
234                    {
235                        return Some((browser, headers));
236                    }
237                }
238            }
239        }
240        None
241    }
242
243    fn resolve_platform(
244        &self,
245        opts: &UserAgentOptions,
246        permitted_kinds: &[DeviceKind],
247    ) -> Result<String, UserAgentError> {
248        const VALID: &[&str] = &["linux", "windows", "darwin", "android", "ios"];
249
250        match opts.platform {
251            Some(ref platform) => {
252                if !VALID.contains(&platform.as_str()) {
253                    return Err(UserAgentError::InvalidOptions(
254                        format!("Invalid platform '{platform}'; valid: {}", VALID.join(", "))
255                            .into(),
256                    ));
257                }
258                if !self.platform_available(permitted_kinds, platform) {
259                    return Err(UserAgentError::ProfileNotFound);
260                }
261                Ok(platform.clone())
262            }
263            None => {
264                let candidates: Vec<&str> = VALID
265                    .iter()
266                    .copied()
267                    .filter(|platform| self.platform_available(permitted_kinds, platform))
268                    .collect();
269
270                if candidates.is_empty() {
271                    return Err(UserAgentError::ProfileNotFound);
272                }
273
274                Ok(random_choice(&candidates).to_string())
275            }
276        }
277    }
278
279    fn collect_profiles(
280        &self,
281        permitted_kinds: &[DeviceKind],
282        platform: &str,
283    ) -> HashMap<String, Vec<String>> {
284        let mut filtered = HashMap::new();
285
286        for device_kind in permitted_kinds {
287            if let Some(device_map) = self.data.user_agents.get(device_kind)
288                && let Some(platform_map) = device_map.get(platform)
289            {
290                for (browser, agents) in platform_map {
291                    if agents.is_empty() {
292                        continue;
293                    }
294
295                    filtered
296                        .entry(browser.clone())
297                        .or_insert_with(Vec::new)
298                        .extend(agents.iter().cloned());
299                }
300            }
301        }
302
303        filtered
304    }
305
306    fn platform_available(&self, permitted_kinds: &[DeviceKind], platform: &str) -> bool {
307        permitted_kinds.iter().any(|kind| {
308            self.data
309                .user_agents
310                .get(kind)
311                .and_then(|device_map| device_map.get(platform))
312                .map(|platform_map| platform_map.values().any(|agents| !agents.is_empty()))
313                .unwrap_or(false)
314        })
315    }
316}
317
/// List all candidate paths to locate `browsers.json`.
///
/// In order: two locations under `CARGO_MANIFEST_DIR` (only when that
/// variable is set at run time), then `browsers.json` in the current working
/// directory (only when it can be determined).
fn candidate_paths() -> Vec<PathBuf> {
    const LEGACY_SEGMENTS: [&str; 5] = [
        "cloudscraper-master (zied)",
        "cloudscraper-master",
        "cloudscraper",
        "user_agent",
        "browsers.json",
    ];
    const EMBEDDED_SEGMENTS: [&str; 4] = ["src", "challenges", "user_agents", "browsers.json"];

    // Append each segment to `root` in turn.
    let under = |root: &Path, segments: &[&str]| -> PathBuf {
        segments
            .iter()
            .fold(root.to_path_buf(), |acc, segment| acc.join(segment))
    };

    let mut candidates = Vec::new();

    if let Ok(manifest) = std::env::var("CARGO_MANIFEST_DIR") {
        let root = Path::new(&manifest);
        candidates.push(under(root, &LEGACY_SEGMENTS));
        candidates.push(under(root, &EMBEDDED_SEGMENTS));
    }

    if let Ok(cwd) = std::env::current_dir() {
        candidates.push(cwd.join("browsers.json"));
    }

    candidates
}
346
347fn header_profile_to_map(profile: &HeaderProfile) -> HashMap<String, String> {
348    let mut map = HashMap::new();
349    if let Some(ref ua) = profile.user_agent {
350        map.insert("User-Agent".into(), ua.clone());
351    }
352    map.insert("Accept".into(), profile.accept.clone());
353    map.insert("Accept-Language".into(), profile.accept_language.clone());
354    map.insert("Accept-Encoding".into(), profile.accept_encoding.clone());
355    map
356}
357
/// Remove the brotli (`br`) coding from the `Accept-Encoding` header, if
/// that header is present.
///
/// The value is split on commas and each entry trimmed. An entry is dropped
/// when its coding name — the part before any `;` parameter such as
/// `;q=0.9` — equals `br`, ignoring ASCII case. The remaining entries are
/// re-joined with `", "`. Maps without an `Accept-Encoding` key are left
/// untouched.
fn strip_brotli(headers: &mut HashMap<String, String>) {
    if let Some(encoding) = headers.get_mut("Accept-Encoding") {
        let kept: Vec<&str> = encoding
            .split(',')
            .map(str::trim)
            .filter(|entry| {
                // Compare only the coding name so weighted entries like
                // `br;q=0.9` are recognised as brotli too.
                let coding = entry.split(';').next().unwrap_or(entry).trim();
                !coding.eq_ignore_ascii_case("br")
            })
            .collect();
        let rebuilt = kept.join(", ");
        *encoding = rebuilt;
    }
}
369
370fn random_choice<T: Clone>(items: &[T]) -> T {
371    let mut rng = thread_rng();
372    items
373        .choose(&mut rng)
374        .cloned()
375        .expect("random choice on empty slice")
376}
377
/// Generic header set for a user agent that matches no known browser.
///
/// `User-Agent` is set to `custom`; the other three headers use fixed
/// values, and `Accept-Encoding` does not advertise brotli.
fn default_headers(custom: &str) -> HashMap<String, String> {
    let entries = [
        ("User-Agent", custom),
        (
            "Accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        ),
        ("Accept-Language", "en-US,en;q=0.9"),
        ("Accept-Encoding", "gzip, deflate"),
    ];

    entries
        .iter()
        .map(|&(name, value)| (name.to_string(), value.to_string()))
        .collect()
}
390
/// Cipher suite names used when the data has no entry for a browser matched
/// by a custom user agent, or when the custom agent matches nothing.
fn default_cipher_suites() -> Vec<String> {
    const SUITES: [&str; 6] = [
        "TLS_AES_128_GCM_SHA256",
        "TLS_AES_256_GCM_SHA384",
        "ECDHE-ECDSA-AES128-GCM-SHA256",
        "ECDHE-RSA-AES128-GCM-SHA256",
        "ECDHE-ECDSA-AES256-GCM-SHA384",
        "ECDHE-RSA-AES256-GCM-SHA384",
    ];

    SUITES.iter().map(|&suite| suite.to_owned()).collect()
}
401
/// Errors produced while loading user-agent data or selecting a profile.
#[derive(Debug, thiserror::Error)]
pub enum UserAgentError {
    /// A candidate data file did not exist.
    #[error("user-agent data file missing: {path:?}")]
    FileMissing { path: PathBuf },
    /// A data file was read but could not be parsed as the expected JSON.
    #[error("user-agent JSON invalid at {path:?}: {source}")]
    InvalidJson {
        path: PathBuf,
        source: serde_json::Error,
    },
    /// Reading a data file failed for a reason other than "not found".
    #[error("I/O error reading {path:?}: {source}")]
    Io { path: PathBuf, source: io::Error },
    /// There were no candidate paths to try.
    #[error("no user-agent data sources found")]
    NoDataSources,
    /// The supplied options were contradictory or named an unknown or
    /// unavailable platform/browser; the payload explains which.
    #[error("invalid user-agent options: {0}")]
    InvalidOptions(Cow<'static, str>),
    /// The data contains nothing that satisfies the requested filters.
    #[error("no matching user-agent profile found")]
    ProfileNotFound,
    /// The global manager failed to load; carries the rendered load error.
    #[error("user-agent manager initialization failed: {0}")]
    InitializationFailure(String),
}
422
#[cfg(test)]
mod tests {
    use super::*;

    /// With default options a profile is produced and carries a `User-Agent`.
    ///
    /// The check is skipped (and the test passes) when `browsers.json` could
    /// not be loaded in the current environment.
    #[test]
    fn default_selection_returns_profile() {
        let manager = match USER_AGENT_MANAGER.as_ref() {
            Ok(manager) => manager,
            Err(_) => return,
        };

        let profile = manager.select_profile(UserAgentOptions::default()).unwrap();
        assert!(profile.headers.contains_key("User-Agent"));
    }
}