kick_rust/fetch/useragent/
mod.rs

1//! User-Agent management for robust web scraping
2//!
3//! This module provides realistic, up-to-date User-Agent strings that rotate
4//! to avoid detection and ensure long-term compatibility with web services.
5
6use std::sync::atomic::{AtomicUsize, Ordering};
7use once_cell::sync::Lazy;
8
/// Shared call counter that drives User-Agent rotation.
///
/// [`get_rotating_user_agent`] increments this value by one on every call and
/// reduces the previous value modulo the pool size to select an entry.
static UA_INDEX: AtomicUsize = AtomicUsize::new(0);
11
/// Pool of desktop browser User-Agent strings served by this module.
///
/// The vector is built once, on first access. Entry order matters:
/// [`get_latest_chrome_user_agent`] returns slot 0, and
/// [`get_rotating_user_agent`] walks the slots in order.
///
/// NOTE(review): the Chrome 129 entry does not satisfy
/// [`is_user_agent_current`], so not every pooled string counts as "current".
static USER_AGENTS: Lazy<Vec<&'static str>> = Lazy::new(|| {
    vec![
        // Chrome on Windows (most common); slot 0 is the newest Chrome build
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",

        // Chrome on macOS
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",

        // Firefox on Windows
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0",

        // Firefox on macOS
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:132.0) Gecko/20100101 Firefox/132.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) Gecko/20100101 Firefox/131.0",

        // Edge on Windows (Chromium-based, so each string also carries a Chrome/ token)
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0",

        // Safari on macOS
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0 Safari/605.1.15",

        // Chrome on Linux
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",

        // Firefox on Linux
        "Mozilla/5.0 (X11; Linux x86_64; rv:132.0) Gecko/20100101 Firefox/132.0",
        "Mozilla/5.0 (X11; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0",
    ]
});
49
50/// Get a rotating User-Agent string
51///
52/// This function rotates through different User-Agent strings to avoid
53/// detection and ensure realistic browser fingerprinting.
54pub fn get_rotating_user_agent() -> &'static str {
55    let index = UA_INDEX.fetch_add(1, Ordering::Relaxed) % USER_AGENTS.len();
56    USER_AGENTS[index]
57}
58
59/// Get a random User-Agent string
60pub fn get_random_user_agent() -> &'static str {
61    use rand::Rng;
62    let mut rng = rand::thread_rng();
63    let index = rng.gen_range(0..USER_AGENTS.len());
64    USER_AGENTS[index]
65}
66
67/// Get the latest Chrome User-Agent (most compatible)
68pub fn get_latest_chrome_user_agent() -> &'static str {
69    USER_AGENTS[0]
70}
71
72/// Get a specific User-Agent by index
73pub fn get_user_agent(index: usize) -> Option<&'static str> {
74    USER_AGENTS.get(index).copied()
75}
76
77/// Get the total number of available User-Agents
78pub fn get_user_agent_count() -> usize {
79    USER_AGENTS.len()
80}
81
/// Check whether a User-Agent string advertises a recent browser version.
///
/// A string counts as current when any of the following holds:
///
/// * its `Chrome/` major version is 130 or newer,
/// * its `Firefox/` major version is 130 or newer,
/// * its `Edg/` major version is 130 or newer,
/// * it carries a `Safari/605` token and its `Version/` major is 18 or newer.
///
/// Versions are compared numerically. A plain substring test such as
/// `contains("Chrome/13")` would accept Chrome 13.0 and reject Chrome 140.
pub fn is_user_agent_current(user_agent: &str) -> bool {
    const MIN_CHROMIUM_MAJOR: u32 = 130;
    const MIN_FIREFOX_MAJOR: u32 = 130;
    const MIN_EDGE_MAJOR: u32 = 130;
    const MIN_SAFARI_MAJOR: u32 = 18;

    let at_least = |token: &str, minimum: u32| {
        major_version_after(user_agent, token).map_or(false, |major| major >= minimum)
    };

    at_least("Chrome/", MIN_CHROMIUM_MAJOR)
        || at_least("Firefox/", MIN_FIREFOX_MAJOR)
        || at_least("Edg/", MIN_EDGE_MAJOR)
        // The `Safari/605` token alone does not identify the release; the
        // marketing version is carried by the separate `Version/` token.
        || (user_agent.contains("Safari/605") && at_least("Version/", MIN_SAFARI_MAJOR))
}

/// Parse the run of ASCII digits that directly follows the first `token`.
///
/// Returns `None` when `token` is absent, no digit follows it, or the digits
/// do not fit in a `u32`.
fn major_version_after(user_agent: &str, token: &str) -> Option<u32> {
    let (_, tail) = user_agent.split_once(token)?;
    let digits_end = tail
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(tail.len());
    tail[..digits_end].parse().ok()
}
90
91/// Generate a realistic browser fingerprint
92pub fn generate_browser_fingerprint() -> BrowserFingerprint {
93    let user_agent = get_rotating_user_agent();
94
95    BrowserFingerprint {
96        user_agent: user_agent.to_string(),
97        sec_ch_ua: extract_sec_ch_ua(user_agent),
98        sec_ch_ua_mobile: "false".to_string(),
99        sec_ch_ua_platform: extract_platform(user_agent),
100        accept_language: "en-US,en;q=0.9".to_string(),
101        accept_encoding: "gzip, deflate, br".to_string(),
102    }
103}
104
/// Set of header values that together describe one browser identity.
#[derive(Debug, Clone)]
pub struct BrowserFingerprint {
    /// Value sent in the `User-Agent` header.
    pub user_agent: String,
    /// Value sent in the `Sec-Ch-Ua` header (brand list).
    pub sec_ch_ua: String,
    /// Value sent in the `Sec-Ch-Ua-Mobile` header.
    pub sec_ch_ua_mobile: String,
    /// Value sent in the `Sec-Ch-Ua-Platform` header.
    pub sec_ch_ua_platform: String,
    /// Value sent in the `Accept-Language` header.
    pub accept_language: String,
    /// Preferred content encodings; stored but not emitted by
    /// [`BrowserFingerprint::get_curl_headers`].
    pub accept_encoding: String,
}

impl BrowserFingerprint {
    /// Render the fingerprint as `Name: value` header lines for curl requests.
    ///
    /// The list combines fixed headers (`Accept`, cache control, `Sec-Fetch-*`,
    /// and a `Referer`/`Origin` pair pointing at `https://kick.com`) with the
    /// values stored in this fingerprint. `User-Agent` is the last entry.
    pub fn get_curl_headers(&self) -> Vec<String> {
        vec![
            String::from("Accept: application/json, text/plain, */*"),
            format!("Accept-Language: {}", self.accept_language),
            // `Accept-Encoding` is deliberately not sent, to avoid compression
            // issues with the response body.
            String::from("Cache-Control: no-cache"),
            String::from("Pragma: no-cache"),
            format!("Sec-Ch-Ua: {}", self.sec_ch_ua),
            format!("Sec-Ch-Ua-Mobile: {}", self.sec_ch_ua_mobile),
            format!("Sec-Ch-Ua-Platform: {}", self.sec_ch_ua_platform),
            String::from("Sec-Fetch-Dest: empty"),
            String::from("Sec-Fetch-Mode: cors"),
            String::from("Sec-Fetch-Site: same-origin"),
            String::from("Referer: https://kick.com/"),
            String::from("Origin: https://kick.com"),
            format!("User-Agent: {}", self.user_agent),
        ]
    }
}
137
138/// Extract Sec-CH-UA from User-Agent
139fn extract_sec_ch_ua(user_agent: &str) -> String {
140    if user_agent.contains("Chrome") {
141        if user_agent.contains("Edg") {
142            // Edge
143            let version = extract_version(user_agent, "Edg/");
144            format!("\"Microsoft Edge\";v=\"{}\", \"Chromium\";v=\"{}\", \"Not_A Brand\";v=\"99\"",
145                    version.unwrap_or("131"),
146                    extract_version(user_agent, "Chrome/").unwrap_or("131"))
147        } else {
148            // Chrome
149            let version = extract_version(user_agent, "Chrome/").unwrap_or("131");
150            format!("\"Google Chrome\";v=\"{}\", \"Chromium\";v=\"{}\", \"Not_A Brand\";v=\"99\"",
151                    version, version)
152        }
153    } else if user_agent.contains("Firefox") {
154        "\"Firefox\";v=\"132\"".to_string()
155    } else if user_agent.contains("Safari") {
156        "\"Safari\";v=\"18\"".to_string()
157    } else {
158        "\"Not_A Brand\";v=\"99\"".to_string()
159    }
160}
161
/// Derive the quoted `Sec-CH-UA-Platform` value from a User-Agent string.
///
/// The markers are checked in order (`Windows`, `Macintosh`, `Linux`) and the
/// first one found in the string decides the result. With no marker present
/// the value is `"Unknown"`. The returned string includes the surrounding
/// double quotes.
fn extract_platform(user_agent: &str) -> String {
    // (substring to look for, header value to report), in priority order
    const PLATFORM_MARKERS: [(&str, &str); 3] = [
        ("Windows", "\"Windows\""),
        ("Macintosh", "\"macOS\""),
        ("Linux", "\"Linux\""),
    ];

    PLATFORM_MARKERS
        .iter()
        .find(|entry| user_agent.contains(entry.0))
        .map_or("\"Unknown\"", |entry| entry.1)
        .to_string()
}
174
/// Extract the major version that follows `prefix` in a User-Agent string.
///
/// Finds the first occurrence of `prefix` (for example `"Chrome/"`) and
/// returns the run of ASCII digits directly after it, borrowed from
/// `user_agent`. For `"Chrome/131.0.0.0"` the result is `Some("131")`.
///
/// The version may be the last token of the string, as with the `Edg/` and
/// `Firefox/` tokens, so no trailing separator is required.
///
/// Returns `None` when `prefix` does not occur or is not followed by a digit;
/// callers choose their own fallback.
fn extract_version<'a>(user_agent: &'a str, prefix: &str) -> Option<&'a str> {
    let (_, tail) = user_agent.split_once(prefix)?;

    // The major version ends at the first non-digit, or at end of input.
    let major = tail
        .find(|c: char| !c.is_ascii_digit())
        .map_or(tail, |end| &tail[..end]);

    if major.is_empty() {
        None
    } else {
        Some(major)
    }
}
199
#[cfg(test)]
mod tests {
    use super::*;

    /// Rotation must only ever hand out entries from the pool.
    #[test]
    fn test_rotating_user_agent() {
        // The rotation counter is process-global and other tests advance it
        // concurrently, so which slot a given call lands on is not
        // deterministic. Assert pool membership instead of specific entries.
        for _ in 0..get_user_agent_count() {
            let ua = get_rotating_user_agent();
            assert!(!ua.is_empty());
            assert!(USER_AGENTS.contains(&ua));
        }
    }

    /// Random selection returns pool entries, and current ones turn up quickly.
    #[test]
    fn test_random_user_agent() {
        let ua = get_random_user_agent();
        assert!(!ua.is_empty());
        assert!(USER_AGENTS.contains(&ua));

        // Not every pooled string passes the currency check, so allow several draws.
        let found_current = (0..10).any(|_| is_user_agent_current(get_random_user_agent()));
        assert!(found_current, "Should find at least one current user agent in 10 attempts");
    }

    /// Indexed lookup agrees with the dedicated accessors and is bounds-checked.
    #[test]
    fn test_user_agent_lookup() {
        let latest = get_latest_chrome_user_agent();
        assert_eq!(get_user_agent(0), Some(latest));
        assert!(is_user_agent_current(latest));
        assert_eq!(get_user_agent(get_user_agent_count()), None);
    }

    /// A generated fingerprint is fully populated and renders a User-Agent header.
    #[test]
    fn test_browser_fingerprint() {
        let fingerprint = generate_browser_fingerprint();
        assert!(!fingerprint.user_agent.is_empty());
        assert!(!fingerprint.sec_ch_ua.is_empty());
        assert!(!fingerprint.sec_ch_ua_platform.is_empty());

        let headers = fingerprint.get_curl_headers();
        assert!(!headers.is_empty());

        // Check that User-Agent is included
        assert!(headers.iter().any(|h| h.starts_with("User-Agent:")));
    }

    /// The pool is large enough for rotation to be meaningful.
    #[test]
    fn test_user_agent_count() {
        let count = get_user_agent_count();
        assert!(count > 10);
    }
}