mimic_rs/
parser.rs

1//! User-Agent string parser.
2//!
3//! This module provides high-performance parsing of User-Agent strings
4//! to extract browser brand, version, platform, and mobile status.
5
6use crate::{Brand, ClientHints, Platform};
7use std::error::Error;
8use std::fmt;
9
/// Reasons a User-Agent string can be rejected by the parser.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseError {
    /// The supplied User-Agent string had zero length.
    EmptyUserAgent,
    /// The User-Agent does not belong to a Chromium-based browser.
    NotChromiumBased,
    /// A version was expected but could not be read as a number.
    InvalidVersion,
    /// Neither a `Chrome/` nor a `Chromium/` token appears in the string.
    ChromeTokenNotFound,
}
22
23impl fmt::Display for ParseError {
24    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25        match self {
26            ParseError::EmptyUserAgent => write!(f, "empty user agent string"),
27            ParseError::NotChromiumBased => {
28                write!(f, "not a Chromium-based browser")
29            }
30            ParseError::InvalidVersion => write!(f, "invalid version number"),
31            ParseError::ChromeTokenNotFound => {
32                write!(f, "Chrome/Chromium token not found in user agent")
33            }
34        }
35    }
36}
37
38impl Error for ParseError {}
39
40/// Parse a User-Agent string into ClientHints.
41///
42/// This function extracts the browser brand, version, platform, and mobile
43/// status from a User-Agent string.
44pub fn parse(user_agent: &str) -> Result<ClientHints, ParseError> {
45    if user_agent.is_empty() {
46        return Err(ParseError::EmptyUserAgent);
47    }
48
49    // Detect platform first
50    let platform = detect_platform(user_agent);
51
52    // Detect if mobile
53    let is_mobile = detect_mobile(user_agent);
54
55    // Extract Chrome/Chromium version (required for all Chromium-based browsers)
56    let (chromium_version, major_version) = extract_chromium_version(user_agent)?;
57
58    // Detect specific browser brand
59    let brand = detect_brand(user_agent);
60
61    Ok(ClientHints::new(
62        brand,
63        major_version,
64        chromium_version,
65        platform,
66        is_mobile,
67    ))
68}
69
70/// Detect the platform from the User-Agent string.
71fn detect_platform(ua: &str) -> Platform {
72    // Check in order of specificity
73    if ua.contains("Android") {
74        Platform::Android
75    } else if ua.contains("CrOS") {
76        Platform::ChromeOS
77    } else if ua.contains("Windows") {
78        Platform::Windows
79    } else if ua.contains("Macintosh") || ua.contains("Mac OS") {
80        Platform::MacOS
81    } else if ua.contains("Linux") || ua.contains("X11") {
82        Platform::Linux
83    } else {
84        Platform::Unknown
85    }
86}
87
/// Report whether the User-Agent string looks like a mobile browser.
///
/// A `Mobile` token is sufficient on its own. Otherwise the string counts as
/// mobile only when it mentions `Android` and does not mention `Tablet`.
fn detect_mobile(ua: &str) -> bool {
    if ua.contains("Mobile") {
        return true;
    }
    // The `Tablet` exclusion applies to the Android check only.
    ua.contains("Android") && !ua.contains("Tablet")
}
93
94/// Extract the Chromium version from the User-Agent string.
95///
96/// Returns (full_version, major_version).
97fn extract_chromium_version(ua: &str) -> Result<(String, u32), ParseError> {
98    // Look for Chrome/ or Chromium/ token
99    let chrome_pos = ua
100        .find("Chrome/")
101        .or_else(|| ua.find("Chromium/"))
102        .ok_or(ParseError::ChromeTokenNotFound)?;
103
104    // Extract the version string
105    let version_start = chrome_pos + if ua[chrome_pos..].starts_with("Chromium/") { 9 } else { 7 };
106    let version_end = ua[version_start..]
107        .find(|c: char| !c.is_ascii_digit() && c != '.')
108        .map(|i| version_start + i)
109        .unwrap_or(ua.len());
110
111    let version_str = &ua[version_start..version_end];
112
113    if version_str.is_empty() {
114        return Err(ParseError::InvalidVersion);
115    }
116
117    // Extract major version
118    let major_str = version_str
119        .split('.')
120        .next()
121        .ok_or(ParseError::InvalidVersion)?;
122
123    let major_version = major_str
124        .parse::<u32>()
125        .map_err(|_| ParseError::InvalidVersion)?;
126
127    Ok((version_str.to_string(), major_version))
128}
129
130/// Detect the browser brand from the User-Agent string.
131///
132/// Checks for specific browser identifiers in order of priority.
133fn detect_brand(ua: &str) -> Brand {
134    // Check for specific browsers in order of priority
135    // Some browsers include both their identifier and Chrome, so check specific ones first
136
137    // Edge: contains "Edg/" (not "Edge/" which is legacy EdgeHTML)
138    if ua.contains("Edg/") || ua.contains("EdgA/") || ua.contains("EdgiOS/") {
139        return Brand::Edge;
140    }
141
142    // Brave: may contain "Brave" in navigator or may not (for fingerprint protection)
143    // Often detected via Brave-specific patterns
144    if ua.contains("Brave") {
145        return Brand::Brave;
146    }
147
148    // Opera: contains "OPR/" or "Opera/"
149    if ua.contains("OPR/") || ua.contains("Opera/") {
150        return Brand::Opera;
151    }
152
153    // Vivaldi: contains "Vivaldi/"
154    if ua.contains("Vivaldi/") {
155        return Brand::Vivaldi;
156    }
157
158    // Samsung Internet: contains "SamsungBrowser/"
159    if ua.contains("SamsungBrowser/") {
160        return Brand::Samsung;
161    }
162
163    // Yandex: contains "YaBrowser/" or "Yandex/"
164    if ua.contains("YaBrowser/") || ua.contains("Yandex/") {
165        return Brand::Yandex;
166    }
167
168    // Generic Chromium: contains "Chromium/"
169    if ua.contains("Chromium/") {
170        return Brand::Chromium;
171    }
172
173    // Default to Chrome if Chrome/ is present
174    if ua.contains("Chrome/") {
175        return Brand::Chrome;
176    }
177
178    // Fallback to Chromium
179    Brand::Chromium
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `ua`, failing the calling test if the parser rejects it.
    fn parsed(ua: &str) -> ClientHints {
        parse(ua).unwrap()
    }

    // --- Rejected inputs -------------------------------------------------

    #[test]
    fn test_empty_ua() {
        let outcome = parse("");
        assert_eq!(outcome, Err(ParseError::EmptyUserAgent));
    }

    #[test]
    fn test_firefox() {
        let outcome = parse(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0",
        );
        assert_eq!(outcome, Err(ParseError::ChromeTokenNotFound));
    }

    #[test]
    fn test_safari() {
        let outcome = parse(
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15",
        );
        assert_eq!(outcome, Err(ParseError::ChromeTokenNotFound));
    }

    // --- Google Chrome on each platform ----------------------------------

    #[test]
    fn test_chrome_windows() {
        let hints = parsed(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.129 Safari/537.36",
        );

        assert_eq!(hints.brand(), Brand::Chrome);
        assert_eq!(hints.major_version(), 120);
        assert_eq!(hints.full_version(), "120.0.6099.129");
        assert_eq!(hints.platform(), Platform::Windows);
        assert!(!hints.is_mobile());
    }

    #[test]
    fn test_chrome_macos() {
        let hints = parsed(
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        );

        assert_eq!(hints.brand(), Brand::Chrome);
        assert_eq!(hints.platform(), Platform::MacOS);
        assert!(!hints.is_mobile());
    }

    #[test]
    fn test_chrome_linux() {
        let hints = parsed(
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        );

        assert_eq!(hints.brand(), Brand::Chrome);
        assert_eq!(hints.platform(), Platform::Linux);
    }

    #[test]
    fn test_chrome_android() {
        let hints = parsed(
            "Mozilla/5.0 (Linux; Android 13; SM-G998B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.129 Mobile Safari/537.36",
        );

        assert_eq!(hints.brand(), Brand::Chrome);
        assert_eq!(hints.platform(), Platform::Android);
        assert!(hints.is_mobile());
    }

    #[test]
    fn test_chrome_os() {
        let hints = parsed(
            "Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        );

        assert_eq!(hints.platform(), Platform::ChromeOS);
    }

    // --- Other Chromium-based brands -------------------------------------

    #[test]
    fn test_edge_windows() {
        let hints = parsed(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.2210.91",
        );

        assert_eq!(hints.brand(), Brand::Edge);
        assert_eq!(hints.major_version(), 120);
        assert_eq!(hints.platform(), Platform::Windows);
    }

    #[test]
    fn test_edge_android() {
        let hints = parsed(
            "Mozilla/5.0 (Linux; Android 10; SM-G975F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36 EdgA/120.0.0.0",
        );

        assert_eq!(hints.brand(), Brand::Edge);
        assert_eq!(hints.platform(), Platform::Android);
        assert!(hints.is_mobile());
    }

    #[test]
    fn test_brave() {
        let hints = parsed(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Brave/120",
        );

        assert_eq!(hints.brand(), Brand::Brave);
    }

    #[test]
    fn test_opera() {
        let hints = parsed(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 OPR/106.0.0.0",
        );

        assert_eq!(hints.brand(), Brand::Opera);
    }

    #[test]
    fn test_vivaldi() {
        let hints = parsed(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Vivaldi/6.4.3160.47",
        );

        assert_eq!(hints.brand(), Brand::Vivaldi);
    }

    #[test]
    fn test_samsung() {
        let hints = parsed(
            "Mozilla/5.0 (Linux; Android 13; SM-G998B) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/23.0 Chrome/115.0.0.0 Mobile Safari/537.36",
        );

        assert_eq!(hints.brand(), Brand::Samsung);
        assert!(hints.is_mobile());
    }

    #[test]
    fn test_yandex() {
        let hints = parsed(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 YaBrowser/24.1.0.0 Safari/537.36",
        );

        assert_eq!(hints.brand(), Brand::Yandex);
    }

    #[test]
    fn test_chromium() {
        let hints = parsed(
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/120.0.0.0 Safari/537.36",
        );

        assert_eq!(hints.brand(), Brand::Chromium);
    }

    // --- Individual helpers ----------------------------------------------

    #[test]
    fn test_version_extraction() {
        let (full, major) = extract_chromium_version("Mozilla/5.0 Chrome/119.0.5678.90").unwrap();

        assert_eq!(full, "119.0.5678.90");
        assert_eq!(major, 119);
    }

    #[test]
    fn test_platform_detection() {
        let expectations = [
            ("Windows NT 10.0", Platform::Windows),
            ("Macintosh; Intel Mac OS X", Platform::MacOS),
            ("X11; Linux x86_64", Platform::Linux),
            ("Linux; Android 13", Platform::Android),
            ("CrOS x86_64", Platform::ChromeOS),
            ("Unknown OS", Platform::Unknown),
        ];

        for (ua, expected) in expectations.iter() {
            assert_eq!(detect_platform(ua), *expected);
        }
    }

    #[test]
    fn test_mobile_detection() {
        for ua in ["Mobile Safari/537.36", "Linux; Android 13"].iter() {
            assert!(detect_mobile(ua));
        }
        for ua in ["Windows NT 10.0", "Macintosh"].iter() {
            assert!(!detect_mobile(ua));
        }
    }

    #[test]
    fn test_parse_error_display() {
        assert_eq!(
            ParseError::EmptyUserAgent.to_string(),
            "empty user agent string"
        );
        assert_eq!(
            ParseError::NotChromiumBased.to_string(),
            "not a Chromium-based browser"
        );
        assert_eq!(
            ParseError::InvalidVersion.to_string(),
            "invalid version number"
        );
        assert_eq!(
            ParseError::ChromeTokenNotFound.to_string(),
            "Chrome/Chromium token not found in user agent"
        );
    }
}