agent_parser_ro/
lib.rs

//! A comprehensive user agent string parser.
//!
//! This crate provides functionality to parse user agent strings and extract information about:
//! - Browser/Client (Chrome, Safari, Firefox, etc.)
//! - Operating System (Windows, macOS, Android, etc.)
//! - Device Type (Mobile, Tablet, Desktop, etc.)
//!
//! # Examples
//!
//! ```
//! use agent_parser_ro::{UserAgentParser, Browser, OperatingSystem, DeviceType};
//!
//! let info = UserAgentParser::parse("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36");
//!
//! assert_eq!(info.os, OperatingSystem::Windows);
//! assert_eq!(info.browser, Browser::Chrome);
//! assert_eq!(info.device_type, DeviceType::Desktop);
//! ```
19
20use lazy_static::lazy_static;
21use regex::Regex;
22use serde::{Deserialize, Serialize};
23
24#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Hash, Clone)]
25pub enum Browser {
26    Chrome,
27    Safari,
28    Firefox,
29    Edge,
30    InternetExplorer,
31    Opera,
32    Dolphin,
33    Brave,
34    Puffin,
35    Maxthon,
36    Mercury,
37    Silk,
38    Vivaldi,
39    Yandex,
40    DuckDuckGo,
41    Tor,
42    Electron,
43    PhantomJS,
44    WebView,
45    Facebook,
46    Instagram,
47    Twitter,
48    Snapchat,
49    Googlebot,
50    Bingbot,
51    Yahoo,
52    Baidu,
53    UCBrowser,
54    SamsungBrowser,
55    OculusBrowser,
56    Unknown,
57}
58
59#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Hash, Clone)]
60pub enum OperatingSystem {
61    Windows,
62    WindowsPhone,
63    MacOS,
64    IOS,
65    IPadOS,
66    Android,
67    Linux,
68    Ubuntu,
69    Fedora,
70    Debian,
71    ChromeOS,
72    BlackBerry,
73    Symbian,
74    WebOS,
75    Bada,
76    Tizen,
77    Nintendo,
78    PlayStation,
79    Xbox,
80    Wii,
81    FreeBSD,
82    OpenBSD,
83    Solaris,
84    AIX,
85    HPUX,
86    HarmonyOS,
87    KaiOS,
88    Unknown,
89}
90
91#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Hash, Clone)]
92pub enum DeviceType {
93    Mobile,
94    Tablet,
95    Desktop,
96    Game,
97    TV,
98    Smartwatch,
99    VRHeadset,
100    CarSystem,
101    Bot,
102    Unknown,
103}
104
105#[derive(Debug, Serialize, Deserialize)]
106pub struct UserAgentInfo {
107    pub os: OperatingSystem,
108    pub browser: Browser,
109    pub device_type: DeviceType,
110}
111
/// Stateless entry point for user agent parsing.
///
/// The type holds no data; all functionality is exposed through associated
/// functions, so it never needs to be instantiated.
#[derive(Debug, Clone, Copy, Default)]
pub struct UserAgentParser;
113
114impl UserAgentParser {
115    /// Parses a user agent string and returns detected information
116    ///
117    /// # Arguments
118    ///
119    /// * `ua` - The user agent string to parse
120    ///
121    /// # Returns
122    ///
123    /// A `UserAgentInfo` struct containing the parsed information
124    ///
125    /// # Example
126    ///
127    /// ```
128    /// use agent_parser_ro::UserAgentParser;
129    ///
130    /// let info = UserAgentParser::parse("Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1");
131    /// ```
132    pub fn parse(ua: &str) -> UserAgentInfo {
133        lazy_static! {
134            // Updated OS regex to better handle Android and other mobile OS patterns
135            static ref OS_REGEX: [Regex; 2] =[
136                Regex::new(
137                r"(?i)(windows phone|mac os x|iphone os|ipad; cpu os|android|ubuntu|fedora|debian|cros|crkey|chrome os|blackberry|symbian|webos|bada|tizen|nintendo|playstation|xbox|wii|freebsd|openbsd|solaris|aix|hp-ux|harmonyos|kaios)"
138            ).unwrap(),
139                Regex::new(
140                 r"(?i)(windows|linux)"
141            ).unwrap()
142            ];
143
144            static ref BROWSER_REGEX: [Regex; 2] = [
145                Regex::new(
146                 r"(?i)(ucbrowser|samsungbrowser|oculusbrowser|ucweb|crios|headlesschrome|mobile safari|fxios|edge|edg|edga|edgios|msie|trident|opera|opr|dolphin|brave|puffin|maxthon|mercury|nokiabrowser|silk|vivaldi|yabrowser|duckduckgo|tor|electron|phantomjs|wv|fban|fbav|instagram|twitter|snapchat|googlebot|bingbot|yahoo! slurp|baiduspider)"
147            ).unwrap(),
148                Regex::new(
149                 r"(?i)(chrome|safari|firefox)"
150            ).unwrap()];
151
152            static ref DEVICE_REGEX: [Regex; 2] =[
153                 Regex::new(
154                r"(?i)(kfmawi|ipod|windows phone|blackberry|symbian|ipad|tablet|kindle|playbook|nexus|sm-t|sm-x|sm-s|gt-p|playstation|ps4|ps5|xbox|nintendo|wii|smart-tv|tv|appletv|roku|chromecast|crkey|fire tv|watch|apple watch|vive|oculus|tesla|android auto|carplay|googlebot|bingbot|slurp|baiduspider|facebookexternalhit|twitterbot|monitoring|scraper|yandexbot)"
155            ).unwrap(),
156                Regex::new(
157                r"(?i)(android|iphone|x11|x86_64)"
158            ).unwrap()
159            ];
160        }
161
162        // Default values
163        let mut os = OperatingSystem::Unknown;
164        let mut browser = Browser::Unknown;
165        let mut device_type = DeviceType::Unknown;
166        // Detect OS - now handles Android better
167        for reg in OS_REGEX.iter() {
168            if let Some(caps) = reg.captures(ua) {
169                let matched_os = caps.get(1).unwrap().as_str().to_lowercase();
170                os = match matched_os.as_str() {
171                    "windows" => OperatingSystem::Windows,
172                    "windows phone" => OperatingSystem::WindowsPhone,
173                    "mac os x" => OperatingSystem::MacOS,
174                    "iphone os" => OperatingSystem::IOS,
175                    "ipad; cpu os" => OperatingSystem::IPadOS,
176                    "android" => OperatingSystem::Android,
177                    "linux" => OperatingSystem::Linux,
178                    "ubuntu" => OperatingSystem::Ubuntu,
179                    "fedora" => OperatingSystem::Fedora,
180                    "debian" => OperatingSystem::Debian,
181                    "chrome os" | "cros" | "crkey" => OperatingSystem::ChromeOS,
182                    "blackberry" => OperatingSystem::BlackBerry,
183                    "symbian" => OperatingSystem::Symbian,
184                    "webos" => OperatingSystem::WebOS,
185                    "bada" => OperatingSystem::Bada,
186                    "tizen" => OperatingSystem::Tizen,
187                    "nintendo" => OperatingSystem::Nintendo,
188                    "playstation" => OperatingSystem::PlayStation,
189                    "xbox" => OperatingSystem::Xbox,
190                    "wii" => OperatingSystem::Wii,
191                    "freebsd" => OperatingSystem::FreeBSD,
192                    "openbsd" => OperatingSystem::OpenBSD,
193                    "solaris" => OperatingSystem::Solaris,
194                    "aix" => OperatingSystem::AIX,
195                    "hp-ux" => OperatingSystem::HPUX,
196                    "harmonyos" => OperatingSystem::HarmonyOS,
197                    "kaios" => OperatingSystem::KaiOS,
198                    _ => OperatingSystem::Unknown,
199                };
200            }
201            if os != OperatingSystem::Unknown {
202                break;
203            }
204        }
205
206        // Detect Browser
207        for reg in BROWSER_REGEX.iter() {
208            if let Some(caps) = reg.captures(ua) {
209                let matched_browser = caps.get(1).unwrap().as_str().to_lowercase();
210                browser = match matched_browser.as_str() {
211                    "chrome" | "headlesschrome" | "crios" => Browser::Chrome,
212                    "safari" | "mobile safari" => Browser::Safari,
213                    "firefox" | "fxios" => Browser::Firefox,
214                    "edge" | "edg" | "edga" | "edgios" => Browser::Edge,
215                    "msie" | "trident" => Browser::InternetExplorer,
216                    "opera" | "opr" => Browser::Opera,
217                    "ucbrowser" | "ucweb" => Browser::UCBrowser,
218                    "samsungbrowser" => Browser::SamsungBrowser,
219                    "oculusbrowser" => Browser::OculusBrowser,
220                    "dolphin" => Browser::Dolphin,
221                    "brave" => Browser::Brave,
222                    "puffin" => Browser::Puffin,
223                    "maxthon" => Browser::Maxthon,
224                    "mercury" => Browser::Mercury,
225                    "silk" => Browser::Silk,
226                    "vivaldi" => Browser::Vivaldi,
227                    "yabrowser" => Browser::Yandex,
228                    "duckduckgo" => Browser::DuckDuckGo,
229                    "tor" => Browser::Tor,
230                    "electron" => Browser::Electron,
231                    "phantomjs" => Browser::PhantomJS,
232                    "wv" => Browser::WebView,
233                    "fban" | "fbav" => Browser::Facebook,
234                    "instagram" => Browser::Instagram,
235                    "twitter" => Browser::Twitter,
236                    "snapchat" => Browser::Snapchat,
237                    "googlebot" => Browser::Googlebot,
238                    "bingbot" => Browser::Bingbot,
239                    "yahoo! slurp" => Browser::Yahoo,
240                    "baiduspider" => Browser::Baidu,
241                    _ => Browser::Unknown,
242                };
243                if browser != Browser::Unknown {
244                    break;
245                }
246            }
247        }
248
249        for reg in DEVICE_REGEX.iter() {
250            if let Some(caps) = reg.captures(ua) {
251                let device = caps.get(1).unwrap().as_str().to_lowercase();
252                device_type = match device.as_str() {
253                    "x11" | "x86_64" => DeviceType::Desktop,
254                    "iphone" | "ipod" | "android" | "windows phone" | "blackberry"|"sm-s" | "symbian" => {
255                        DeviceType::Mobile
256                    }
257                    "tablet" | "kindle" | "playbook" | "nexus" | "gt-p" | "sm-t"| "sm-x" | "ipad"|"kfmawi" => {
258                        DeviceType::Tablet
259                    }
260                    "playstation" | "ps4" | "ps5" | "xbox" | "nintendo" | "wii" => DeviceType::Game,
261                    "smart-tv" | "tv" | "appletv" | "roku" | "chromecast"| "crkey" | "fire tv" => {
262                        DeviceType::TV
263                    }
264                    "watch" | "apple watch" => DeviceType::Smartwatch,
265                    "vive" | "oculus" => DeviceType::VRHeadset,
266                    "tesla" | "android auto" | "carplay" => DeviceType::CarSystem,
267                    "googlebot"
268                    | "bingbot"
269                    | "slurp"
270                    | "baiduspider"
271                    | "facebookexternalhit"
272                    | "twitterbot"
273                    | "yandexbot"
274                    | "monitoring"
275                    | "scraper" => DeviceType::Bot,
276                    _ => DeviceType::Unknown,
277                };
278                if device_type != DeviceType::Unknown {
279                    break;
280                }
281            }
282        }
283        if device_type == DeviceType::Unknown {
284            if ua.contains("Windows") || ua.contains("Macintosh") || ua.contains("Linux") {
285                device_type = DeviceType::Desktop;
286            }
287        };
288        UserAgentInfo {
289            os,
290            browser,
291            device_type,
292        }
293    }
294}
295