simple_useragent/
lib.rs

1/*
2 * Copyright (c) 2024-2025 Bastiaan van der Plaat
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7#![doc = include_str!("../README.md")]
8#![forbid(unsafe_code)]
9
10use regex::Regex;
11
12// MARK: Rules
13mod rules_data {
14    include!(concat!(env!("OUT_DIR"), "/rules_data.rs"));
15}
16
17struct Rules {
18    user_agent: Vec<UserAgentRule>,
19    os: Vec<OsRule>,
20}
21
22struct UserAgentRule {
23    regex: Regex,
24    family_replacement: Option<&'static str>,
25    v1_replacement: Option<&'static str>,
26    v2_replacement: Option<&'static str>,
27    v3_replacement: Option<&'static str>,
28}
29
30struct OsRule {
31    regex: Regex,
32    os_replacement: Option<&'static str>,
33    os_v1_replacement: Option<&'static str>,
34    os_v2_replacement: Option<&'static str>,
35    os_v3_replacement: Option<&'static str>,
36}
37
38impl Rules {
39    fn parse() -> Self {
40        Self {
41            user_agent: rules_data::USER_AGENT_RULES
42                .iter()
43                .map(|rule| UserAgentRule {
44                    regex: Regex::new(rule.regex).expect("Invalid regex"),
45                    family_replacement: rule.family_replacement,
46                    v1_replacement: rule.v1_replacement,
47                    v2_replacement: rule.v2_replacement,
48                    v3_replacement: rule.v3_replacement,
49                })
50                .collect(),
51            os: rules_data::OS_RULES
52                .iter()
53                .map(|rule| OsRule {
54                    regex: Regex::new(rule.regex).expect("Invalid regex"),
55                    os_replacement: rule.os_replacement,
56                    os_v1_replacement: rule.os_v1_replacement,
57                    os_v2_replacement: rule.os_v2_replacement,
58                    os_v3_replacement: rule.os_v3_replacement,
59                })
60                .collect(),
61        }
62    }
63}
64
65// MARK: UserAgent
/// User agent
///
/// Result of parsing a user agent string: the detected client and the
/// detected operating system.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct UserAgent {
    /// Client
    pub client: Client,
    /// Operating System
    pub os: OS,
}

/// Client
///
/// The browser or application that sent the user agent string.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Client {
    /// Family, for example `"Firefox"`; `"Other"` when no rule matched
    pub family: String,
    /// Version as dot-separated components, `None` when unknown
    pub version: Option<String>,
}

/// Operating System
///
/// The operating system the client reported running on.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct OS {
    /// Family, for example `"Windows"`; `"Other"` when no rule matched
    pub family: String,
    /// Version as dot-separated components, `None` when unknown
    pub version: Option<String>,
}
92
93// MARK: UserAgentParser
94/// User agent parser
95pub struct UserAgentParser {
96    rules: Rules,
97}
98
99impl Default for UserAgentParser {
100    fn default() -> Self {
101        Self {
102            rules: Rules::parse(),
103        }
104    }
105}
106
107impl UserAgentParser {
108    /// Create new user agent parser
109    pub fn new() -> Self {
110        Self::default()
111    }
112
113    /// Parse user agent
114    pub fn parse(&self, user_agent: &str) -> UserAgent {
115        UserAgent {
116            client: self.parse_client(user_agent),
117            os: self.parse_os(user_agent),
118        }
119    }
120
121    // https://github.com/ua-parser/uap-core/blob/master/docs/specification.md#user_agent_parsers
122    fn parse_client(&self, user_agent: &str) -> Client {
123        for rule in &self.rules.user_agent {
124            if let Some(captures) = rule.regex.captures(user_agent) {
125                let family = rule
126                    .family_replacement
127                    .map(|s| Self::map_replacement(s, &captures))
128                    .unwrap_or_else(|| captures[1].to_string());
129                let major = rule
130                    .v1_replacement
131                    .map(|s| Self::map_replacement(s, &captures))
132                    .or_else(|| captures.get(2).map(|m| m.as_str().to_string()));
133                let minor = rule
134                    .v2_replacement
135                    .map(|s| Self::map_replacement(s, &captures))
136                    .or_else(|| captures.get(3).map(|m| m.as_str().to_string()));
137                let patch = rule
138                    .v3_replacement
139                    .map(|s| Self::map_replacement(s, &captures))
140                    .or_else(|| captures.get(4).map(|m| m.as_str().to_string()));
141                return Client {
142                    family,
143                    version: Self::concat_version(major, minor, patch),
144                };
145            }
146        }
147        Client {
148            family: "Other".to_string(),
149            version: None,
150        }
151    }
152
153    // https://github.com/ua-parser/uap-core/blob/master/docs/specification.md#user_agent_parsers
154    fn parse_os(&self, user_agent: &str) -> OS {
155        for rule in &self.rules.os {
156            if let Some(captures) = rule.regex.captures(user_agent) {
157                let family = rule
158                    .os_replacement
159                    .map(|s| Self::map_replacement(s, &captures))
160                    .unwrap_or_else(|| captures[1].to_string());
161                let major = rule
162                    .os_v1_replacement
163                    .map(|s| Self::map_replacement(s, &captures))
164                    .or_else(|| captures.get(2).map(|m| m.as_str().to_string()));
165                let minor = rule
166                    .os_v2_replacement
167                    .map(|s| Self::map_replacement(s, &captures))
168                    .or_else(|| captures.get(3).map(|m| m.as_str().to_string()));
169                let patch = rule
170                    .os_v3_replacement
171                    .map(|s| Self::map_replacement(s, &captures))
172                    .or_else(|| captures.get(4).map(|m| m.as_str().to_string()));
173                return OS {
174                    family,
175                    version: Self::concat_version(major, minor, patch),
176                };
177            }
178        }
179        OS {
180            family: "Other".to_string(),
181            version: None,
182        }
183    }
184
185    fn map_replacement(replacement: &str, captures: &regex::Captures) -> String {
186        let mut result = replacement.to_string();
187        if result.contains("$1") {
188            result = result.replace("$1", &captures[1]);
189        }
190        if result.contains("$2") {
191            result = result.replace("$2", &captures[2]);
192        }
193        if result.contains("$3") {
194            result = result.replace("$3", &captures[3]);
195        }
196        result
197    }
198
199    fn concat_version(
200        major: Option<String>,
201        minor: Option<String>,
202        patch: Option<String>,
203    ) -> Option<String> {
204        let mut version = String::new();
205        if let Some(major) = major {
206            version.push_str(&major);
207        }
208        if let Some(minor) = minor {
209            version.push('.');
210            version.push_str(&minor);
211        }
212        if let Some(patch) = patch {
213            version.push('.');
214            version.push_str(&patch);
215        }
216        if version.is_empty() {
217            None
218        } else {
219            Some(version)
220        }
221    }
222}
223
224// MARK: Tests
#[cfg(test)]
mod test {
    use super::*;

    /// Runs the parser over a table of user agent strings and checks the
    /// detected client and operating system for each one.
    #[test]
    fn test_parser() {
        // (user agent, client family, client version, OS family, OS version)
        let cases: [(&str, &str, Option<&str>, &str, Option<&str>); 5] = [
            (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0",
                "Firefox",
                Some("133.0"),
                "Mac OS X",
                Some("10.15"),
            ),
            (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
                "Chrome",
                Some("91.0.4472"),
                "Windows",
                Some("10"),
            ),
            (
                "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.1 Mobile/15E148 Safari/604.1",
                "Mobile Safari",
                Some("14.0.1"),
                "iOS",
                Some("14.6"),
            ),
            (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.59",
                "Edge",
                Some("91.0.864"),
                "Windows",
                Some("10"),
            ),
            // Nothing matches: both parts fall back to "Other" without a version.
            ("UnknownUserAgent/1.0", "Other", None, "Other", None),
        ];

        let parser = UserAgentParser::new();
        for (input, client_family, client_version, os_family, os_version) in cases {
            let ua = parser.parse(input);
            assert_eq!(ua.client.family, client_family);
            assert_eq!(ua.client.version.as_deref(), client_version);
            assert_eq!(ua.os.family, os_family);
            assert_eq!(ua.os.version.as_deref(), os_version);
        }
    }
}
271}