user_agent_parser/lib.rs
1/*!
2# User Agent Parser
3
4A parser to get the product, OS, device, cpu, and engine information from a user agent, inspired by https://github.com/faisalman/ua-parser-js and https://github.com/ua-parser/uap-core
5
6## Usage
7
8You can make a **regexes.yaml** file or copy one from https://github.com/ua-parser/uap-core
9
10This is a simple example of **regexes.yaml**.
11
12```yaml
13user_agent_parsers:
14 - regex: '(ESPN)[%20| ]+Radio/(\d+)\.(\d+)\.(\d+) CFNetwork'
15 - regex: '(Namoroka|Shiretoko|Minefield)/(\d+)\.(\d+)\.(\d+(?:pre|))'
16 family_replacement: 'Firefox ($1)'
17 - regex: '(Android) Eclair'
18 v1_replacement: '2'
19 v2_replacement: '1'
20
21os_parsers:
22 - regex: 'Win(?:dows)? ?(95|98|3.1|NT|ME|2000|XP|Vista|7|CE)'
23 os_replacement: 'Windows'
24 os_v1_replacement: '$1'
25
26device_parsers:
27 - regex: '\bSmartWatch *\( *([^;]+) *; *([^;]+) *;'
28 device_replacement: '$1 $2'
29 brand_replacement: '$1'
30 model_replacement: '$2'
31```
32
33Then, use the `from_path` (or `from_str` if your YAML data is in-memory) associated function to create a `UserAgentParser` instance.
34
35
36```rust,ignore
37use user_agent_parser::UserAgentParser;
38
39let ua_parser = UserAgentParser::from_path("/path/to/regexes.yaml").unwrap();
40```
41
42Use the `parse_*` methods and input a user-agent string to get information.
43
44```rust,ignore
45use user_agent_parser::UserAgentParser;
46
47let ua_parser = UserAgentParser::from_path("/path/to/regexes.yaml").unwrap();
48
49let user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 [FBAN/FBIOS;FBAV/8.0.0.28.18;FBBV/1665515;FBDV/iPhone4,1;FBMD/iPhone;FBSN/iPhone OS;FBSV/7.0.4;FBSS/2; FBCR/Telekom.de;FBID/phone;FBLC/de_DE;FBOP/5]";
50
51let product = ua_parser.parse_product(user_agent);
52
53println!("{:#?}", product);
54
55// Product {
56// name: Some(
57// "Facebook",
58// ),
59// major: Some(
60// "8",
61// ),
62// minor: Some(
63// "0",
64// ),
65// patch: Some(
66// "0",
67// ),
68// }
69
70let os = ua_parser.parse_os(user_agent);
71
72println!("{:#?}", os);
73
74// OS {
75// name: Some(
76// "iOS",
77// ),
78// major: None,
79// minor: None,
80// patch: None,
81// patch_minor: None,
82// }
83
84let device = ua_parser.parse_device(user_agent);
85
86println!("{:#?}", device);
87
88// Device {
89// name: Some(
90// "iPhone",
91// ),
92// brand: Some(
93// "Apple",
94// ),
95// model: Some(
96// "iPhone4,1",
97// ),
98// }
99
100let cpu = ua_parser.parse_cpu(user_agent);
101
102println!("{:#?}", cpu);
103
104// CPU {
105// architecture: Some(
106// "amd64",
107// ),
108// }
109
110let engine = ua_parser.parse_engine(user_agent);
111
112println!("{:#?}", engine);
113
114// Engine {
115// name: Some(
116// "Gecko",
117// ),
118// major: Some(
119// "10",
120// ),
121// minor: Some(
122// "0",
123// ),
124// patch: None,
125// }
126```
127
128The lifetime of result instances of the `parse_*` methods depends on the user-agent string and the `UserAgentParser` instance. To make it independent, call the `into_owned` method.
129
130```rust,ignore
131use user_agent_parser::UserAgentParser;
132
133let ua_parser = UserAgentParser::from_path("/path/to/regexes.yaml").unwrap();
134
135let product = ua_parser.parse_product("Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/10.04 (lucid) Firefox/3.6.12").into_owned();
136```
137
138## Rocket Support
139
This crate supports the Rocket framework. All you have to do is enable the `rocket` feature for this crate.
141
142```toml
143[dependencies.user-agent-parser]
144version = "*"
145features = ["rocket"]
146```
147
148Let `Rocket` manage a `UserAgentParser` instance, and the `Product`, `OS`, `Device`, `CPU`, `Engine` models of this crate (plus the `UserAgent` model) can be used as *Request Guards*.
149
150```rust,ignore
151#[macro_use]
152extern crate rocket;
153
154use user_agent_parser::{UserAgentParser, UserAgent, Product, OS, Device, CPU, Engine};
155
156#[get("/")]
157fn index(user_agent: UserAgent, product: Product, os: OS, device: Device, cpu: CPU, engine: Engine) -> String {
158 format!("{user_agent:#?}\n{product:#?}\n{os:#?}\n{device:#?}\n{cpu:#?}\n{engine:#?}",
159 user_agent = user_agent,
160 product = product,
161 os = os,
162 device = device,
163 cpu = cpu,
164 engine = engine,
165 )
166}
167
168#[launch]
169fn rocket() -> _ {
170 rocket::build()
171 .manage(UserAgentParser::from_path("/path/to/regexes.yaml").unwrap())
172 .mount("/", routes![index])
173}
174```
175
176## Testing
177
178```bash
# git clone --recurse-submodules https://github.com/magiclen/user-agent-parser.git

git clone https://github.com/magiclen/user-agent-parser.git
182
183cd user-agent-parser
184
185git submodule init
186git submodule update --recursive
187
188cargo test
189```
190*/
191
192mod errors;
193mod models;
194mod regexes;
195
196#[cfg(feature = "rocket")]
197mod request_guards;
198
199use std::{borrow::Cow, fs, path::Path, str::FromStr};
200
201pub use errors::UserAgentParserError;
202pub use models::*;
203use onig::Regex;
204use regexes::*;
205use yaml_rust::{Yaml, YamlLoader};
206
207#[derive(Debug)]
208pub struct UserAgentParser {
209 replacement_regex: Regex,
210 product_regexes: Vec<ProductRegex>,
211 os_regexes: Vec<OSRegex>,
212 device_regexes: Vec<DeviceRegex>,
213 cpu_regexes: Vec<CPURegex>,
214 engine_regexes: Vec<EngineRegex>,
215}
216
217impl UserAgentParser {
218 /// Read the list of regular expressions (YAML data) from a file to create a `UserAgentParser` instance.
219 #[inline]
220 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<UserAgentParser, UserAgentParserError> {
221 let yaml = fs::read_to_string(path)?;
222
223 Self::from_str(yaml)
224 }
225
226 /// Read the list of regular expressions (YAML data) from a string to create a `UserAgentParser` instance.
227 #[allow(clippy::should_implement_trait)]
228 pub fn from_str<S: AsRef<str>>(yaml: S) -> Result<UserAgentParser, UserAgentParserError> {
229 let yamls = YamlLoader::load_from_str(yaml.as_ref())?;
230
231 if yamls.is_empty() {
232 Err(UserAgentParserError::IncorrectSource)
233 } else {
234 let yaml = &yamls[0];
235
236 match yaml.as_hash() {
237 Some(yaml) => {
238 let user_agent_parsers =
239 yaml.get(&Yaml::String("user_agent_parsers".to_string()));
240 let os_parsers = yaml.get(&Yaml::String("os_parsers".to_string()));
241 let device_parsers = yaml.get(&Yaml::String("device_parsers".to_string()));
242
243 let user_agent_regexes = match user_agent_parsers {
244 Some(user_agent_parsers) => ProductRegex::from_yaml(user_agent_parsers)?,
245 None => Vec::new(),
246 };
247
248 let os_regexes = match os_parsers {
249 Some(os_parsers) => OSRegex::from_yaml(os_parsers)?,
250 None => Vec::new(),
251 };
252
253 let device_regexes = match device_parsers {
254 Some(device_parsers) => DeviceRegex::from_yaml(device_parsers)?,
255 None => Vec::new(),
256 };
257
258 Ok(UserAgentParser {
259 replacement_regex: Regex::new(r"\$(\d){1,9}").unwrap(),
260 product_regexes: user_agent_regexes,
261 os_regexes,
262 device_regexes,
263 cpu_regexes: CPURegex::built_in_regexes(),
264 engine_regexes: EngineRegex::built_in_regexes(),
265 })
266 },
267 None => Err(UserAgentParserError::IncorrectSource),
268 }
269 }
270 }
271}
272
/// Produces the `Option<Cow<str>>` value of one result field from a regex match.
///
/// `get_string!(index, replacement, replacement_regex, captures)`:
///
/// * With a replacement template that contains no placeholder, the template itself is borrowed
///   and returned unchanged.
/// * With a template containing placeholders, every placeholder found by `replacement_regex`
///   is substituted by the capture group it names (or by nothing when that group does not exist
///   or did not take part in the match). The result is trimmed, and `None` is returned when
///   nothing is left.
/// * Without a template, it behaves like the two-argument form.
///
/// `get_string!(index, captures)` returns capture group `index`, trimmed, or `None` when the
/// group is absent or blank.
macro_rules! get_string {
    ($index:expr, $replacement:expr, $replacement_regex:expr, $captures:expr) => {
        match $replacement.as_ref() {
            Some(template) => {
                let placeholders: Vec<_> = $replacement_regex.captures_iter(template).collect();

                if placeholders.is_empty() {
                    Some(Cow::from(template))
                } else {
                    let group_count = $captures.len();
                    let mut expanded = template.to_string();

                    // Substitute from right to left so that the byte offsets of the
                    // placeholders still to be handled remain valid.
                    for placeholder in placeholders.into_iter().rev() {
                        let group = placeholder.at(1).unwrap().parse::<usize>().unwrap();
                        let (start, end) = placeholder.pos(0).unwrap();

                        let substitute = if group < group_count {
                            $captures.at(group).unwrap_or_default()
                        } else {
                            ""
                        };

                        expanded.replace_range(start..end, substitute);
                    }

                    // Trim both ends in place.
                    let kept = expanded.trim_end().len();
                    expanded.truncate(kept);

                    let leading = expanded.len() - expanded.trim_start().len();
                    expanded.replace_range(..leading, "");

                    Some(expanded).filter(|text| !text.is_empty()).map(Cow::from)
                }
            },
            None => get_string!($index, $captures),
        }
    };

    ($index:expr, $captures:expr) => {
        $captures.at($index).map(str::trim).filter(|text| !text.is_empty()).map(Cow::from)
    };
}
347
348impl UserAgentParser {
349 pub fn parse_product<'a, S: AsRef<str> + ?Sized>(&'a self, user_agent: &'a S) -> Product<'a> {
350 let mut product = Product::default();
351
352 for product_regex in self.product_regexes.iter() {
353 if let Some(captures) = product_regex.regex.captures(user_agent.as_ref()) {
354 product.name = get_string!(
355 1,
356 product_regex.family_replacement,
357 self.replacement_regex,
358 captures
359 );
360 product.major =
361 get_string!(2, product_regex.v1_replacement, self.replacement_regex, captures);
362 product.minor =
363 get_string!(3, product_regex.v2_replacement, self.replacement_regex, captures);
364 product.patch =
365 get_string!(4, product_regex.v3_replacement, self.replacement_regex, captures);
366
367 break;
368 }
369 }
370
371 if product.name.is_none() {
372 product.name = Some(Cow::from("Other"));
373 }
374
375 product
376 }
377
378 pub fn parse_os<'a, S: AsRef<str> + ?Sized>(&'a self, user_agent: &'a S) -> OS<'a> {
379 let mut os = OS::default();
380
381 for os_regex in self.os_regexes.iter() {
382 if let Some(captures) = os_regex.regex.captures(user_agent.as_ref()) {
383 os.name = get_string!(1, os_regex.os_replacement, self.replacement_regex, captures);
384 os.major =
385 get_string!(2, os_regex.os_v1_replacement, self.replacement_regex, captures);
386 os.minor =
387 get_string!(3, os_regex.os_v2_replacement, self.replacement_regex, captures);
388 os.patch =
389 get_string!(4, os_regex.os_v3_replacement, self.replacement_regex, captures);
390 os.patch_minor =
391 get_string!(5, os_regex.os_v4_replacement, self.replacement_regex, captures);
392
393 break;
394 }
395 }
396
397 if os.name.is_none() {
398 os.name = Some(Cow::from("Other"));
399 }
400
401 os
402 }
403
404 pub fn parse_device<'a, S: AsRef<str> + ?Sized>(&'a self, user_agent: &'a S) -> Device<'a> {
405 let mut device = Device::default();
406
407 for device_regex in self.device_regexes.iter() {
408 if let Some(captures) = device_regex.regex.captures(user_agent.as_ref()) {
409 device.name = get_string!(
410 1,
411 device_regex.device_replacement,
412 self.replacement_regex,
413 captures
414 );
415 device.brand = get_string!(
416 2,
417 device_regex.brand_replacement,
418 self.replacement_regex,
419 captures
420 );
421 device.model = get_string!(
422 1,
423 device_regex.model_replacement,
424 self.replacement_regex,
425 captures
426 );
427
428 break;
429 }
430 }
431
432 if device.name.is_none() {
433 device.name = Some(Cow::from("Other"));
434 }
435
436 device
437 }
438
439 pub fn parse_cpu<'a, S: AsRef<str> + ?Sized>(&'a self, user_agent: &'a S) -> CPU<'a> {
440 let mut cpu = CPU::default();
441
442 for cpu_regex in self.cpu_regexes.iter() {
443 if let Some(captures) = cpu_regex.regex.captures(user_agent.as_ref()) {
444 cpu.architecture = get_string!(
445 1,
446 cpu_regex.architecture_replacement,
447 self.replacement_regex,
448 captures
449 );
450
451 break;
452 }
453 }
454
455 cpu
456 }
457
458 pub fn parse_engine<'a, S: AsRef<str> + ?Sized>(&'a self, user_agent: &'a S) -> Engine<'a> {
459 let mut engine = Engine::default();
460
461 for engine_regex in self.engine_regexes.iter() {
462 if let Some(captures) = engine_regex.regex.captures(user_agent.as_ref()) {
463 engine.name =
464 get_string!(1, engine_regex.name_replacement, self.replacement_regex, captures);
465 engine.major = get_string!(
466 2,
467 engine_regex.engine_v1_replacement,
468 self.replacement_regex,
469 captures
470 );
471 engine.minor = get_string!(
472 3,
473 engine_regex.engine_v2_replacement,
474 self.replacement_regex,
475 captures
476 );
477 engine.patch = get_string!(
478 4,
479 engine_regex.engine_v3_replacement,
480 self.replacement_regex,
481 captures
482 );
483
484 break;
485 }
486 }
487
488 engine
489 }
490}
491
492impl FromStr for UserAgentParser {
493 type Err = UserAgentParserError;
494
495 #[inline]
496 fn from_str(s: &str) -> Result<Self, Self::Err> {
497 UserAgentParser::from_str(s)
498 }
499}