user_agent_parser/
lib.rs

1/*!
2# User Agent Parser
3
A parser to get the product, OS, device, CPU, and engine information from a user-agent string, inspired by https://github.com/faisalman/ua-parser-js and https://github.com/ua-parser/uap-core
5
6## Usage
7
8You can make a **regexes.yaml** file or copy one from https://github.com/ua-parser/uap-core
9
10This is a simple example of **regexes.yaml**.
11
12```yaml
13user_agent_parsers:
14  - regex: '(ESPN)[%20| ]+Radio/(\d+)\.(\d+)\.(\d+) CFNetwork'
15  - regex: '(Namoroka|Shiretoko|Minefield)/(\d+)\.(\d+)\.(\d+(?:pre|))'
16    family_replacement: 'Firefox ($1)'
17  - regex: '(Android) Eclair'
18    v1_replacement: '2'
19    v2_replacement: '1'
20
21os_parsers:
22  - regex: 'Win(?:dows)? ?(95|98|3.1|NT|ME|2000|XP|Vista|7|CE)'
23    os_replacement: 'Windows'
24    os_v1_replacement: '$1'
25
26device_parsers:
27  - regex: '\bSmartWatch *\( *([^;]+) *; *([^;]+) *;'
28    device_replacement: '$1 $2'
29    brand_replacement: '$1'
30    model_replacement: '$2'
31```
32
33Then, use the `from_path` (or `from_str` if your YAML data is in-memory) associated function to create a `UserAgentParser` instance.
34
35
36```rust,ignore
37use user_agent_parser::UserAgentParser;
38
39let ua_parser = UserAgentParser::from_path("/path/to/regexes.yaml").unwrap();
40```
41
Pass a user-agent string to the `parse_*` methods to get information.
43
44```rust,ignore
45use user_agent_parser::UserAgentParser;
46
47let ua_parser = UserAgentParser::from_path("/path/to/regexes.yaml").unwrap();
48
49let user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 [FBAN/FBIOS;FBAV/8.0.0.28.18;FBBV/1665515;FBDV/iPhone4,1;FBMD/iPhone;FBSN/iPhone OS;FBSV/7.0.4;FBSS/2; FBCR/Telekom.de;FBID/phone;FBLC/de_DE;FBOP/5]";
50
51let product = ua_parser.parse_product(user_agent);
52
53println!("{:#?}", product);
54
55//    Product {
56//        name: Some(
57//            "Facebook",
58//        ),
59//        major: Some(
60//            "8",
61//        ),
62//        minor: Some(
63//            "0",
64//        ),
65//        patch: Some(
66//            "0",
67//        ),
68//    }
69
70let os = ua_parser.parse_os(user_agent);
71
72println!("{:#?}", os);
73
74//    OS {
75//        name: Some(
76//            "iOS",
77//        ),
78//        major: None,
79//        minor: None,
80//        patch: None,
81//        patch_minor: None,
82//    }
83
84let device = ua_parser.parse_device(user_agent);
85
86println!("{:#?}", device);
87
88//    Device {
89//        name: Some(
90//            "iPhone",
91//        ),
92//        brand: Some(
93//            "Apple",
94//        ),
95//        model: Some(
96//            "iPhone4,1",
97//        ),
98//    }
99
100let cpu = ua_parser.parse_cpu(user_agent);
101
102println!("{:#?}", cpu);
103
104//    CPU {
105//        architecture: Some(
106//            "amd64",
107//        ),
108//    }
109
110let engine = ua_parser.parse_engine(user_agent);
111
112println!("{:#?}", engine);
113
114//    Engine {
115//        name: Some(
116//            "Gecko",
117//        ),
118//        major: Some(
119//            "10",
120//        ),
121//        minor: Some(
122//            "0",
123//        ),
124//        patch: None,
125//    }
126```
127
The results returned by the `parse_*` methods borrow from both the user-agent string and the `UserAgentParser` instance, so they cannot outlive either of them. To get a result that is independent of both, call its `into_owned` method.
129
130```rust,ignore
131use user_agent_parser::UserAgentParser;
132
133let ua_parser = UserAgentParser::from_path("/path/to/regexes.yaml").unwrap();
134
135let product = ua_parser.parse_product("Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/10.04 (lucid) Firefox/3.6.12").into_owned();
136```
137
138## Rocket Support
139
This crate supports the Rocket framework. All you have to do is to enable the `rocket` feature for this crate.
141
142```toml
143[dependencies.user-agent-parser]
144version = "*"
145features = ["rocket"]
146```
147
148Let `Rocket` manage a `UserAgentParser` instance, and the `Product`, `OS`, `Device`, `CPU`, `Engine` models of this crate (plus the `UserAgent` model) can be used as *Request Guards*.
149
150```rust,ignore
151#[macro_use]
152extern crate rocket;
153
154use user_agent_parser::{UserAgentParser, UserAgent, Product, OS, Device, CPU, Engine};
155
156#[get("/")]
157fn index(user_agent: UserAgent, product: Product, os: OS, device: Device, cpu: CPU, engine: Engine) -> String {
158    format!("{user_agent:#?}\n{product:#?}\n{os:#?}\n{device:#?}\n{cpu:#?}\n{engine:#?}",
159            user_agent = user_agent,
160            product = product,
161            os = os,
162            device = device,
163            cpu = cpu,
164            engine = engine,
165    )
166}
167
168#[launch]
169fn rocket() -> _ {
170    rocket::build()
171        .manage(UserAgentParser::from_path("/path/to/regexes.yaml").unwrap())
172        .mount("/", routes![index])
173}
174```
175
176## Testing
177
178```bash
# git clone --recurse-submodules https://github.com/magiclen/user-agent-parser.git

git clone https://github.com/magiclen/user-agent-parser.git
182
183cd user-agent-parser
184
185git submodule init
186git submodule update --recursive
187
188cargo test
189```
190*/
191
192mod errors;
193mod models;
194mod regexes;
195
196#[cfg(feature = "rocket")]
197mod request_guards;
198
199use std::{borrow::Cow, fs, path::Path, str::FromStr};
200
201pub use errors::UserAgentParserError;
202pub use models::*;
203use onig::Regex;
204use regexes::*;
205use yaml_rust::{Yaml, YamlLoader};
206
207#[derive(Debug)]
208pub struct UserAgentParser {
209    replacement_regex: Regex,
210    product_regexes:   Vec<ProductRegex>,
211    os_regexes:        Vec<OSRegex>,
212    device_regexes:    Vec<DeviceRegex>,
213    cpu_regexes:       Vec<CPURegex>,
214    engine_regexes:    Vec<EngineRegex>,
215}
216
217impl UserAgentParser {
218    /// Read the list of regular expressions (YAML data) from a file to create a `UserAgentParser` instance.
219    #[inline]
220    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<UserAgentParser, UserAgentParserError> {
221        let yaml = fs::read_to_string(path)?;
222
223        Self::from_str(yaml)
224    }
225
226    /// Read the list of regular expressions (YAML data) from a string to create a `UserAgentParser` instance.
227    #[allow(clippy::should_implement_trait)]
228    pub fn from_str<S: AsRef<str>>(yaml: S) -> Result<UserAgentParser, UserAgentParserError> {
229        let yamls = YamlLoader::load_from_str(yaml.as_ref())?;
230
231        if yamls.is_empty() {
232            Err(UserAgentParserError::IncorrectSource)
233        } else {
234            let yaml = &yamls[0];
235
236            match yaml.as_hash() {
237                Some(yaml) => {
238                    let user_agent_parsers =
239                        yaml.get(&Yaml::String("user_agent_parsers".to_string()));
240                    let os_parsers = yaml.get(&Yaml::String("os_parsers".to_string()));
241                    let device_parsers = yaml.get(&Yaml::String("device_parsers".to_string()));
242
243                    let user_agent_regexes = match user_agent_parsers {
244                        Some(user_agent_parsers) => ProductRegex::from_yaml(user_agent_parsers)?,
245                        None => Vec::new(),
246                    };
247
248                    let os_regexes = match os_parsers {
249                        Some(os_parsers) => OSRegex::from_yaml(os_parsers)?,
250                        None => Vec::new(),
251                    };
252
253                    let device_regexes = match device_parsers {
254                        Some(device_parsers) => DeviceRegex::from_yaml(device_parsers)?,
255                        None => Vec::new(),
256                    };
257
258                    Ok(UserAgentParser {
259                        replacement_regex: Regex::new(r"\$(\d){1,9}").unwrap(),
260                        product_regexes: user_agent_regexes,
261                        os_regexes,
262                        device_regexes,
263                        cpu_regexes: CPURegex::built_in_regexes(),
264                        engine_regexes: EngineRegex::built_in_regexes(),
265                    })
266                },
267                None => Err(UserAgentParserError::IncorrectSource),
268            }
269        }
270    }
271}
272
// Resolves one output field from a successful regex match.
//
// Four-argument form `(index, replacement, replacement_regex, captures)`:
//   * no replacement template: behaves exactly like the two-argument form;
//   * template without any placeholder: yields the template itself, borrowed and
//     untouched (it is not trimmed);
//   * otherwise: every placeholder is replaced by the capture group it names (an
//     empty string when that group is out of range or did not participate), the
//     result is trimmed on both ends, and `None` is yielded if nothing remains.
//
// Two-argument form `(index, captures)`: yields capture group `index`, trimmed, or
// `None` when the group is absent or blank.
macro_rules! get_string {
    ($index:expr, $replacement:expr, $replacement_regex:expr, $captures:expr) => {
        match $replacement.as_ref() {
            None => get_string!($index, $captures),
            Some(template) => {
                let placeholders: Vec<_> = $replacement_regex.captures_iter(template).collect();

                if placeholders.is_empty() {
                    Some(Cow::from(template))
                } else {
                    let mut expanded = template.to_string();
                    let group_count = $captures.len();

                    // Substitute right-to-left so the byte offsets of the placeholders
                    // that are still pending stay valid.
                    for placeholder in placeholders.into_iter().rev() {
                        let group = placeholder.at(1).unwrap().parse::<usize>().unwrap();
                        let (start, end) = placeholder.pos(0).unwrap();

                        let substitute = if group < group_count {
                            $captures.at(group).unwrap_or_default()
                        } else {
                            ""
                        };

                        expanded.replace_range(start..end, substitute);
                    }

                    // Trim both ends in place.
                    let kept = expanded.trim_end().len();
                    expanded.truncate(kept);
                    let leading = expanded.len() - expanded.trim_start().len();
                    expanded.replace_range(..leading, "");

                    if expanded.is_empty() {
                        None
                    } else {
                        Some(Cow::from(expanded))
                    }
                }
            },
        }
    };

    ($index:expr, $captures:expr) => {
        $captures.at($index).map(str::trim).filter(|group| !group.is_empty()).map(Cow::from)
    };
}
347
348impl UserAgentParser {
349    pub fn parse_product<'a, S: AsRef<str> + ?Sized>(&'a self, user_agent: &'a S) -> Product<'a> {
350        let mut product = Product::default();
351
352        for product_regex in self.product_regexes.iter() {
353            if let Some(captures) = product_regex.regex.captures(user_agent.as_ref()) {
354                product.name = get_string!(
355                    1,
356                    product_regex.family_replacement,
357                    self.replacement_regex,
358                    captures
359                );
360                product.major =
361                    get_string!(2, product_regex.v1_replacement, self.replacement_regex, captures);
362                product.minor =
363                    get_string!(3, product_regex.v2_replacement, self.replacement_regex, captures);
364                product.patch =
365                    get_string!(4, product_regex.v3_replacement, self.replacement_regex, captures);
366
367                break;
368            }
369        }
370
371        if product.name.is_none() {
372            product.name = Some(Cow::from("Other"));
373        }
374
375        product
376    }
377
378    pub fn parse_os<'a, S: AsRef<str> + ?Sized>(&'a self, user_agent: &'a S) -> OS<'a> {
379        let mut os = OS::default();
380
381        for os_regex in self.os_regexes.iter() {
382            if let Some(captures) = os_regex.regex.captures(user_agent.as_ref()) {
383                os.name = get_string!(1, os_regex.os_replacement, self.replacement_regex, captures);
384                os.major =
385                    get_string!(2, os_regex.os_v1_replacement, self.replacement_regex, captures);
386                os.minor =
387                    get_string!(3, os_regex.os_v2_replacement, self.replacement_regex, captures);
388                os.patch =
389                    get_string!(4, os_regex.os_v3_replacement, self.replacement_regex, captures);
390                os.patch_minor =
391                    get_string!(5, os_regex.os_v4_replacement, self.replacement_regex, captures);
392
393                break;
394            }
395        }
396
397        if os.name.is_none() {
398            os.name = Some(Cow::from("Other"));
399        }
400
401        os
402    }
403
404    pub fn parse_device<'a, S: AsRef<str> + ?Sized>(&'a self, user_agent: &'a S) -> Device<'a> {
405        let mut device = Device::default();
406
407        for device_regex in self.device_regexes.iter() {
408            if let Some(captures) = device_regex.regex.captures(user_agent.as_ref()) {
409                device.name = get_string!(
410                    1,
411                    device_regex.device_replacement,
412                    self.replacement_regex,
413                    captures
414                );
415                device.brand = get_string!(
416                    2,
417                    device_regex.brand_replacement,
418                    self.replacement_regex,
419                    captures
420                );
421                device.model = get_string!(
422                    1,
423                    device_regex.model_replacement,
424                    self.replacement_regex,
425                    captures
426                );
427
428                break;
429            }
430        }
431
432        if device.name.is_none() {
433            device.name = Some(Cow::from("Other"));
434        }
435
436        device
437    }
438
439    pub fn parse_cpu<'a, S: AsRef<str> + ?Sized>(&'a self, user_agent: &'a S) -> CPU<'a> {
440        let mut cpu = CPU::default();
441
442        for cpu_regex in self.cpu_regexes.iter() {
443            if let Some(captures) = cpu_regex.regex.captures(user_agent.as_ref()) {
444                cpu.architecture = get_string!(
445                    1,
446                    cpu_regex.architecture_replacement,
447                    self.replacement_regex,
448                    captures
449                );
450
451                break;
452            }
453        }
454
455        cpu
456    }
457
458    pub fn parse_engine<'a, S: AsRef<str> + ?Sized>(&'a self, user_agent: &'a S) -> Engine<'a> {
459        let mut engine = Engine::default();
460
461        for engine_regex in self.engine_regexes.iter() {
462            if let Some(captures) = engine_regex.regex.captures(user_agent.as_ref()) {
463                engine.name =
464                    get_string!(1, engine_regex.name_replacement, self.replacement_regex, captures);
465                engine.major = get_string!(
466                    2,
467                    engine_regex.engine_v1_replacement,
468                    self.replacement_regex,
469                    captures
470                );
471                engine.minor = get_string!(
472                    3,
473                    engine_regex.engine_v2_replacement,
474                    self.replacement_regex,
475                    captures
476                );
477                engine.patch = get_string!(
478                    4,
479                    engine_regex.engine_v3_replacement,
480                    self.replacement_regex,
481                    captures
482                );
483
484                break;
485            }
486        }
487
488        engine
489    }
490}
491
492impl FromStr for UserAgentParser {
493    type Err = UserAgentParserError;
494
495    #[inline]
496    fn from_str(s: &str) -> Result<Self, Self::Err> {
497        UserAgentParser::from_str(s)
498    }
499}