browscap_rs/
user_agent_parser.rs

1use crate::filter::{self, Filter, FilterType};
2use crate::literal::Literal;
3use crate::rule::Rule;
4use crate::{Capabilities, UserAgentParser, literal};
5use log::debug;
6use std::time::Instant;
7use ustr::Ustr;
8
/// Substrings for which `build_filters` creates one `Contains` filter each,
/// in exactly this order.
///
/// The order and the entry count are part of the contract: filters are pushed
/// in array order, so do not reorder or deduplicate entries casually.
/// NOTE(review): the `?` in the `android?…` entries is presumably a
/// single-character wildcard of the rule pattern syntax — confirm against
/// `filter::create_contains_masker`.
#[rustfmt::skip]
pub const COMMON: [&str; 75] = [
    // Dash and single digits.
    "-", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    // Generic product, vendor and browser words.
    "profile", "player", "compatible", "android", "google", "tab",
    "transformer", "lenovo", "micro", "edge", "safari", "opera",
    "chrome", "firefox", "msie", "chromium",
    // Platform fragments (trailing spaces are significant) and further
    // device / browser / engine words.
    "cpu os ", "cpu iphone os ", "windows nt ", "mac os x ", "linux", "bsd",
    "windows phone", "iphone", "pad", "blackberry", "nokia", "alcatel",
    "ucbrowser", "mobile", "ie", "mercury", "samsung", "browser", "wow64",
    "silk", "lunascape", "crios", "epiphany", "konqueror", "version", "rv:",
    "build", "bot", "like gecko", "applewebkit", "trident", "mozilla",
    // Specific Windows NT versions.
    "windows nt 4", "windows nt 5.0", "windows nt 5.1", "windows nt 5.2",
    "windows nt 6.0", "windows nt 6.1", "windows nt 6.2", "windows nt 6.3",
    "windows nt 10.0",
    // Specific Android versions.
    "android?4.0", "android?4.1", "android?4.2", "android?4.3", "android?4.4",
    "android?2.3", "android?5",
];
86
/// User-agent prefixes for which `build_filters` creates one `Prefix` filter
/// each, in this order (before any `Contains` filter).
const FILTER_PREFIXES: [&str; 2] = [
    "mozilla/5.0",
    "mozilla/4",
];
88
89impl  UserAgentParser {
90    pub fn new(mut rules: Vec<Rule>) -> UserAgentParser {
91        let timer=Instant::now();
92        get_ordered_rules(&mut rules);
93        let time=timer.elapsed();
94        debug!("order rules time:{:?}",time);
95        let my_filters = build_filters(&rules);
96        UserAgentParser {
97            my_rules: rules,
98            my_filters: my_filters,
99        }
100    }
101
102    pub fn parse(&self, user_agent: &str) -> &Capabilities {
103        if user_agent.is_empty() {
104            return crate::capabilities::DEFAULT_CAPABILITIES.get().unwrap();
105        };
106        let mut search_string = literal::get_searchable_string(user_agent.to_lowercase());
107        let includes = filter::filter(&mut search_string, &self.my_filters, self.my_rules.len());
108        for i in includes.iter_ones() {
109            let rule = &self.my_rules[i];
110            if rule.matches(&mut search_string) {
111                return rule.get_capabilities();
112            }
113        }
114        return crate::capabilities::DEFAULT_CAPABILITIES.get().unwrap();
115    }
116}
117
118fn build_filters(my_rules: &Vec<Rule>) -> Vec<Filter> {
119    let timer=Instant::now();
120    let mut result = Vec::new();
121
122    for pattern in FILTER_PREFIXES {
123        let pattern = Ustr::from(pattern);
124        let literal = Literal::create_literal(pattern);
125        let mask = filter::create_prefix_masker(my_rules, pattern);
126        result.push(Filter::new(FilterType::Prefix(literal), mask));
127    }
128    // Build filters for specific contains constraints
129    for common in COMMON {
130        let common = Ustr::from(common);
131        let literal = Literal::create_literal(common);
132        let mask = filter::create_contains_masker(my_rules, common);
133        result.push(Filter::new(FilterType::Contains(literal), mask));
134    }
135    let time=timer.elapsed();
136    debug!("build filters time:{:?}",time);
137    result
138}
139fn get_ordered_rules(rules: &mut Vec<Rule>) {
140     let total_timer = Instant::now();
141    let mut pattern_build_count = 0;
142    
143    // 阶段1:快速按size排序
144    rules.sort_by_key(|r| std::cmp::Reverse(r.get_size()));
145    
146    // 阶段2:只处理size相同的组
147    let mut i = 0;
148    while i < rules.len() {
149        let current_size = rules[i].get_size();
150        let mut j = i + 1;
151        
152        while j < rules.len() && rules[j].get_size() == current_size {
153            j += 1;
154        }
155        
156        if j - i > 1 {
157            // 关键优化:组内预计算Pattern
158            let patterns: Vec<String> = rules[i..j]
159                .iter()
160                .map(|r| {
161                    pattern_build_count += 1;
162                    r.get_pattern()
163                })
164                .collect();
165            
166            // 使用预计算的patterns进行排序,但记录原始索引
167            let mut indexed_patterns: Vec<(usize, &String)> = 
168                patterns.iter().enumerate().collect();
169            
170            indexed_patterns.sort_by(|a, b| a.1.cmp(b.1));
171            
172            // 获取排序后的索引顺序
173            let mut sorted_indices: Vec<usize> = 
174                indexed_patterns.into_iter().map(|(idx, _)| idx).collect();
175            
176            // 原地重新排列rules(使用交换)
177            for pos in 0..(j - i) {
178                let current = pos;
179                while sorted_indices[current] != current {
180                    let target = sorted_indices[current];
181                    rules.swap(i + current, i + target);
182                    sorted_indices.swap(current, target);
183                }
184            }
185        }
186        
187        i = j;
188    }
189    
190    debug!("总时间: {:?}", total_timer.elapsed());
191    debug!("最终Pattern构建次数: {}", pattern_build_count);
192}