1#![deny(unsafe_code)]
2#![warn(missing_docs)]
3#![allow(clippy::empty_docs)]
4#![doc = include_str!("../README.md")]
5
6use regex::Captures;
7use serde::Deserialize;
8
9pub use regex_filtered::{BuildError, ParseError};
10
11mod resolvers;
12
13#[derive(Debug)]
16pub enum Error {
17 ParseError(ParseError),
20 BuildError(BuildError),
23 MissingGroup(usize),
25}
26impl std::error::Error for Error {
27 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
28 match self {
29 Error::ParseError(p) => Some(p),
30 Error::BuildError(b) => Some(b),
31 Error::MissingGroup(_) => None,
32 }
33 }
34}
35impl std::fmt::Display for Error {
36 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
37 write!(f, "{self:?}")
38 }
39}
40impl From<ParseError> for Error {
41 fn from(value: ParseError) -> Self {
42 Self::ParseError(value)
43 }
44}
45impl From<BuildError> for Error {
46 fn from(value: BuildError) -> Self {
47 Self::BuildError(value)
48 }
49}
50
51#[allow(missing_docs)]
58#[derive(Deserialize)]
59pub struct Regexes<'a> {
60 pub user_agent_parsers: Vec<user_agent::Parser<'a>>,
61 pub os_parsers: Vec<os::Parser<'a>>,
62 pub device_parsers: Vec<device::Parser<'a>>,
63}
64
65impl<'a> TryFrom<Regexes<'a>> for Extractor<'a> {
66 type Error = Error;
67 fn try_from(r: Regexes<'a>) -> Result<Self, Error> {
73 let ua = r
74 .user_agent_parsers
75 .into_iter()
76 .try_fold(user_agent::Builder::new(), |b, p| b.push(p))?
77 .build()?;
78 let os = r
79 .os_parsers
80 .into_iter()
81 .try_fold(os::Builder::new(), |b, p| b.push(p))?
82 .build()?;
83 let dev = r
84 .device_parsers
85 .into_iter()
86 .try_fold(device::Builder::new(), |b, p| b.push(p))?
87 .build()?;
88 Ok(Extractor { ua, os, dev })
89 }
90}
91
92#[allow(missing_docs)]
95pub struct Extractor<'a> {
96 pub ua: user_agent::Extractor<'a>,
97 pub os: os::Extractor<'a>,
98 pub dev: device::Extractor<'a>,
99}
100impl<'a> Extractor<'a> {
101 pub fn extract(
103 &'a self,
104 ua: &'a str,
105 ) -> (
106 Option<user_agent::ValueRef<'a>>,
107 Option<os::ValueRef<'a>>,
108 Option<device::ValueRef<'a>>,
109 ) {
110 (
111 self.ua.extract(ua),
112 self.os.extract(ua),
113 self.dev.extract(ua),
114 )
115 }
116}
117
118pub mod user_agent {
124 use serde::Deserialize;
125 use std::borrow::Cow;
126
127 use crate::resolvers::{FallbackResolver, FamilyResolver};
128 use regex_filtered::BuildError;
129
130 #[derive(Deserialize, Default)]
134 pub struct Parser<'a> {
135 pub regex: Cow<'a, str>,
138 pub family_replacement: Option<Cow<'a, str>>,
144 pub v1_replacement: Option<Cow<'a, str>>,
147 pub v2_replacement: Option<Cow<'a, str>>,
150 pub v3_replacement: Option<Cow<'a, str>>,
153 pub v4_replacement: Option<Cow<'a, str>>,
156 }
157
158 type Repl<'a> = (
159 FamilyResolver<'a>,
160 FallbackResolver<'a>,
163 FallbackResolver<'a>,
164 FallbackResolver<'a>,
165 FallbackResolver<'a>,
166 );
167
168 #[derive(Default)]
171 pub struct Builder<'a> {
172 builder: regex_filtered::Builder,
173 repl: Vec<Repl<'a>>,
174 }
175 impl<'a> Builder<'a> {
176 pub fn new() -> Self {
178 Self {
179 builder: regex_filtered::Builder::new_atom_len(3),
180 repl: Vec::new(),
181 }
182 }
183
184 pub fn build(self) -> Result<Extractor<'a>, BuildError> {
187 let Self { builder, repl } = self;
188
189 Ok(Extractor {
190 matcher: builder.build()?,
191 repl,
192 })
193 }
194
195 pub fn push(mut self, ua: Parser<'a>) -> Result<Self, super::Error> {
198 self.builder = self.builder.push(&super::rewrite_regex(&ua.regex))?;
199 let r = &self.builder.regexes()[self.builder.regexes().len() - 1];
200 let groups = r.captures_len() - 1;
202 self.repl.push((
203 FamilyResolver::new(ua.family_replacement, groups)?,
204 FallbackResolver::new(ua.v1_replacement, groups, 2),
205 FallbackResolver::new(ua.v2_replacement, groups, 3),
206 FallbackResolver::new(ua.v3_replacement, groups, 4),
207 FallbackResolver::new(ua.v4_replacement, groups, 5),
208 ));
209 Ok(self)
210 }
211
212 pub fn push_all<I>(self, ua: I) -> Result<Self, super::Error>
214 where
215 I: IntoIterator<Item = Parser<'a>>,
216 {
217 ua.into_iter().try_fold(self, |s, p| s.push(p))
218 }
219 }
220
221 pub struct Extractor<'a> {
223 matcher: regex_filtered::Regexes,
224 repl: Vec<Repl<'a>>,
225 }
226 impl<'a> Extractor<'a> {
227 pub fn extract(&'a self, ua: &'a str) -> Option<ValueRef<'a>> {
239 let (idx, re) = self.matcher.matching(ua).next()?;
240 let c = re.captures(ua)?;
241
242 let (f, v1, v2, v3, v4) = &self.repl[idx];
243
244 Some(ValueRef {
245 family: f.resolve(&c),
246 major: v1.resolve(&c),
247 minor: v2.resolve(&c),
248 patch: v3.resolve(&c),
249 patch_minor: v4.resolve(&c),
250 })
251 }
252 }
253 #[derive(PartialEq, Eq, Default, Debug)]
257 pub struct ValueRef<'a> {
258 pub family: Cow<'a, str>,
260 pub major: Option<&'a str>,
262 pub minor: Option<&'a str>,
264 pub patch: Option<&'a str>,
266 pub patch_minor: Option<&'a str>,
268 }
269
270 impl ValueRef<'_> {
271 pub fn into_owned(self) -> Value {
275 Value {
276 family: self.family.into_owned(),
277 major: self.major.map(|c| c.to_string()),
278 minor: self.minor.map(|c| c.to_string()),
279 patch: self.patch.map(|c| c.to_string()),
280 patch_minor: self.patch_minor.map(|c| c.to_string()),
281 }
282 }
283 }
284
285 #[derive(PartialEq, Eq, Default, Debug)]
288 pub struct Value {
289 pub family: String,
291 pub major: Option<String>,
293 pub minor: Option<String>,
295 pub patch: Option<String>,
297 pub patch_minor: Option<String>,
299 }
300}
301
302pub mod os {
304 use serde::Deserialize;
305 use std::borrow::Cow;
306
307 use regex_filtered::{BuildError, ParseError};
308
309 use crate::resolvers::{OptResolver, Resolver};
310
311 #[derive(Deserialize, Default)]
313 pub struct Parser<'a> {
314 pub regex: Cow<'a, str>,
316 pub os_replacement: Option<Cow<'a, str>>,
321 pub os_v1_replacement: Option<Cow<'a, str>>,
323 pub os_v2_replacement: Option<Cow<'a, str>>,
325 pub os_v3_replacement: Option<Cow<'a, str>>,
327 pub os_v4_replacement: Option<Cow<'a, str>>,
329 }
330 #[derive(Default)]
332 pub struct Builder<'a> {
333 builder: regex_filtered::Builder,
334 repl: Vec<(
335 Resolver<'a>,
336 OptResolver<'a>,
337 OptResolver<'a>,
338 OptResolver<'a>,
339 OptResolver<'a>,
340 )>,
341 }
342 impl<'a> Builder<'a> {
343 pub fn new() -> Self {
345 Self {
346 builder: regex_filtered::Builder::new_atom_len(3),
347 repl: Vec::new(),
348 }
349 }
350
351 pub fn build(self) -> Result<Extractor<'a>, BuildError> {
354 let Self { builder, repl } = self;
355
356 Ok(Extractor {
357 matcher: builder.build()?,
358 repl,
359 })
360 }
361
362 pub fn push(mut self, os: Parser<'a>) -> Result<Self, ParseError> {
366 self.builder = self.builder.push(&super::rewrite_regex(&os.regex))?;
367 let r = &self.builder.regexes()[self.builder.regexes().len() - 1];
368 let groups = r.captures_len() - 1;
370 self.repl.push((
371 Resolver::new(os.os_replacement, groups, 1),
372 OptResolver::new(os.os_v1_replacement, groups, 2),
373 OptResolver::new(os.os_v2_replacement, groups, 3),
374 OptResolver::new(os.os_v3_replacement, groups, 4),
375 OptResolver::new(os.os_v4_replacement, groups, 5),
376 ));
377 Ok(self)
378 }
379
380 pub fn push_all<I>(self, ua: I) -> Result<Self, ParseError>
382 where
383 I: IntoIterator<Item = Parser<'a>>,
384 {
385 ua.into_iter().try_fold(self, |s, p| s.push(p))
386 }
387 }
388
389 pub struct Extractor<'a> {
391 matcher: regex_filtered::Regexes,
392 repl: Vec<(
393 Resolver<'a>,
394 OptResolver<'a>,
395 OptResolver<'a>,
396 OptResolver<'a>,
397 OptResolver<'a>,
398 )>,
399 }
400 impl<'a> Extractor<'a> {
401 pub fn extract(&'a self, ua: &'a str) -> Option<ValueRef<'a>> {
404 let (idx, re) = self.matcher.matching(ua).next()?;
405 let c = re.captures(ua)?;
406
407 let (o, v1, v2, v3, v4) = &self.repl[idx];
408
409 Some(ValueRef {
410 os: o.resolve(&c),
411 major: v1.resolve(&c),
412 minor: v2.resolve(&c),
413 patch: v3.resolve(&c),
414 patch_minor: v4.resolve(&c),
415 })
416 }
417 }
418
419 #[derive(PartialEq, Eq, Default, Debug)]
421 pub struct ValueRef<'a> {
422 pub os: Cow<'a, str>,
424 pub major: Option<Cow<'a, str>>,
426 pub minor: Option<Cow<'a, str>>,
428 pub patch: Option<Cow<'a, str>>,
430 pub patch_minor: Option<Cow<'a, str>>,
432 }
433
434 impl ValueRef<'_> {
435 pub fn into_owned(self) -> Value {
439 Value {
440 os: self.os.into_owned(),
441 major: self.major.map(|c| c.into_owned()),
442 minor: self.minor.map(|c| c.into_owned()),
443 patch: self.patch.map(|c| c.into_owned()),
444 patch_minor: self.patch_minor.map(|c| c.into_owned()),
445 }
446 }
447 }
448
449 #[derive(PartialEq, Eq, Default, Debug)]
451 pub struct Value {
452 pub os: String,
454 pub major: Option<String>,
456 pub minor: Option<String>,
458 pub patch: Option<String>,
460 pub patch_minor: Option<String>,
462 }
463}
464
465pub mod device {
467 use serde::Deserialize;
468 use std::borrow::Cow;
469
470 use regex_filtered::{BuildError, ParseError};
471
472 use crate::resolvers::{OptResolver, Resolver};
473
474 #[derive(Deserialize, PartialEq, Eq)]
476 pub enum Flag {
477 #[serde(rename = "i")]
480 IgnoreCase,
481 }
482 #[derive(Deserialize, Default)]
484 pub struct Parser<'a> {
485 pub regex: Cow<'a, str>,
487 pub regex_flag: Option<Flag>,
489 pub device_replacement: Option<Cow<'a, str>>,
493 pub brand_replacement: Option<Cow<'a, str>>,
496 pub model_replacement: Option<Cow<'a, str>>,
500 }
501
502 #[derive(Default)]
504 pub struct Builder<'a> {
505 builder: regex_filtered::Builder,
506 repl: Vec<(Resolver<'a>, OptResolver<'a>, OptResolver<'a>)>,
507 }
508 impl<'a> Builder<'a> {
509 pub fn new() -> Self {
512 Self {
513 builder: regex_filtered::Builder::new_atom_len(2),
514 repl: Vec::new(),
515 }
516 }
517
518 pub fn build(self) -> Result<Extractor<'a>, BuildError> {
520 let Self { builder, repl } = self;
521
522 Ok(Extractor {
523 matcher: builder.build()?,
524 repl,
525 })
526 }
527
528 pub fn push(mut self, device: Parser<'a>) -> Result<Self, ParseError> {
534 self.builder = self.builder.push_opt(
535 &super::rewrite_regex(&device.regex),
536 regex_filtered::Options::new()
537 .case_insensitive(device.regex_flag == Some(Flag::IgnoreCase)),
538 )?;
539 let r = &self.builder.regexes()[self.builder.regexes().len() - 1];
540 let groups = r.captures_len() - 1;
542 self.repl.push((
543 Resolver::new(device.device_replacement, groups, 1),
544 OptResolver::new(device.brand_replacement, 0, 999),
545 OptResolver::new(device.model_replacement, groups, 1),
546 ));
547 Ok(self)
548 }
549
550 pub fn push_all<I>(self, ua: I) -> Result<Self, ParseError>
552 where
553 I: IntoIterator<Item = Parser<'a>>,
554 {
555 ua.into_iter().try_fold(self, |s, p| s.push(p))
556 }
557 }
558
559 pub struct Extractor<'a> {
561 matcher: regex_filtered::Regexes,
562 repl: Vec<(Resolver<'a>, OptResolver<'a>, OptResolver<'a>)>,
563 }
564 impl<'a> Extractor<'a> {
565 pub fn extract(&'a self, ua: &'a str) -> Option<ValueRef<'a>> {
569 let (idx, re) = self.matcher.matching(ua).next()?;
570 let c = re.captures(ua)?;
571
572 let (d, v1, v2) = &self.repl[idx];
573
574 Some(ValueRef {
575 device: d.resolve(&c),
576 brand: v1.resolve(&c),
577 model: v2.resolve(&c),
578 })
579 }
580 }
581
582 #[derive(PartialEq, Eq, Default, Debug)]
585 pub struct ValueRef<'a> {
586 pub device: Cow<'a, str>,
588 pub brand: Option<Cow<'a, str>>,
590 pub model: Option<Cow<'a, str>>,
592 }
593
594 impl ValueRef<'_> {
595 pub fn into_owned(self) -> Value {
600 Value {
601 device: self.device.into_owned(),
602 brand: self.brand.map(|c| c.into_owned()),
603 model: self.model.map(|c| c.into_owned()),
604 }
605 }
606 }
607
608 #[derive(PartialEq, Eq, Default, Debug)]
610 pub struct Value {
611 pub device: String,
613 pub brand: Option<String>,
615 pub model: Option<String>,
617 }
618}
619
/// Rewrites `re`, returning it borrowed and untouched when no rewrite
/// applies.
///
/// Two rewrites are performed:
///
/// - a bounded repetition `{0,n}` or `{1,n}` whose upper bound is written
///   with at least three digits becomes `*` or `+` respectively;
/// - the escapes `\d`, `\D`, `\w` and `\W` are expanded to the explicit
///   classes `[0-9]`, `[^0-9]`, `[A-Za-z0-9_]` and `[^A-Za-z0-9_]`,
///   including inside an existing class (`[\d]` becomes `[[0-9]]`).
///
/// An escaped `{` and a `{` inside a `[...]` class are never treated as a
/// repetition. A pattern that starts with `{` is returned as-is. An
/// unescaped `]` with no open class is treated as a plain character and does
/// not affect class tracking.
fn rewrite_regex(re: &str) -> std::borrow::Cow<'_, str> {
    // Byte offset of the first character of `re` not yet copied to `out`.
    // Every rewrite advances it past at least one byte, so 0 also means
    // "nothing has been rewritten".
    let mut from = 0;
    let mut out = String::new();

    let mut it = re.char_indices();
    // True when the previous character was an unescaped backslash.
    let mut escape = false;
    // Number of currently open `[` brackets. Unsigned and saturating so an
    // unmatched `]` cannot push it below zero, which would misclassify
    // everything that follows.
    let mut inclass = 0usize;
    'main: while let Some((idx, c)) = it.next() {
        match c {
            '\\' if !escape => {
                // Skip the reset at the bottom of the loop so the next
                // character sees the escape.
                escape = true;
                continue;
            }
            '{' if !escape && inclass == 0 => {
                if idx == 0 {
                    // Nothing precedes the brace, so there is nothing to
                    // repeat: give up on the whole pattern.
                    return re.into();
                }
                // Only `{0,...}` and `{1,...}` have a `*` / `+` equivalent.
                let Some((_, start)) = it.next() else {
                    continue;
                };
                if start != '0' && start != '1' {
                    continue;
                }

                if !matches!(it.next(), Some((_, ','))) {
                    continue;
                }

                // Count the digits of the upper bound.
                let mut digits = 0;
                for (ri, rc) in it.by_ref() {
                    match rc {
                        '}' if digits > 2 => {
                            // `idx` is the opening brace and `ri` the
                            // closing one: copy what precedes the range,
                            // then substitute the unbounded operator.
                            out.push_str(&re[from..idx]);
                            from = ri + 1;
                            out.push_str(if start == '0' { "*" } else { "+" });
                            break;
                        }
                        c if c.is_ascii_digit() => {
                            digits += 1;
                        }
                        // Short bound, open-ended range, or not a
                        // repetition at all: leave the text untouched.
                        _ => continue 'main,
                    }
                }
            }
            '[' if !escape => inclass += 1,
            ']' if !escape => inclass = inclass.saturating_sub(1),
            'd' | 'D' | 'w' | 'W' if escape => {
                // `idx - 1` is the backslash that introduced the escape;
                // drop both characters and emit the explicit class.
                out.push_str(&re[from..idx - 1]);
                from = idx + 1;
                out.push_str(match c {
                    'd' => "[0-9]",
                    'D' => "[^0-9]",
                    'w' => "[A-Za-z0-9_]",
                    _ => "[^A-Za-z0-9_]",
                });
            }
            _ => (),
        }
        escape = false;
    }

    if from == 0 {
        re.into()
    } else {
        out.push_str(&re[from..]);
        out.into()
    }
}
713
#[cfg(test)]
mod test_rewrite_regex {
    use super::rewrite_regex;

    /// Asserts that every `(input, expected)` pair rewrites as expected.
    fn check_all(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(rewrite_regex(input), expected);
        }
    }

    // Bounds written with fewer than three digits, and open-ended ranges,
    // are left alone.
    #[test]
    fn ignore_small_repetition() {
        check_all(&[
            (".{0,2}x", ".{0,2}x"),
            (".{0,}", ".{0,}"),
            (".{1,}", ".{1,}"),
        ]);
    }

    // Only bounds written with three or more digits are replaced.
    #[test]
    fn rewrite_large_repetitions() {
        check_all(&[
            (".{0,20}x", ".{0,20}x"),
            ("(.{0,100})", "(.*)"),
            ("(.{1,50})", "(.{1,50})"),
            (".{1,300}x", ".+x"),
        ]);
    }

    // Every eligible repetition in a pattern is rewritten, not just the
    // first one.
    #[test]
    fn rewrite_all_repetitions() {
        check_all(&[
            (
                "; {0,2}(T-(?:07|[^0][0-9])[^;/]{1,100}?)(?: Build|\\) AppleWebKit)",
                "; {0,2}(T-(?:07|[^0][0-9])[^;/]+?)(?: Build|\\) AppleWebKit)",
            ),
            (
                "; {0,2}(SH\\-?[0-9][0-9][^;/]{1,100}|SBM[0-9][^;/]{1,100}?)(?: Build|\\) AppleWebKit)",
                "; {0,2}(SH\\-?[0-9][0-9][^;/]+|SBM[0-9][^;/]+?)(?: Build|\\) AppleWebKit)",
            ),
        ]);
    }

    #[test]
    fn ignore_non_repetitions() {
        let escaped = r"\{1,2}";
        assert_eq!(
            rewrite_regex(escaped),
            escaped,
            "if the opening brace is escaped it's not a repetition"
        );

        let in_set = "[.{1,100}]";
        assert_eq!(
            rewrite_regex(in_set),
            in_set,
            "inside a set it's not a repetition"
        );
    }

    // Class escapes are expanded, including inside an existing class.
    #[test]
    fn rewrite_classes() {
        check_all(&[
            (r"\dx", "[0-9]x"),
            (r"\wx", "[A-Za-z0-9_]x"),
            (r"[\d]x", r"[[0-9]]x"),
        ]);
    }
}