#![deny(unsafe_code)]
#![warn(missing_docs)]
#![allow(clippy::empty_docs)]
#![doc = include_str!("../README.md")]
use regex::Captures;
use serde::Deserialize;
pub use regex_filtered::{BuildError, ParseError};
mod resolvers;
/// Errors that can occur while turning parser descriptions into extractors.
#[derive(Debug)]
pub enum Error {
/// A regex was rejected when it was pushed onto a `regex_filtered` builder.
ParseError(ParseError),
/// Building the final `regex_filtered` matcher failed.
BuildError(BuildError),
/// Carries a `usize`; the name suggests the index of a capture group that
/// a replacement needs but the regex lacks. NOTE(review): the producing
/// code lives in the `resolvers` module -- confirm there.
MissingGroup(usize),
}
impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Error::ParseError(p) => Some(p),
Error::BuildError(b) => Some(b),
Error::MissingGroup(_) => None,
}
}
}
/// Displays an [`Error`] using its `Debug` representation.
impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Deliberately goes through fresh format arguments instead of calling
        // `Debug::fmt(self, f)` directly, so flags supplied to the `Display`
        // call (width, `#`, ...) are not forwarded to the `Debug` output.
        f.write_fmt(format_args!("{self:?}"))
    }
}
impl From<ParseError> for Error {
fn from(value: ParseError) -> Self {
Self::ParseError(value)
}
}
impl From<BuildError> for Error {
fn from(value: BuildError) -> Self {
Self::BuildError(value)
}
}
/// Deserializable set of parser descriptions, one list per domain
/// (user agent, OS, device).
///
/// Convert it into an [`Extractor`] with `TryFrom` / `TryInto`.
#[allow(missing_docs)]
#[derive(Deserialize)]
pub struct Regexes<'a> {
/// User agent parsers, pushed onto `user_agent::Builder` in list order.
pub user_agent_parsers: Vec<user_agent::Parser<'a>>,
/// OS parsers, pushed onto `os::Builder` in list order.
pub os_parsers: Vec<os::Parser<'a>>,
/// Device parsers, pushed onto `device::Builder` in list order.
pub device_parsers: Vec<device::Parser<'a>>,
}
impl<'a> TryFrom<Regexes<'a>> for Extractor<'a> {
type Error = Error;
fn try_from(r: Regexes<'a>) -> Result<Self, Error> {
let ua = r
.user_agent_parsers
.into_iter()
.try_fold(user_agent::Builder::new(), |b, p| b.push(p))?
.build()?;
let os = r
.os_parsers
.into_iter()
.try_fold(os::Builder::new(), |b, p| b.push(p))?
.build()?;
let dev = r
.device_parsers
.into_iter()
.try_fold(device::Builder::new(), |b, p| b.push(p))?
.build()?;
Ok(Extractor { ua, os, dev })
}
}
/// Bundle of the three per-domain extractors.
///
/// Can be built from a [`Regexes`] through `TryFrom`, or assembled by hand
/// from individually built extractors since all fields are public.
#[allow(missing_docs)]
pub struct Extractor<'a> {
/// Extractor for user agent values.
pub ua: user_agent::Extractor<'a>,
/// Extractor for OS values.
pub os: os::Extractor<'a>,
/// Extractor for device values.
pub dev: device::Extractor<'a>,
}
impl<'a> Extractor<'a> {
    /// Runs the user agent, OS and device extractors against `ua`.
    ///
    /// The results are returned in that order; each one is `None` when the
    /// corresponding extractor produced nothing for the string.
    pub fn extract(
        &'a self,
        ua: &'a str,
    ) -> (
        Option<user_agent::ValueRef<'a>>,
        Option<os::ValueRef<'a>>,
        Option<device::ValueRef<'a>>,
    ) {
        let ua_value = self.ua.extract(ua);
        let os_value = self.os.extract(ua);
        let device_value = self.dev.extract(ua);
        (ua_value, os_value, device_value)
    }
}
/// User agent extraction: parser descriptions, the builder that compiles
/// them, and the resulting extractor and value types.
pub mod user_agent {
    use serde::Deserialize;
    use std::borrow::Cow;
    use crate::resolvers::{FallbackResolver, FamilyResolver};
    use regex_filtered::BuildError;

    /// Description of one user agent parser: a regex plus optional
    /// replacements for each output field.
    #[derive(Deserialize, Default)]
    pub struct Parser<'a> {
        /// Regex matched against the user agent string.
        pub regex: Cow<'a, str>,
        /// Optional replacement used when resolving `family`.
        pub family_replacement: Option<Cow<'a, str>>,
        /// Optional replacement used when resolving `major`.
        pub v1_replacement: Option<Cow<'a, str>>,
        /// Optional replacement used when resolving `minor`.
        pub v2_replacement: Option<Cow<'a, str>>,
        /// Optional replacement used when resolving `patch`.
        pub v3_replacement: Option<Cow<'a, str>>,
        /// Optional replacement used when resolving `patch_minor`.
        pub v4_replacement: Option<Cow<'a, str>>,
    }

    /// Resolvers for one parser, in output order: family, major, minor,
    /// patch, patch_minor.
    type Repl<'a> = (
        FamilyResolver<'a>,
        FallbackResolver<'a>,
        FallbackResolver<'a>,
        FallbackResolver<'a>,
        FallbackResolver<'a>,
    );

    /// Accumulates [`Parser`]s and compiles them into an [`Extractor`].
    ///
    /// NOTE(review): the derived `Default` builds the inner
    /// `regex_filtered::Builder` through its own `Default` rather than
    /// through [`Builder::new`], so the two may not be configured alike --
    /// confirm before relying on `Builder::default()`.
    #[derive(Default)]
    pub struct Builder<'a> {
        builder: regex_filtered::Builder,
        // One entry per pushed parser, parallel to the builder's regexes.
        repl: Vec<Repl<'a>>,
    }

    impl<'a> Builder<'a> {
        /// Creates an empty builder whose matcher uses an atom length of 3.
        pub fn new() -> Self {
            let builder = regex_filtered::Builder::new_atom_len(3);
            Self {
                builder,
                repl: Vec::new(),
            }
        }

        /// Finalises the matcher and returns the ready-to-use extractor.
        ///
        /// # Errors
        ///
        /// Returns the `BuildError` reported by the underlying matcher
        /// builder.
        pub fn build(self) -> Result<Extractor<'a>, BuildError> {
            let matcher = self.builder.build()?;
            Ok(Extractor {
                matcher,
                repl: self.repl,
            })
        }

        /// Adds one parser: its (rewritten) regex goes to the matcher and
        /// its replacements are turned into resolvers.
        ///
        /// # Errors
        ///
        /// Fails if the regex is rejected by the matcher builder or if the
        /// family resolver cannot be created.
        pub fn push(mut self, ua: Parser<'a>) -> Result<Self, super::Error> {
            let Parser {
                regex,
                family_replacement,
                v1_replacement,
                v2_replacement,
                v3_replacement,
                v4_replacement,
            } = ua;

            let pattern = super::rewrite_regex(&regex);
            self.builder = self.builder.push(&pattern)?;

            // The regex just pushed is the last one; its group count
            // excludes the implicit whole-match group.
            let regexes = self.builder.regexes();
            let groups = regexes[regexes.len() - 1].captures_len() - 1;

            let resolvers = (
                FamilyResolver::new(family_replacement, groups)?,
                FallbackResolver::new(v1_replacement, groups, 2),
                FallbackResolver::new(v2_replacement, groups, 3),
                FallbackResolver::new(v3_replacement, groups, 4),
                FallbackResolver::new(v4_replacement, groups, 5),
            );
            self.repl.push(resolvers);
            Ok(self)
        }

        /// Adds every parser yielded by `ua`, in order, stopping at the
        /// first one that fails.
        pub fn push_all<I>(self, ua: I) -> Result<Self, super::Error>
        where
            I: IntoIterator<Item = Parser<'a>>,
        {
            let mut builder = self;
            for parser in ua {
                builder = builder.push(parser)?;
            }
            Ok(builder)
        }
    }

    /// Compiled user agent extractor produced by [`Builder::build`].
    pub struct Extractor<'a> {
        matcher: regex_filtered::Regexes,
        repl: Vec<Repl<'a>>,
    }

    impl<'a> Extractor<'a> {
        /// Extracts user agent information from `ua`.
        ///
        /// Only the first regex reported by the matcher is used. Returns
        /// `None` when no regex is reported or when that regex yields no
        /// captures.
        pub fn extract(&'a self, ua: &'a str) -> Option<ValueRef<'a>> {
            let (index, regex) = self.matcher.matching(ua).next()?;
            let captures = regex.captures(ua)?;
            let (family, major, minor, patch, patch_minor) = &self.repl[index];

            let value = ValueRef {
                family: family.resolve(&captures),
                major: major.resolve(&captures),
                minor: minor.resolve(&captures),
                patch: patch.resolve(&captures),
                patch_minor: patch_minor.resolve(&captures),
            };
            Some(value)
        }
    }

    /// Borrowed result of a user agent extraction.
    #[derive(PartialEq, Eq, Default, Debug, Clone)]
    pub struct ValueRef<'a> {
        /// Resolved family.
        pub family: Cow<'a, str>,
        /// Resolved major version, if any.
        pub major: Option<&'a str>,
        /// Resolved minor version, if any.
        pub minor: Option<&'a str>,
        /// Resolved patch version, if any.
        pub patch: Option<&'a str>,
        /// Resolved patch-minor version, if any.
        pub patch_minor: Option<&'a str>,
    }

    impl ValueRef<'_> {
        /// Copies every borrowed field into an owned [`Value`].
        pub fn into_owned(self) -> Value {
            let Self {
                family,
                major,
                minor,
                patch,
                patch_minor,
            } = self;
            Value {
                family: family.into_owned(),
                major: major.map(String::from),
                minor: minor.map(String::from),
                patch: patch.map(String::from),
                patch_minor: patch_minor.map(String::from),
            }
        }
    }

    /// Owned counterpart of [`ValueRef`].
    #[derive(PartialEq, Eq, Default, Debug, Clone)]
    pub struct Value {
        /// Resolved family.
        pub family: String,
        /// Resolved major version, if any.
        pub major: Option<String>,
        /// Resolved minor version, if any.
        pub minor: Option<String>,
        /// Resolved patch version, if any.
        pub patch: Option<String>,
        /// Resolved patch-minor version, if any.
        pub patch_minor: Option<String>,
    }
}
/// OS extraction: parser descriptions, the builder that compiles them, and
/// the resulting extractor and value types.
pub mod os {
    use serde::Deserialize;
    use std::borrow::Cow;
    use regex_filtered::{BuildError, ParseError};
    use crate::resolvers::{OptResolver, Resolver};

    /// Description of one OS parser: a regex plus optional replacements for
    /// each output field.
    #[derive(Deserialize, Default)]
    pub struct Parser<'a> {
        /// Regex matched against the user agent string.
        pub regex: Cow<'a, str>,
        /// Optional replacement used when resolving `os`.
        pub os_replacement: Option<Cow<'a, str>>,
        /// Optional replacement used when resolving `major`.
        pub os_v1_replacement: Option<Cow<'a, str>>,
        /// Optional replacement used when resolving `minor`.
        pub os_v2_replacement: Option<Cow<'a, str>>,
        /// Optional replacement used when resolving `patch`.
        pub os_v3_replacement: Option<Cow<'a, str>>,
        /// Optional replacement used when resolving `patch_minor`.
        pub os_v4_replacement: Option<Cow<'a, str>>,
    }

    /// Resolvers for one parser, in output order: os, major, minor, patch,
    /// patch_minor.
    type Repl<'a> = (
        Resolver<'a>,
        OptResolver<'a>,
        OptResolver<'a>,
        OptResolver<'a>,
        OptResolver<'a>,
    );

    /// Accumulates [`Parser`]s and compiles them into an [`Extractor`].
    ///
    /// NOTE(review): the derived `Default` builds the inner
    /// `regex_filtered::Builder` through its own `Default` rather than
    /// through [`Builder::new`], so the two may not be configured alike --
    /// confirm before relying on `Builder::default()`.
    #[derive(Default)]
    pub struct Builder<'a> {
        builder: regex_filtered::Builder,
        // One entry per pushed parser, parallel to the builder's regexes.
        repl: Vec<Repl<'a>>,
    }

    impl<'a> Builder<'a> {
        /// Creates an empty builder whose matcher uses an atom length of 3.
        pub fn new() -> Self {
            let builder = regex_filtered::Builder::new_atom_len(3);
            Self {
                builder,
                repl: Vec::new(),
            }
        }

        /// Finalises the matcher and returns the ready-to-use extractor.
        ///
        /// # Errors
        ///
        /// Returns the `BuildError` reported by the underlying matcher
        /// builder.
        pub fn build(self) -> Result<Extractor<'a>, BuildError> {
            let matcher = self.builder.build()?;
            Ok(Extractor {
                matcher,
                repl: self.repl,
            })
        }

        /// Adds one parser: its (rewritten) regex goes to the matcher and
        /// its replacements are turned into resolvers.
        ///
        /// # Errors
        ///
        /// Fails if the regex is rejected by the matcher builder.
        pub fn push(mut self, os: Parser<'a>) -> Result<Self, ParseError> {
            let Parser {
                regex,
                os_replacement,
                os_v1_replacement,
                os_v2_replacement,
                os_v3_replacement,
                os_v4_replacement,
            } = os;

            let pattern = super::rewrite_regex(&regex);
            self.builder = self.builder.push(&pattern)?;

            // The regex just pushed is the last one; its group count
            // excludes the implicit whole-match group.
            let regexes = self.builder.regexes();
            let groups = regexes[regexes.len() - 1].captures_len() - 1;

            let resolvers = (
                Resolver::new(os_replacement, groups, 1),
                OptResolver::new(os_v1_replacement, groups, 2),
                OptResolver::new(os_v2_replacement, groups, 3),
                OptResolver::new(os_v3_replacement, groups, 4),
                OptResolver::new(os_v4_replacement, groups, 5),
            );
            self.repl.push(resolvers);
            Ok(self)
        }

        /// Adds every parser yielded by `ua`, in order, stopping at the
        /// first one that fails.
        pub fn push_all<I>(self, ua: I) -> Result<Self, ParseError>
        where
            I: IntoIterator<Item = Parser<'a>>,
        {
            let mut builder = self;
            for parser in ua {
                builder = builder.push(parser)?;
            }
            Ok(builder)
        }
    }

    /// Compiled OS extractor produced by [`Builder::build`].
    pub struct Extractor<'a> {
        matcher: regex_filtered::Regexes,
        repl: Vec<Repl<'a>>,
    }

    impl<'a> Extractor<'a> {
        /// Extracts OS information from `ua`.
        ///
        /// Only the first regex reported by the matcher is used. Returns
        /// `None` when no regex is reported or when that regex yields no
        /// captures.
        pub fn extract(&'a self, ua: &'a str) -> Option<ValueRef<'a>> {
            let (index, regex) = self.matcher.matching(ua).next()?;
            let captures = regex.captures(ua)?;
            let (os, major, minor, patch, patch_minor) = &self.repl[index];

            let value = ValueRef {
                os: os.resolve(&captures),
                major: major.resolve(&captures),
                minor: minor.resolve(&captures),
                patch: patch.resolve(&captures),
                patch_minor: patch_minor.resolve(&captures),
            };
            Some(value)
        }
    }

    /// Possibly-borrowed result of an OS extraction.
    #[derive(PartialEq, Eq, Default, Debug, Clone)]
    pub struct ValueRef<'a> {
        /// Resolved OS name.
        pub os: Cow<'a, str>,
        /// Resolved major version, if any.
        pub major: Option<Cow<'a, str>>,
        /// Resolved minor version, if any.
        pub minor: Option<Cow<'a, str>>,
        /// Resolved patch version, if any.
        pub patch: Option<Cow<'a, str>>,
        /// Resolved patch-minor version, if any.
        pub patch_minor: Option<Cow<'a, str>>,
    }

    impl ValueRef<'_> {
        /// Converts every field into its owned form, yielding a [`Value`].
        pub fn into_owned(self) -> Value {
            let Self {
                os,
                major,
                minor,
                patch,
                patch_minor,
            } = self;
            Value {
                os: os.into_owned(),
                major: major.map(Cow::into_owned),
                minor: minor.map(Cow::into_owned),
                patch: patch.map(Cow::into_owned),
                patch_minor: patch_minor.map(Cow::into_owned),
            }
        }
    }

    /// Owned counterpart of [`ValueRef`].
    #[derive(PartialEq, Eq, Default, Debug, Clone)]
    pub struct Value {
        /// Resolved OS name.
        pub os: String,
        /// Resolved major version, if any.
        pub major: Option<String>,
        /// Resolved minor version, if any.
        pub minor: Option<String>,
        /// Resolved patch version, if any.
        pub patch: Option<String>,
        /// Resolved patch-minor version, if any.
        pub patch_minor: Option<String>,
    }
}
/// Device extraction: parser descriptions, the builder that compiles them,
/// and the resulting extractor and value types.
pub mod device {
    use serde::Deserialize;
    use std::borrow::Cow;
    use regex_filtered::{BuildError, ParseError};
    use crate::resolvers::{OptResolver, Resolver};

    /// Regex flag a device parser may carry.
    #[derive(Deserialize, PartialEq, Eq)]
    pub enum Flag {
        /// Match case-insensitively; deserialized from the string `"i"`.
        #[serde(rename = "i")]
        IgnoreCase,
    }

    /// Description of one device parser: a regex, an optional flag, and
    /// optional replacements for each output field.
    #[derive(Deserialize, Default)]
    pub struct Parser<'a> {
        /// Regex matched against the user agent string.
        pub regex: Cow<'a, str>,
        /// Optional flag altering how `regex` is compiled.
        pub regex_flag: Option<Flag>,
        /// Optional replacement used when resolving `device`.
        pub device_replacement: Option<Cow<'a, str>>,
        /// Optional replacement used when resolving `brand`.
        pub brand_replacement: Option<Cow<'a, str>>,
        /// Optional replacement used when resolving `model`.
        pub model_replacement: Option<Cow<'a, str>>,
    }

    /// Resolvers for one parser, in output order: device, brand, model.
    type Repl<'a> = (Resolver<'a>, OptResolver<'a>, OptResolver<'a>);

    /// Accumulates [`Parser`]s and compiles them into an [`Extractor`].
    ///
    /// NOTE(review): the derived `Default` builds the inner
    /// `regex_filtered::Builder` through its own `Default` rather than
    /// through [`Builder::new`], so the two may not be configured alike --
    /// confirm before relying on `Builder::default()`.
    #[derive(Default)]
    pub struct Builder<'a> {
        builder: regex_filtered::Builder,
        // One entry per pushed parser, parallel to the builder's regexes.
        repl: Vec<Repl<'a>>,
    }

    impl<'a> Builder<'a> {
        /// Creates an empty builder whose matcher uses an atom length of 2.
        pub fn new() -> Self {
            let builder = regex_filtered::Builder::new_atom_len(2);
            Self {
                builder,
                repl: Vec::new(),
            }
        }

        /// Finalises the matcher and returns the ready-to-use extractor.
        ///
        /// # Errors
        ///
        /// Returns the `BuildError` reported by the underlying matcher
        /// builder.
        pub fn build(self) -> Result<Extractor<'a>, BuildError> {
            let matcher = self.builder.build()?;
            Ok(Extractor {
                matcher,
                repl: self.repl,
            })
        }

        /// Adds one parser: its (rewritten) regex goes to the matcher,
        /// case-insensitively when the parser carries
        /// [`Flag::IgnoreCase`], and its replacements are turned into
        /// resolvers.
        ///
        /// # Errors
        ///
        /// Fails if the regex is rejected by the matcher builder.
        pub fn push(mut self, device: Parser<'a>) -> Result<Self, ParseError> {
            let Parser {
                regex,
                regex_flag,
                device_replacement,
                brand_replacement,
                model_replacement,
            } = device;

            let ignore_case = matches!(regex_flag, Some(Flag::IgnoreCase));
            let pattern = super::rewrite_regex(&regex);
            self.builder = self.builder.push_opt(
                &pattern,
                regex_filtered::Options::new().case_insensitive(ignore_case),
            )?;

            // The regex just pushed is the last one; its group count
            // excludes the implicit whole-match group.
            let regexes = self.builder.regexes();
            let groups = regexes[regexes.len() - 1].captures_len() - 1;

            let resolvers = (
                Resolver::new(device_replacement, groups, 1),
                // Unlike the others, brand is given a group count of 0 and
                // index 999 -- presumably so it never falls back to a
                // capture group; confirm against `OptResolver`.
                OptResolver::new(brand_replacement, 0, 999),
                OptResolver::new(model_replacement, groups, 1),
            );
            self.repl.push(resolvers);
            Ok(self)
        }

        /// Adds every parser yielded by `ua`, in order, stopping at the
        /// first one that fails.
        pub fn push_all<I>(self, ua: I) -> Result<Self, ParseError>
        where
            I: IntoIterator<Item = Parser<'a>>,
        {
            let mut builder = self;
            for parser in ua {
                builder = builder.push(parser)?;
            }
            Ok(builder)
        }
    }

    /// Compiled device extractor produced by [`Builder::build`].
    pub struct Extractor<'a> {
        matcher: regex_filtered::Regexes,
        repl: Vec<Repl<'a>>,
    }

    impl<'a> Extractor<'a> {
        /// Extracts device information from `ua`.
        ///
        /// Only the first regex reported by the matcher is used. Returns
        /// `None` when no regex is reported or when that regex yields no
        /// captures.
        pub fn extract(&'a self, ua: &'a str) -> Option<ValueRef<'a>> {
            let (index, regex) = self.matcher.matching(ua).next()?;
            let captures = regex.captures(ua)?;
            let (device, brand, model) = &self.repl[index];

            let value = ValueRef {
                device: device.resolve(&captures),
                brand: brand.resolve(&captures),
                model: model.resolve(&captures),
            };
            Some(value)
        }
    }

    /// Possibly-borrowed result of a device extraction.
    #[derive(PartialEq, Eq, Default, Debug, Clone)]
    pub struct ValueRef<'a> {
        /// Resolved device name.
        pub device: Cow<'a, str>,
        /// Resolved brand, if any.
        pub brand: Option<Cow<'a, str>>,
        /// Resolved model, if any.
        pub model: Option<Cow<'a, str>>,
    }

    impl ValueRef<'_> {
        /// Converts every field into its owned form, yielding a [`Value`].
        pub fn into_owned(self) -> Value {
            let Self {
                device,
                brand,
                model,
            } = self;
            Value {
                device: device.into_owned(),
                brand: brand.map(Cow::into_owned),
                model: model.map(Cow::into_owned),
            }
        }
    }

    /// Owned counterpart of [`ValueRef`].
    #[derive(PartialEq, Eq, Default, Debug, Clone)]
    pub struct Value {
        /// Resolved device name.
        pub device: String,
        /// Resolved brand, if any.
        pub brand: Option<String>,
        /// Resolved model, if any.
        pub model: Option<String>,
    }
}
/// Rewrites selected constructs of `re` before it is handed to the matcher
/// builder.
///
/// Two rewrites are performed:
///
/// - a bounded repetition `{0,N}` / `{1,N}` whose upper bound `N` has at
///   least three digits becomes `*` / `+`, unless the opening brace is
///   escaped or sits inside a bracketed class;
/// - the escapes `\d`, `\D`, `\w` and `\W` become the explicit ASCII classes
///   `[0-9]`, `[^0-9]`, `[A-Za-z0-9_]` and `[^A-Za-z0-9_]`, including inside
///   a bracketed class (producing a nested class).
///
/// The input is returned borrowed when nothing was rewritten, and also when
/// the pattern starts with an unescaped `{` (a repetition of nothing).
fn rewrite_regex(re: &str) -> std::borrow::Cow<'_, str> {
    // Byte offset up to which `re` has been copied into `rewritten`; it
    // stays at 0 until the first rewrite happens.
    let mut copied_to = 0;
    let mut rewritten = String::new();
    let mut chars = re.char_indices();
    let mut escaped = false;
    let mut class_depth = 0;

    'scan: while let Some((pos, ch)) = chars.next() {
        if ch == '\\' && !escaped {
            escaped = true;
            continue;
        }

        if escaped {
            let class = match ch {
                'd' => Some("[0-9]"),
                'D' => Some("[^0-9]"),
                'w' => Some("[A-Za-z0-9_]"),
                'W' => Some("[^A-Za-z0-9_]"),
                _ => None,
            };
            if let Some(class) = class {
                // `pos - 1` is the backslash that introduced the escape.
                rewritten.push_str(&re[copied_to..pos - 1]);
                rewritten.push_str(class);
                copied_to = pos + 1;
            }
        } else if ch == '[' {
            class_depth += 1;
        } else if ch == ']' {
            class_depth -= 1;
        } else if ch == '{' && class_depth == 0 {
            if pos == 0 {
                // Nothing precedes the brace, so there is nothing to repeat:
                // give the pattern back untouched.
                return re.into();
            }
            // Only lower bounds of exactly `0` or `1` are candidates.
            let lower = match chars.next() {
                Some((_, lower @ ('0' | '1'))) => lower,
                _ => continue,
            };
            match chars.next() {
                Some((_, ',')) => {}
                _ => continue,
            }
            let mut upper_digits = 0;
            for (end, rc) in chars.by_ref() {
                if rc.is_ascii_digit() {
                    upper_digits += 1;
                } else if rc == '}' && upper_digits > 2 {
                    // `pos` is the opening brace and `end` the closing one.
                    rewritten.push_str(&re[copied_to..pos]);
                    rewritten.push(if lower == '0' { '*' } else { '+' });
                    copied_to = end + 1;
                    break;
                } else {
                    // Small or malformed bound: leave it as written.
                    continue 'scan;
                }
            }
        }

        escaped = false;
    }

    if copied_to == 0 {
        std::borrow::Cow::Borrowed(re)
    } else {
        rewritten.push_str(&re[copied_to..]);
        std::borrow::Cow::Owned(rewritten)
    }
}
// Unit tests pinning the rewrites performed by `rewrite_regex`.
#[cfg(test)]
mod test_rewrite_regex {
use super::rewrite_regex as rewrite;
// Bounds with fewer than three digits, or with no upper bound at all, are
// left exactly as written.
#[test]
fn ignore_small_repetition() {
assert_eq!(rewrite(".{0,2}x"), ".{0,2}x");
assert_eq!(rewrite(".{0,}"), ".{0,}");
assert_eq!(rewrite(".{1,}"), ".{1,}");
}
// Only upper bounds of at least three digits become `*` / `+`; two-digit
// bounds such as `{0,20}` and `{1,50}` are kept.
#[test]
fn rewrite_large_repetitions() {
assert_eq!(rewrite(".{0,20}x"), ".{0,20}x");
assert_eq!(rewrite("(.{0,100})"), "(.*)");
assert_eq!(rewrite("(.{1,50})"), "(.{1,50})");
assert_eq!(rewrite(".{1,300}x"), ".+x");
}
// Every qualifying repetition in a pattern is rewritten, including ones
// followed by a lazy `?`, while the small `{0,2}` in the same pattern stays.
#[test]
fn rewrite_all_repetitions() {
assert_eq!(
rewrite("; {0,2}(T-(?:07|[^0][0-9])[^;/]{1,100}?)(?: Build|\\) AppleWebKit)"),
"; {0,2}(T-(?:07|[^0][0-9])[^;/]+?)(?: Build|\\) AppleWebKit)",
);
assert_eq!(
rewrite(
"; {0,2}(SH\\-?[0-9][0-9][^;/]{1,100}|SBM[0-9][^;/]{1,100}?)(?: Build|\\) AppleWebKit)"
),
"; {0,2}(SH\\-?[0-9][0-9][^;/]+|SBM[0-9][^;/]+?)(?: Build|\\) AppleWebKit)",
)
}
// Braces that do not open a repetition (escaped, or inside a bracketed
// class) must not be touched.
#[test]
fn ignore_non_repetitions() {
assert_eq!(
rewrite(r"\{1,2}"),
r"\{1,2}",
"if the opening brace is escaped it's not a repetition"
);
assert_eq!(
rewrite("[.{1,100}]"),
"[.{1,100}]",
"inside a set it's not a repetition"
);
}
// `\d` and `\w` are expanded to explicit ASCII classes, including inside a
// bracketed class, where the result is a nested class.
#[test]
fn rewrite_classes() {
assert_eq!(rewrite(r"\dx"), "[0-9]x");
assert_eq!(rewrite(r"\wx"), "[A-Za-z0-9_]x");
assert_eq!(rewrite(r"[\d]x"), r"[[0-9]]x");
}
}