use crate::compiler::function::EnumVariant;
use crate::compiler::prelude::*;
use std::{
borrow::Cow,
collections::BTreeMap,
fmt,
str::FromStr,
sync::{Arc, LazyLock},
};
use woothee::parser::Parser as WootheeParser;
/// Process-wide regex-based user agent extractor, built on first use.
///
/// The regex definitions are included at compile time from
/// `$OUT_DIR/user_agent_regexes.rs` (presumably emitted by the build script).
///
/// # Panics
///
/// Panics on first access if the included definitions cannot be converted
/// into an extractor.
static UA_EXTRACTOR: LazyLock<ua_parser::Extractor> = LazyLock::new(|| {
    ua_parser::Extractor::try_from(include!(concat!(
        env!("OUT_DIR"),
        "/user_agent_regexes.rs"
    )))
    .expect("Regex file is not valid.")
});
/// Value used for the `mode` argument when the caller does not supply one.
static DEFAULT_MODE: LazyLock<Value> =
    LazyLock::new(|| Value::Bytes(Bytes::from_static(b"fast")));
/// Documented variants accepted by the `mode` parameter.
///
/// The `value` strings must stay in sync with `Mode::as_str` / `Mode::from_str`.
static MODE_ENUM: &[EnumVariant] = &[
    EnumVariant {
        value: "fast",
        description: "Fastest mode but most unreliable. Uses parser from project [Woothee](https://github.com/woothee/woothee).",
    },
    EnumVariant {
        value: "reliable",
        description: indoc! {"
            Provides greater reliability than `fast` and retains its speed in common cases.
            Parses with [Woothee](https://github.com/woothee/woothee) parser and with parser from
            [uap project](https://github.com/ua-parser/uap-core) if there are some missing fields
            that the first parser wasn't able to parse out but the second one maybe can.
        "},
    },
    EnumVariant {
        value: "enriched",
        description: indoc! {"
            Parses with both parser from [Woothee](https://github.com/woothee/woothee) and parser from
            [uap project](https://github.com/ua-parser/uap-core) and combines results. Result has the full schema.
        "},
    },
];
/// Parameter list of `parse_user_agent`: a required `value` and an optional
/// `mode` restricted to the variants in `MODE_ENUM`.
static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
    let value = Parameter::required("value", kind::BYTES, "The string to parse.");
    let mode = Parameter::optional(
        "mode",
        kind::BYTES,
        "Determines performance and reliability characteristics.",
    )
    .default(&DEFAULT_MODE)
    .enum_variants(MODE_ENUM);

    vec![value, mode]
});
/// The `parse_user_agent` function: parses a user agent string into an object
/// with `browser`, `os` and `device` sections.
#[derive(Clone, Copy, Debug)]
pub struct ParseUserAgent;
impl Function for ParseUserAgent {
    /// Name under which the function is called.
    fn identifier(&self) -> &'static str {
        "parse_user_agent"
    }

    /// One-line summary of the function.
    fn summary(&self) -> &'static str {
        "parse a user agent string"
    }

    /// Long-form usage description.
    fn usage(&self) -> &'static str {
        indoc! {"
            Parses the provided `value` as a user agent, which has
            [a loosely defined format](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent).
            Parses on the basis of best effort. Returned schema depends only on the configured `mode`,
            so if the function fails to parse a field it will set it to `null`.
        "}
    }

    /// Documentation category of the function.
    fn category(&self) -> &'static str {
        Category::Parse.as_ref()
    }

    /// Kind of the returned value: always an object.
    fn return_kind(&self) -> u16 {
        kind::OBJECT
    }

    /// Caveats shown alongside the function documentation.
    fn notices(&self) -> &'static [&'static str] {
        &[
            indoc! {"
                All values are returned as strings or as null. We recommend manually coercing values
                to desired types as you see fit.
            "},
            "Different modes return different schema.",
            "Fields which were not parsed out are set as `null`.",
        ]
    }

    /// Accepted parameters: `value` (required) and `mode` (optional).
    fn parameters(&self) -> &'static [Parameter] {
        PARAMETERS.as_slice()
    }

    /// One worked example per mode.
    fn examples(&self) -> &'static [Example] {
        &[
            example! {
                title: "Fast mode",
                source: indoc! {r#"
                    parse_user_agent(
                    "Mozilla Firefox 1.0.1 Mozilla/5.0 (X11; U; Linux i686; de-DE; rv:1.7.6) Gecko/20050223 Firefox/1.0.1"
                    )
                "#},
                result: Ok(indoc! {r#"
                    {
                    "browser": {
                    "family": "Firefox",
                    "version": "1.0.1"
                    },
                    "device": {
                    "category": "pc"
                    },
                    "os": {
                    "family": "Linux",
                    "version": null
                    }
                    }
                "#}),
            },
            example! {
                title: "Reliable mode",
                source: indoc! {r#"
                    parse_user_agent(
                    "Mozilla/4.0 (compatible; MSIE 7.66; Windows NT 5.1; SV1; .NET CLR 1.1.4322)",
                    mode: "reliable")
                "#},
                result: Ok(indoc! {r#"
                    {
                    "browser": {
                    "family": "Internet Explorer",
                    "version": "7.66"
                    },
                    "device": {
                    "category": "pc"
                    },
                    "os": {
                    "family": "Windows XP",
                    "version": "NT 5.1"
                    }
                    }
                "#}),
            },
            example! {
                title: "Enriched mode",
                source: indoc! {r#"
                    parse_user_agent(
                    "Opera/9.80 (J2ME/MIDP; Opera Mini/4.3.24214; iPhone; CPU iPhone OS 4_2_1 like Mac OS X; AppleWebKit/24.783; U; en) Presto/2.5.25 Version/10.54",
                    mode: "enriched"
                    )
                "#},
                result: Ok(indoc! {r#"
                    {
                    "browser": {
                    "family": "Opera Mini",
                    "major": "4",
                    "minor": "3",
                    "patch": "24214",
                    "version": "10.54"
                    },
                    "device": {
                    "brand": "Apple",
                    "category": "smartphone",
                    "family": "iPhone",
                    "model": "iPhone"
                    },
                    "os": {
                    "family": "iOS",
                    "major": "4",
                    "minor": "2",
                    "patch": "1",
                    "patch_minor": null,
                    "version": "4.2.1"
                    }
                    }
                "#}),
            },
        ]
    }

    /// Compiles a call into a [`ParseUserAgentFn`].
    ///
    /// The `mode` argument is resolved here (falling back to `DEFAULT_MODE`),
    /// and the parsing closure for that mode is built once so that `resolve`
    /// only has to invoke it.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `optional_enum` for the `mode` argument.
    ///
    /// # Panics
    ///
    /// Panics if the enum value is not bytes or is not a known mode name; both
    /// are treated as invariants established by `optional_enum`.
    fn compile(
        &self,
        state: &state::TypeState,
        _ctx: &mut FunctionCompileContext,
        arguments: ArgumentList,
    ) -> Compiled {
        let value = arguments.required("value");
        let mode = arguments
            .optional_enum("mode", &Mode::all_value(), state)?
            .unwrap_or_else(|| DEFAULT_MODE.clone())
            .try_bytes_utf8_lossy()
            .map(|s| Mode::from_str(&s).expect("validated enum"))
            .expect("mode not bytes");

        let parser = match mode {
            // Woothee only, reduced schema.
            Mode::Fast => {
                let parser = WootheeParser::new();
                Arc::new(move |s: &str| parser.parse_user_agent(s).partial_schema()) as Arc<_>
            }
            // Woothee first; the regex extractor runs only when Woothee found
            // no browser family or no OS family. When it does run, its fields
            // take precedence and Woothee's fill the remaining gaps.
            Mode::Reliable => {
                let fast = WootheeParser::new();
                let slow = &UA_EXTRACTOR;
                Arc::new(move |s: &str| {
                    let ua = fast.parse_user_agent(s);
                    let ua = if ua.browser.family.is_none() || ua.os.family.is_none() {
                        let better_ua = slow.parse_user_agent(s);
                        better_ua.or(ua)
                    } else {
                        ua
                    };
                    ua.partial_schema()
                }) as Arc<_>
            }
            // Both parsers always run; the extractor's fields take precedence
            // and Woothee's fill the gaps. Full schema.
            Mode::Enriched => {
                let fast = WootheeParser::new();
                let slow = &UA_EXTRACTOR;
                Arc::new(move |s: &str| {
                    slow.parse_user_agent(s)
                        .or(fast.parse_user_agent(s))
                        .full_schema()
                }) as Arc<_>
            }
        };

        Ok(ParseUserAgentFn {
            value,
            mode,
            parser,
        }
        .as_expr())
    }
}
/// Compiled form of a `parse_user_agent` call.
#[derive(Clone)]
struct ParseUserAgentFn {
// Expression producing the user agent string to parse.
value: Box<dyn Expression>,
// Mode selected at compile time; drives the type definition and `Debug` output.
mode: Mode,
// Parsing closure for the selected mode; invoked with the resolved string.
parser: Arc<dyn Fn(&str) -> Value + Send + Sync>,
}
impl FunctionExpression for ParseUserAgentFn {
    /// Resolves the `value` expression, reads it as (lossy) UTF-8 text and
    /// hands it to the mode-specific parser closure.
    ///
    /// Fails if resolving `value` fails or the resolved value is not bytes.
    fn resolve(&self, ctx: &mut Context) -> Resolved {
        let resolved = self.value.resolve(ctx)?;
        let user_agent = resolved.try_bytes_utf8_lossy()?;
        let parse = &*self.parser;
        Ok(parse(&user_agent))
    }

    /// The result type is determined by the mode alone.
    fn type_def(&self, _: &state::TypeState) -> TypeDef {
        self.mode.type_def()
    }
}
impl fmt::Debug for ParseUserAgentFn {
    /// Prints `value` and `mode`; the parser closure is left out because it
    /// has no `Debug` representation.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "ParseUserAgentFn{{ value: {value:?}, mode: {mode:?}}}",
            value = self.value,
            mode = self.mode
        )
    }
}
/// Parsing strategy selected through the `mode` argument.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum Mode {
// Woothee parser only, reduced schema. This is the `Default` variant.
#[default]
Fast,
// Woothee first, regex extractor when a browser or OS family is missing; reduced schema.
Reliable,
// Both parsers combined; full schema.
Enriched,
}
impl Mode {
fn all_value() -> Vec<Value> {
use Mode::{Enriched, Fast, Reliable};
vec![Fast, Reliable, Enriched]
.into_iter()
.map(|u| u.as_str().into())
.collect::<Vec<_>>()
}
const fn as_str(self) -> &'static str {
use Mode::{Enriched, Fast, Reliable};
match self {
Fast => "fast",
Reliable => "reliable",
Enriched => "enriched",
}
}
fn type_def(self) -> TypeDef {
match self {
Mode::Fast | Mode::Reliable => TypeDef::object(BTreeMap::from([
(
"browser".into(),
Kind::object(BTreeMap::from([
("family".into(), Kind::bytes().or_null()),
("version".into(), Kind::bytes().or_null()),
])),
),
(
"os".into(),
Kind::object(BTreeMap::from([
("family".into(), Kind::bytes().or_null()),
("version".into(), Kind::bytes().or_null()),
])),
),
(
"device".into(),
Kind::object(BTreeMap::from([(
"category".into(),
Kind::bytes().or_null(),
)])),
),
])),
Mode::Enriched => TypeDef::object(BTreeMap::from([
(
"browser".into(),
Kind::object(BTreeMap::from([
("family".into(), Kind::bytes().or_null()),
("version".into(), Kind::bytes().or_null()),
("major".into(), Kind::bytes().or_null()),
("minor".into(), Kind::bytes().or_null()),
("patch".into(), Kind::bytes().or_null()),
])),
),
(
"os".into(),
Kind::object(BTreeMap::from([
("family".into(), Kind::bytes().or_null()),
("version".into(), Kind::bytes().or_null()),
("major".into(), Kind::bytes().or_null()),
("minor".into(), Kind::bytes().or_null()),
("patch".into(), Kind::bytes().or_null()),
("patch_minor".into(), Kind::bytes().or_null()),
])),
),
(
"device".into(),
Kind::object(BTreeMap::from([
("family".into(), Kind::bytes().or_null()),
("category".into(), Kind::bytes().or_null()),
("brand".into(), Kind::bytes().or_null()),
("model".into(), Kind::bytes().or_null()),
])),
),
])),
}
}
}
impl FromStr for Mode {
type Err = &'static str;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
use Mode::{Enriched, Fast, Reliable};
match s {
"fast" => Ok(Fast),
"reliable" => Ok(Reliable),
"enriched" => Ok(Enriched),
_ => Err("unknown mode variant"),
}
}
}
/// Parser-independent parse result; the `Default` value has no field set.
#[derive(Default)]
struct UserAgent {
// Browser section of the result.
browser: Browser,
// Operating system section of the result.
os: Os,
// Device section of the result.
device: Device,
}
impl UserAgent {
    /// Converts into the reduced object used by the `fast` and `reliable` modes.
    fn partial_schema(self) -> Value {
        let sections = [
            ("browser", self.browser.partial_schema()),
            ("os", self.os.partial_schema()),
            ("device", self.device.partial_schema()),
        ];
        IntoIterator::into_iter(sections)
            .map(|(key, section)| (key.to_owned(), section))
            .collect()
    }

    /// Converts into the full object used by the `enriched` mode.
    fn full_schema(self) -> Value {
        let sections = [
            ("browser", self.browser.full_schema()),
            ("os", self.os.full_schema()),
            ("device", self.device.full_schema()),
        ];
        IntoIterator::into_iter(sections)
            .map(|(key, section)| (key.to_owned(), section))
            .collect()
    }

    /// Merges two results section by section; fields of `self` win and
    /// `other` only fills in what `self` is missing.
    fn or(self, other: Self) -> Self {
        let Self {
            browser,
            os,
            device,
        } = self;
        Self {
            browser: browser.or(other.browser),
            os: os.or(other.os),
            device: device.or(other.device),
        }
    }
}
/// Browser information; `None` means the field was not parsed out.
#[derive(Default)]
struct Browser {
// Browser name; set by both parsers.
family: Option<String>,
// Version string; only the Woothee parser sets it.
version: Option<String>,
// Version components; only the `ua_parser` extractor sets them.
major: Option<String>,
minor: Option<String>,
patch: Option<String>,
}
impl Browser {
    /// Reduced object: `family` and `version` only.
    fn partial_schema(self) -> Value {
        into_value([("family", self.family), ("version", self.version)])
    }

    /// Full object with every browser field.
    fn full_schema(self) -> Value {
        into_value([
            ("family", self.family),
            ("version", self.version),
            ("major", self.major),
            ("minor", self.minor),
            ("patch", self.patch),
        ])
    }

    /// Field-wise merge: each field of `self` is kept when set, otherwise the
    /// corresponding field of `other` is used.
    fn or(self, other: Self) -> Self {
        let Self {
            family,
            version,
            major,
            minor,
            patch,
        } = other;
        Self {
            family: self.family.or(family),
            version: self.version.or(version),
            major: self.major.or(major),
            minor: self.minor.or(minor),
            patch: self.patch.or(patch),
        }
    }
}
/// Operating system information; `None` means the field was not parsed out.
#[derive(Default)]
struct Os {
// Operating system name; set by both parsers.
family: Option<String>,
// Version string; only the Woothee parser sets it.
version: Option<String>,
// Version components; only the `ua_parser` extractor sets them.
major: Option<String>,
minor: Option<String>,
patch: Option<String>,
patch_minor: Option<String>,
}
impl Os {
    /// Reduced object: `family` and `version` only.
    fn partial_schema(self) -> Value {
        into_value([("family", self.family), ("version", self.version)])
    }

    /// Full object with every operating system field.
    fn full_schema(self) -> Value {
        into_value([
            ("family", self.family),
            ("version", self.version),
            ("major", self.major),
            ("minor", self.minor),
            ("patch", self.patch),
            ("patch_minor", self.patch_minor),
        ])
    }

    /// Field-wise merge: each field of `self` is kept when set, otherwise the
    /// corresponding field of `other` is used.
    fn or(self, other: Self) -> Self {
        let Self {
            family,
            version,
            major,
            minor,
            patch,
            patch_minor,
        } = other;
        Self {
            family: self.family.or(family),
            version: self.version.or(version),
            major: self.major.or(major),
            minor: self.minor.or(minor),
            patch: self.patch.or(patch),
            patch_minor: self.patch_minor.or(patch_minor),
        }
    }
}
/// Device information; `None` means the field was not parsed out.
#[derive(Default)]
struct Device {
// Device name; only the `ua_parser` extractor sets it.
family: Option<String>,
// Device category (e.g. "pc", "smartphone"); only the Woothee parser sets it.
category: Option<String>,
// Brand and model; only the `ua_parser` extractor sets them.
brand: Option<String>,
model: Option<String>,
}
impl Device {
    /// Reduced object: `category` only.
    fn partial_schema(self) -> Value {
        into_value([("category", self.category)])
    }

    /// Full object with every device field.
    fn full_schema(self) -> Value {
        into_value([
            ("category", self.category),
            ("family", self.family),
            ("brand", self.brand),
            ("model", self.model),
        ])
    }

    /// Field-wise merge: each field of `self` is kept when set, otherwise the
    /// corresponding field of `other` is used.
    fn or(self, other: Self) -> Self {
        let Self {
            family,
            category,
            brand,
            model,
        } = other;
        Self {
            category: self.category.or(category),
            family: self.family.or(family),
            brand: self.brand.or(brand),
            model: self.model.or(model),
        }
    }
}
/// Builds an object [`Value`] from `(field name, optional text)` pairs.
///
/// A missing text becomes `Value::Null`, so every listed field is always
/// present in the resulting object.
fn into_value<'a>(iter: impl IntoIterator<Item = (&'a str, Option<String>)>) -> Value {
    iter.into_iter()
        .map(|(key, field)| {
            let field = match field {
                Some(text) => Value::from(text),
                None => Value::Null,
            };
            (key.to_owned(), field)
        })
        .collect()
}
/// Common interface over the two user agent parsers used by this function.
trait Parser {
// Parses `user_agent` into a `UserAgent`; the signature is infallible, so
// anything a parser cannot determine is left as `None`.
fn parse_user_agent(&self, user_agent: &str) -> UserAgent;
}
impl Parser for WootheeParser {
fn parse_user_agent(&self, user_agent: &str) -> UserAgent {
fn unknown_to_none<'a>(s: impl Into<Cow<'a, str>>) -> Option<String> {
let cow = s.into();
match cow.as_ref() {
"" | woothee::woothee::VALUE_UNKNOWN => None,
_ => Some(cow.into_owned()),
}
}
let ua = self.parse(user_agent).unwrap_or_default();
UserAgent {
browser: Browser {
family: unknown_to_none(ua.name),
version: unknown_to_none(ua.version),
..Default::default()
},
os: Os {
family: unknown_to_none(ua.os),
version: unknown_to_none(ua.os_version),
..Default::default()
},
device: Device {
category: unknown_to_none(ua.category),
..Default::default()
},
}
}
}
impl Parser for ua_parser::Extractor<'_> {
fn parse_user_agent(&self, user_agent: &str) -> UserAgent {
let browser = self
.ua
.extract(user_agent)
.map(|ua| Browser {
family: Some(ua.family.into_owned()),
major: ua.major.map(Into::into),
minor: ua.minor.map(Into::into),
patch: ua.patch.map(Into::into),
..Default::default()
})
.unwrap_or_default();
let os = self
.os
.extract(user_agent)
.map(|os| Os {
family: Some(os.os.into_owned()),
major: os.major.map(Cow::into_owned),
minor: os.minor.map(Cow::into_owned),
patch: os.patch.map(Cow::into_owned),
patch_minor: os.patch_minor.map(Cow::into_owned),
..Default::default()
})
.unwrap_or_default();
let device = self
.dev
.extract(user_agent)
.map(|dev| Device {
family: Some(dev.device.into_owned()),
brand: dev.brand.map(Cow::into_owned),
model: dev.model.map(Cow::into_owned),
..Default::default()
})
.unwrap_or_default();
UserAgent {
browser,
os,
device,
}
}
}
// Function-level tests: each case gives the call arguments, the expected
// result and the expected type definition.
#[cfg(test)]
mod tests {
use super::*;
use crate::value;
test_function![
parse_user_agent => ParseUserAgent;
// No `mode` argument: the default mode applies, so the reduced schema is expected.
parses {
args: func_args![ value: "Mozilla/4.0 (compatible; MSIE 7.66; Windows NT 5.1; SV1)" ],
want: Ok(value!({ browser: { family: "Internet Explorer", version: "7.66" }, device: { category: "pc" }, os: { family: "Windows XP", version: "NT 5.1" } })),
tdef: Mode::Fast.type_def(),
}
// Nothing can be parsed out: the full schema is still returned, with every field null.
unknown_user_agent {
args: func_args![ value: "w3m/0.3", mode: "enriched"],
want: Ok(value!({ browser: { family: null, major: null, minor: null, patch: null, version: null }, device: { brand: null, category: null, family: null, model: null }, os: { family: null, major: null, minor: null, patch: null, patch_minor: null, version: null } })),
tdef: Mode::Enriched.type_def(),
}
];
}