mod canonicalize_and_process;
mod component;
mod constructor_parser;
mod error;
mod matcher;
mod parser;
pub mod quirks;
mod regexp;
mod tokenizer;
pub use error::Error;
use url::Url;
use crate::canonicalize_and_process::is_special_scheme;
use crate::canonicalize_and_process::special_scheme_default_port;
use crate::component::Component;
use crate::regexp::RegExp;
/// Per-component strings plus an optional base URL.
///
/// This is the input consumed by `UrlPattern::parse`, and it can also be
/// wrapped in `UrlPatternMatchInput::Init` to be used as match input. Every
/// component is optional; `Default` leaves all of them (and the base URL)
/// unset.
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct UrlPatternInit {
/// Protocol (scheme) component, if specified.
pub protocol: Option<String>,
/// Username component, if specified.
pub username: Option<String>,
/// Password component, if specified.
pub password: Option<String>,
/// Hostname component, if specified.
pub hostname: Option<String>,
/// Port component, if specified.
pub port: Option<String>,
/// Pathname component, if specified.
pub pathname: Option<String>,
/// Search (query) component, if specified.
pub search: Option<String>,
/// Hash (fragment) component, if specified.
pub hash: Option<String>,
/// Base URL whose components seed the result during processing and against
/// which a relative `pathname` is resolved.
pub base_url: Option<Url>,
}
impl UrlPatternInit {
pub fn parse_constructor_string<R: RegExp>(
pattern: &str,
base_url: Option<Url>,
) -> Result<UrlPatternInit, Error> {
let mut init = constructor_parser::parse_constructor_string::<R>(pattern)?;
if base_url.is_none() && init.protocol.is_none() {
return Err(Error::BaseUrlRequired);
}
init.base_url = base_url;
Ok(init)
}
#[allow(clippy::too_many_arguments)]
fn process(
&self,
kind: canonicalize_and_process::ProcessType,
protocol: Option<String>,
username: Option<String>,
password: Option<String>,
hostname: Option<String>,
port: Option<String>,
pathname: Option<String>,
search: Option<String>,
hash: Option<String>,
) -> Result<UrlPatternInit, Error> {
let mut result = UrlPatternInit {
protocol,
username,
password,
hostname,
port,
pathname,
search,
hash,
base_url: None,
};
let base_url = if let Some(parsed_base_url) = &self.base_url {
result.protocol = Some(parsed_base_url.scheme().to_string());
result.username = Some(parsed_base_url.username().to_string());
result.password =
Some(parsed_base_url.password().unwrap_or_default().to_string());
result.hostname =
Some(parsed_base_url.host_str().unwrap_or_default().to_string());
result.port = Some(url::quirks::port(parsed_base_url).to_string());
result.pathname =
Some(url::quirks::pathname(parsed_base_url).to_string());
result.search = Some(parsed_base_url.query().unwrap_or("").to_string());
result.hash = Some(parsed_base_url.fragment().unwrap_or("").to_string());
Some(parsed_base_url)
} else {
None
};
if let Some(protocol) = &self.protocol {
result.protocol = Some(canonicalize_and_process::process_protocol_init(
protocol, &kind,
)?);
}
if let Some(username) = &self.username {
result.username = Some(canonicalize_and_process::process_username_init(
username, &kind,
)?);
}
if let Some(password) = &self.password {
result.password = Some(canonicalize_and_process::process_password_init(
password, &kind,
)?);
}
if let Some(hostname) = &self.hostname {
result.hostname = Some(canonicalize_and_process::process_hostname_init(
hostname, &kind,
)?);
}
if let Some(port) = &self.port {
result.port = Some(canonicalize_and_process::process_port_init(
port,
result.protocol.as_deref(),
&kind,
)?);
}
if let Some(pathname) = &self.pathname {
result.pathname = Some(pathname.clone());
if let Some(base_url) = base_url {
if !base_url.cannot_be_a_base()
&& !is_absolute_pathname(pathname, &kind)
{
let baseurl_path = url::quirks::pathname(base_url);
let slash_index = baseurl_path.rfind('/');
if let Some(slash_index) = slash_index {
let new_pathname = baseurl_path[..=slash_index].to_string();
result.pathname =
Some(format!("{}{}", new_pathname, result.pathname.unwrap()));
}
}
}
result.pathname = Some(canonicalize_and_process::process_pathname_init(
&result.pathname.unwrap(),
result.protocol.as_deref(),
&kind,
)?);
}
if let Some(search) = &self.search {
result.search = Some(canonicalize_and_process::process_search_init(
search, &kind,
)?);
}
if let Some(hash) = &self.hash {
result.hash =
Some(canonicalize_and_process::process_hash_init(hash, &kind)?);
}
Ok(result)
}
}
/// Reports whether `input` should be treated as an absolute pathname.
///
/// A leading `/` is always absolute. For any `kind` other than
/// `ProcessType::Url`, a leading escaped slash (`\/`) or a leading group that
/// opens with a slash (`{/`) also counts. Everything else, including the empty
/// string, is not absolute.
fn is_absolute_pathname(
  input: &str,
  kind: &canonicalize_and_process::ProcessType,
) -> bool {
  if input.starts_with('/') {
    return true;
  }
  if kind == &canonicalize_and_process::ProcessType::Url {
    return false;
  }
  // Both prefixes are two bytes long, so a match already implies that the
  // input is non-empty and at least two bytes.
  ["\\/", "{/"].iter().any(|prefix| input.starts_with(*prefix))
}
/// A compiled URL pattern holding one `Component` per URL part.
///
/// `R` is the `RegExp` implementation used by the components; it defaults to
/// `regex::Regex`. Instances are built with `UrlPattern::parse`, and the
/// pattern string of each component is exposed through the accessor of the
/// same name.
#[derive(Debug)]
pub struct UrlPattern<R: RegExp = regex::Regex> {
// One compiled component per URL part, in URL order.
protocol: Component<R>,
username: Component<R>,
password: Component<R>,
hostname: Component<R>,
port: Component<R>,
pathname: Component<R>,
search: Component<R>,
hash: Component<R>,
}
/// Input accepted by `UrlPattern::test` and `UrlPattern::exec`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UrlPatternMatchInput {
/// Match against individually specified components.
Init(UrlPatternInit),
/// Match against an already parsed URL.
Url(Url),
}
impl<R: RegExp> UrlPattern<R> {
pub fn parse(init: UrlPatternInit) -> Result<Self, Error> {
Self::parse_internal(init, true)
}
pub(crate) fn parse_internal(
init: UrlPatternInit,
report_regex_errors: bool,
) -> Result<Self, Error> {
let mut processed_init = init.process(
canonicalize_and_process::ProcessType::Pattern,
None,
None,
None,
None,
None,
None,
None,
None,
)?;
if let Some(protocol) = &processed_init.protocol {
if is_special_scheme(protocol) {
let default_port = special_scheme_default_port(protocol);
if default_port == processed_init.port.as_deref() {
processed_init.port = Some(String::new())
}
}
}
let protocol = Component::compile(
processed_init.protocol.as_deref(),
canonicalize_and_process::canonicalize_protocol,
parser::Options::default(),
)?
.optionally_transpose_regex_error(report_regex_errors)?;
let hostname_is_ipv6 = processed_init
.hostname
.as_deref()
.map(hostname_pattern_is_ipv6_address)
.unwrap_or(false);
let hostname = if hostname_is_ipv6 {
Component::compile(
processed_init.hostname.as_deref(),
canonicalize_and_process::canonicalize_ipv6_hostname,
parser::Options::hostname(),
)?
.optionally_transpose_regex_error(report_regex_errors)?
} else {
Component::compile(
processed_init.hostname.as_deref(),
canonicalize_and_process::canonicalize_hostname,
parser::Options::hostname(),
)?
.optionally_transpose_regex_error(report_regex_errors)?
};
let pathname = if protocol.protocol_component_matches_special_scheme() {
Component::compile(
processed_init.pathname.as_deref(),
canonicalize_and_process::canonicalize_pathname,
parser::Options::pathname(),
)?
.optionally_transpose_regex_error(report_regex_errors)?
} else {
Component::compile(
processed_init.pathname.as_deref(),
canonicalize_and_process::canonicalize_an_opaque_pathname,
parser::Options::default(),
)?
.optionally_transpose_regex_error(report_regex_errors)?
};
Ok(UrlPattern {
protocol,
username: Component::compile(
processed_init.username.as_deref(),
canonicalize_and_process::canonicalize_username,
parser::Options::default(),
)?
.optionally_transpose_regex_error(report_regex_errors)?,
password: Component::compile(
processed_init.password.as_deref(),
canonicalize_and_process::canonicalize_password,
parser::Options::default(),
)?
.optionally_transpose_regex_error(report_regex_errors)?,
hostname,
port: Component::compile(
processed_init.port.as_deref(),
|port| canonicalize_and_process::canonicalize_port(port, None),
parser::Options::default(),
)?
.optionally_transpose_regex_error(report_regex_errors)?,
pathname,
search: Component::compile(
processed_init.search.as_deref(),
canonicalize_and_process::canonicalize_search,
parser::Options::default(),
)?
.optionally_transpose_regex_error(report_regex_errors)?,
hash: Component::compile(
processed_init.hash.as_deref(),
canonicalize_and_process::canonicalize_hash,
parser::Options::default(),
)?
.optionally_transpose_regex_error(report_regex_errors)?,
})
}
pub fn protocol(&self) -> &str {
&self.protocol.pattern_string
}
pub fn username(&self) -> &str {
&self.username.pattern_string
}
pub fn password(&self) -> &str {
&self.password.pattern_string
}
pub fn hostname(&self) -> &str {
&self.hostname.pattern_string
}
pub fn port(&self) -> &str {
&self.port.pattern_string
}
pub fn pathname(&self) -> &str {
&self.pathname.pattern_string
}
pub fn search(&self) -> &str {
&self.search.pattern_string
}
pub fn hash(&self) -> &str {
&self.hash.pattern_string
}
pub fn test(&self, input: UrlPatternMatchInput) -> Result<bool, Error> {
self.matches(input).map(|res| res.is_some())
}
pub fn exec(
&self,
input: UrlPatternMatchInput,
) -> Result<Option<UrlPatternResult>, Error> {
self.matches(input)
}
fn matches(
&self,
input: UrlPatternMatchInput,
) -> Result<Option<UrlPatternResult>, Error> {
let input = match crate::quirks::parse_match_input(input) {
Some(input) => input,
None => return Ok(None),
};
let protocol_exec_result = self.protocol.matcher.matches(&input.protocol);
let username_exec_result = self.username.matcher.matches(&input.username);
let password_exec_result = self.password.matcher.matches(&input.password);
let hostname_exec_result = self.hostname.matcher.matches(&input.hostname);
let port_exec_result = self.port.matcher.matches(&input.port);
let pathname_exec_result = self.pathname.matcher.matches(&input.pathname);
let search_exec_result = self.search.matcher.matches(&input.search);
let hash_exec_result = self.hash.matcher.matches(&input.hash);
match (
protocol_exec_result,
username_exec_result,
password_exec_result,
hostname_exec_result,
port_exec_result,
pathname_exec_result,
search_exec_result,
hash_exec_result,
) {
(
Some(protocol_exec_result),
Some(username_exec_result),
Some(password_exec_result),
Some(hostname_exec_result),
Some(port_exec_result),
Some(pathname_exec_result),
Some(search_exec_result),
Some(hash_exec_result),
) => Ok(Some(UrlPatternResult {
protocol: self
.protocol
.create_match_result(input.protocol.clone(), protocol_exec_result),
username: self
.username
.create_match_result(input.username.clone(), username_exec_result),
password: self
.password
.create_match_result(input.password.clone(), password_exec_result),
hostname: self
.hostname
.create_match_result(input.hostname.clone(), hostname_exec_result),
port: self
.port
.create_match_result(input.port.clone(), port_exec_result),
pathname: self
.pathname
.create_match_result(input.pathname.clone(), pathname_exec_result),
search: self
.search
.create_match_result(input.search.clone(), search_exec_result),
hash: self
.hash
.create_match_result(input.hash.clone(), hash_exec_result),
})),
_ => Ok(None),
}
}
}
/// Reports whether a hostname pattern should be handled as an IPv6 address.
///
/// That is the case when the pattern is at least two bytes long and starts
/// with `[`, with `{[`, or with an escaped bracket (`\[`).
fn hostname_pattern_is_ipv6_address(input: &str) -> bool {
  const IPV6_OPENERS: [&str; 3] = ["[", "{[", "\\["];

  input.len() >= 2
    && IPV6_OPENERS
      .iter()
      .any(|opener| input.starts_with(*opener))
}
/// Result of a successful `UrlPattern::exec`: one
/// [`UrlPatternComponentResult`] per URL component.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UrlPatternResult {
/// Match result for the protocol component.
pub protocol: UrlPatternComponentResult,
/// Match result for the username component.
pub username: UrlPatternComponentResult,
/// Match result for the password component.
pub password: UrlPatternComponentResult,
/// Match result for the hostname component.
pub hostname: UrlPatternComponentResult,
/// Match result for the port component.
pub port: UrlPatternComponentResult,
/// Match result for the pathname component.
pub pathname: UrlPatternComponentResult,
/// Match result for the search component.
pub search: UrlPatternComponentResult,
/// Match result for the hash component.
pub hash: UrlPatternComponentResult,
}
/// Match result for a single URL component.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UrlPatternComponentResult {
/// The component input string the pattern was matched against.
pub input: String,
/// Matched group values keyed by group name.
pub groups: std::collections::HashMap<String, String>,
}
// Data-driven tests: every case in `testdata/urlpatterntestdata.json` is
// deserialized into a `TestCase` and run through `test_case`.
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use serde::Deserialize;
use url::Url;
use crate::quirks;
use crate::quirks::StringOrInit;
use crate::UrlPatternComponentResult;
use crate::UrlPatternResult;
use super::UrlPattern;
use super::UrlPatternInit;
// Expected outcome of the match phase: either a bare string (the driver only
// recognises "error") or a structured match result.
#[derive(Deserialize)]
#[serde(untagged)]
#[allow(clippy::large_enum_variant)]
enum ExpectedMatch {
String(String),
MatchResult(MatchResult),
}
// Expected per-component result as written in the fixture.
#[derive(Debug, Deserialize)]
struct ComponentResult {
input: String,
groups: HashMap<String, String>,
}
// One fixture entry.
#[derive(Deserialize)]
struct TestCase {
// When present, the case is skipped and this text is printed as the reason.
skip: Option<String>,
// Constructor arguments: the pattern, optionally followed by a base URL
// string.
pattern: Vec<quirks::StringOrInit>,
// Match arguments: the input, optionally followed by a base URL string.
#[serde(default)]
inputs: Vec<quirks::StringOrInit>,
// Expected component pattern strings, or the string "error" when
// construction must fail.
expected_obj: Option<quirks::StringOrInit>,
// Expected match outcome; absent means "no match expected".
expected_match: Option<ExpectedMatch>,
// Components whose expected pattern string is "" and whose expected match
// groups are empty when not spelled out explicitly.
#[serde(default)]
exactly_empty_components: Vec<String>,
}
// Structured expected match result; components left out fall back to the
// defaults built by `convert_result!` in `test_case`.
#[derive(Debug, Deserialize)]
struct MatchResult {
#[serde(deserialize_with = "deserialize_match_result_inputs")]
#[serde(default)]
inputs: Option<(quirks::StringOrInit, Option<String>)>,
protocol: Option<ComponentResult>,
username: Option<ComponentResult>,
password: Option<ComponentResult>,
hostname: Option<ComponentResult>,
port: Option<ComponentResult>,
pathname: Option<ComponentResult>,
search: Option<ComponentResult>,
hash: Option<ComponentResult>,
}
// Deserializes the optional `inputs` entry of a match result, accepting
// either a one-element or a two-element sequence and normalising both to
// `(input, Option<base_url>)`.
fn deserialize_match_result_inputs<'de, D>(
deserializer: D,
) -> Result<Option<(quirks::StringOrInit, Option<String>)>, D::Error>
where
D: serde::Deserializer<'de>,
{
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum MatchResultInputs {
OneArgument((quirks::StringOrInit,)),
TwoArguments(quirks::StringOrInit, String),
}
let res = Option::<MatchResultInputs>::deserialize(deserializer)?;
Ok(match res {
Some(MatchResultInputs::OneArgument((a,))) => Some((a, None)),
Some(MatchResultInputs::TwoArguments(a, b)) => Some((a, Some(b))),
None => None,
})
}
// Runs a single fixture case: first the construction phase (pattern strings
// are compared per component), then the match phase (`test` and `exec`).
// Panics on any mismatch.
fn test_case(case: TestCase) {
// First constructor argument is the pattern; the optional second one must be
// a base URL string.
let input = case.pattern.get(0).cloned();
let mut base_url = case.pattern.get(1).map(|input| match input {
StringOrInit::String(str) => str.clone(),
StringOrInit::Init(_) => unreachable!(),
});
println!("\n=====");
println!(
"Pattern: {}, {}",
serde_json::to_string(&input).unwrap(),
serde_json::to_string(&base_url).unwrap()
);
if let Some(reason) = case.skip {
println!("🟠 Skipping: {}", reason);
return;
}
// A missing pattern argument is treated as an empty init.
let input = input.unwrap_or_else(|| StringOrInit::Init(Default::default()));
let init_res = quirks::process_construct_pattern_input(
input.clone(),
base_url.as_deref(),
);
// `<UrlPattern>` selects the default type parameter of `UrlPattern`.
let res = init_res.and_then(<UrlPattern>::parse);
// An expected object of "error" means construction must fail; any other bare
// string is not a valid fixture value.
let expected_obj = match case.expected_obj {
Some(StringOrInit::String(s)) if s == "error" => {
assert!(res.is_err());
println!("✅ Passed");
return;
}
Some(StringOrInit::String(_)) => unreachable!(),
Some(StringOrInit::Init(init)) => {
let base_url = init.base_url.map(|url| url.parse().unwrap());
UrlPatternInit {
protocol: init.protocol,
username: init.username,
password: init.password,
hostname: init.hostname,
port: init.port,
pathname: init.pathname,
search: init.search,
hash: init.hash,
base_url,
}
}
None => UrlPatternInit::default(),
};
let pattern = res.expect("failed to parse pattern");
// A base URL carried inside the pattern init replaces the one taken from the
// second constructor argument for the expectation fallbacks below.
if let StringOrInit::Init(quirks::UrlPatternInit {
base_url: Some(url),
..
}) = &input
{
base_url = Some(url.clone())
}
// Compares the compiled pattern string of one component with its expected
// value. When `expected_obj` does not specify the component, the expectation
// falls back, in order, to: "" if listed in `exactly_empty_components`; the
// value given in the pattern init; the corresponding part of the base URL
// (protocol without its trailing ':', search/hash without their leading
// character); and finally "*".
macro_rules! assert_field {
($field:ident) => {{
let mut expected = expected_obj.$field;
if expected == None {
if case
.exactly_empty_components
.contains(&stringify!($field).to_owned())
{
expected = Some(String::new())
} else if let StringOrInit::Init(quirks::UrlPatternInit {
$field: Some($field),
..
}) = &input
{
expected = Some($field.to_owned())
} else if let Some(base_url) = &base_url {
let base_url = Url::parse(base_url).unwrap();
let field = url::quirks::$field(&base_url);
let field: String = match stringify!($field) {
"protocol" if !field.is_empty() => {
field[..field.len() - 1].to_owned()
}
"search" | "hash" if !field.is_empty() => field[1..].to_owned(),
_ => field.to_owned(),
};
expected = Some(field)
} else {
expected = Some("*".to_owned())
}
}
let expected = expected.unwrap();
let pattern = &pattern.$field.pattern_string;
assert_eq!(
pattern,
&expected,
"pattern for {} does not match",
stringify!($field)
);
}};
}
assert_field!(protocol);
assert_field!(username);
assert_field!(password);
assert_field!(hostname);
assert_field!(port);
assert_field!(pathname);
assert_field!(search);
assert_field!(hash);
// Match phase: first match argument is the input; the optional second one
// must be a base URL string.
let input = case.inputs.get(0).cloned();
let base_url = case.inputs.get(1).map(|input| match input {
StringOrInit::String(str) => str.clone(),
StringOrInit::Init(_) => unreachable!(),
});
println!(
"Input: {}, {}",
serde_json::to_string(&input).unwrap(),
serde_json::to_string(&base_url).unwrap(),
);
let input = input.unwrap_or_else(|| StringOrInit::Init(Default::default()));
// Remember the raw arguments; they are the default for the expected `inputs`.
let expected_input = (input.clone(), base_url.clone());
let match_input = quirks::process_match_input(input, base_url.as_deref());
// An expected match of "error" requires processing the match input to fail.
if let Some(ExpectedMatch::String(s)) = &case.expected_match {
if s == "error" {
assert!(match_input.is_err());
println!("✅ Passed");
return;
}
};
let input = match_input.expect("failed to parse match input");
// No usable match input is only acceptable when no match is expected.
if input.is_none() {
assert!(case.expected_match.is_none());
println!("✅ Passed");
return;
}
// NOTE(review): `input` is known to be `Some` here because of the early
// return above, so the `else` arms below look unreachable.
let test_res = if let Some((input, _)) = input.clone() {
pattern.test(input)
} else {
Ok(false)
};
let exec_res = if let Some((input, _)) = input.clone() {
pattern.exec(input)
} else {
Ok(None)
};
// NOTE(review): an expected "error" has already returned (or panicked) in
// the check before `match_input.expect`, so this branch looks unreachable.
if let Some(ExpectedMatch::String(s)) = &case.expected_match {
if s == "error" {
assert!(test_res.is_err());
assert!(exec_res.is_err());
println!("✅ Passed");
return;
}
};
let expected_match = case.expected_match.map(|x| match x {
ExpectedMatch::String(_) => unreachable!(),
ExpectedMatch::MatchResult(x) => x,
});
let test = test_res.unwrap();
let actual_match = exec_res.unwrap();
// `test` must agree with whether the fixture expects a match at all.
assert_eq!(
test,
expected_match.is_some(),
"pattern.test result is not correct"
);
let expected_match = match expected_match {
Some(x) => x,
None => {
assert!(actual_match.is_none(), "expected match to be None");
println!("✅ Passed");
return;
}
};
let actual_match = actual_match.expect("expected match to be Some");
// Expected `inputs` default to the raw match arguments of this case.
let expected_inputs = expected_match.inputs.unwrap_or(expected_input);
let (_, inputs) = input.unwrap();
assert_eq!(inputs, expected_inputs, "expected inputs to be identical");
let exactly_empty_components = case.exactly_empty_components;
// Builds the expected result for one component. A component missing from the
// fixture defaults to an empty input whose groups are {"0": ""}, or no groups
// at all when the component is listed in `exactly_empty_components`.
macro_rules! convert_result {
($component:ident) => {
expected_match
.$component
.map(|c| UrlPatternComponentResult {
input: c.input,
groups: c.groups,
})
.unwrap_or_else(|| {
let mut groups = HashMap::new();
if !exactly_empty_components
.contains(&stringify!($component).to_owned())
{
groups.insert("0".to_owned(), "".to_owned());
}
UrlPatternComponentResult {
input: "".to_owned(),
groups,
}
})
};
}
let expected_result = UrlPatternResult {
protocol: convert_result!(protocol),
username: convert_result!(username),
password: convert_result!(password),
hostname: convert_result!(hostname),
port: convert_result!(port),
pathname: convert_result!(pathname),
search: convert_result!(search),
hash: convert_result!(hash),
};
assert_eq!(
actual_match, expected_result,
"pattern.exec result is not correct"
);
println!("✅ Passed");
}
// Runs every case from the bundled JSON fixture through `test_case`.
#[test]
fn test_cases() {
let testdata = include_str!("./testdata/urlpatterntestdata.json");
let cases: Vec<TestCase> = serde_json::from_str(testdata).unwrap();
for case in cases {
test_case(case);
}
}
// Regression test: the pathname pattern "/:foo." must parse successfully.
#[test]
fn issue26() {
<UrlPattern>::parse(UrlPatternInit {
pathname: Some("/:foo.".to_owned()),
..Default::default()
})
.unwrap();
}
}