use regex::{Regex, RegexBuilder};
use regex_syntax;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::{convert::TryFrom, fmt};
use thiserror::Error;
use url::{Position, Url};
/// Crate-local result alias whose error type is this module's [`Error`].
type Result<T> = std::result::Result<T, Error>;
/// Errors produced while parsing a match pattern.
#[derive(Error, Debug)]
pub enum Error {
/// The pattern contains no `://` separator and relaxed parsing was not requested.
/// Carries the pattern text that was rejected.
#[error("could not identify any url scheme component for pattern {0:?}")]
MissingScheme(String),
/// Nothing follows the host part of the pattern and relaxed parsing was not requested.
/// Carries the pattern text that was rejected.
#[error("could not identify any path component for pattern {0:?}")]
MissingPath(String),
/// The regular expression generated from the path glob failed to compile.
#[error("failed to compile regex {pattern_regex:?} (generated from {pattern_source:?})")]
RegexCompile {
// The path glob the regular expression was generated from.
pattern_source: String,
// The generated regular expression text that was rejected.
pattern_regex: String,
// The underlying compile error reported by the `regex` crate.
#[source]
source: regex::Error,
},
}
/// Scheme component of a pattern.
#[derive(Debug, Clone)]
enum Schemes {
    /// Every scheme is accepted.
    All,
    /// Only the schemes listed in `WILDCARD_SCHEMES` are accepted.
    Wildcard,
    /// Exactly one scheme is accepted; it is compared verbatim.
    SpecificScheme(String),
}

impl Schemes {
    /// Returns `true` when `scheme` is accepted by this scheme set.
    ///
    /// The comparison is an exact string comparison; callers are expected to
    /// pass an already-normalised scheme.
    fn include(&self, scheme: &str) -> bool {
        match self {
            Self::SpecificScheme(expected) => expected.as_str() == scheme,
            Self::Wildcard => WILDCARD_SCHEMES.contains(&scheme),
            Self::All => true,
        }
    }
}
/// Host component of a pattern.
#[derive(Debug, Clone)]
enum Hosts {
    /// Every host (including an absent host) is accepted.
    All,
    /// Exactly this host is accepted; `None` accepts only URLs without a host.
    SpecificHost(Option<String>),
    /// This host and any of its subdomains are accepted.
    SpecificHostWithSubdomains(String),
}

impl Hosts {
    /// Returns `true` when `host` is accepted by this host set.
    ///
    /// `host` is lower-cased before comparison; the host stored in the variant
    /// is compared as-is, so it should already be lower-case.
    fn include(&self, host: Option<&str>) -> bool {
        let host = host.map(|h| h.to_lowercase());
        match self {
            Hosts::All => true,
            Hosts::SpecificHost(specific_host) => host == *specific_host,
            Hosts::SpecificHostWithSubdomains(specific_host) => {
                let host = match host {
                    Some(host) => host,
                    None => return false,
                };
                if !host.ends_with(specific_host.as_str()) {
                    return false;
                }
                // `ends_with` succeeded, so this byte offset is a valid char
                // boundary even for non-ASCII hosts (no panic, no mis-indexing).
                let prefix = &host[..host.len() - specific_host.len()];
                // Either the host is the pattern host itself, or whatever
                // precedes it must end in a label separator so that
                // "notmozilla.org" does not match "*.mozilla.org".
                prefix.is_empty() || prefix.ends_with('.')
            }
        }
    }
}
/// Path component of a pattern.
#[derive(Debug, Clone)]
enum Paths {
    /// Every path is accepted.
    All,
    /// Only paths matched by the compiled regular expression are accepted.
    MatchingPattern(Regex),
}

impl Paths {
    /// Returns `true` when `path` is accepted by this path set.
    fn include(&self, path: &str) -> bool {
        match self {
            Self::MatchingPattern(regex) => regex.is_match(path),
            Self::All => true,
        }
    }
}
/// A parsed URL match pattern made of a scheme, a host and a path part.
///
/// With the `serde` feature enabled the pattern is (de)serialized through its
/// `String` form (`try_from = "String"`, `into = "String"`).
// The derive must come before the `serde(...)` helper attribute: using a
// derive helper before the derive that introduces it triggers the
// `legacy_derive_helpers` future-compatibility lint.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(try_from = "String", into = "String"))]
pub struct Pattern {
    /// Textual form of the pattern, returned by the `String` conversion and
    /// used by `Display`.
    source: String,
    /// Accepted URL schemes.
    schemes: Schemes,
    /// Accepted URL hosts.
    hosts: Hosts,
    /// Accepted URL paths (including the query string, see `is_match`).
    paths: Paths,
}
/// Schemes accepted when a pattern uses the `*` scheme wildcard.
static WILDCARD_SCHEMES: &[&str] = &["http", "https", "ws", "wss"];
impl Pattern {
pub fn wildcard() -> Pattern {
Self::wildcard_from_source("<all_urls>")
}
fn wildcard_from_source(source: &str) -> Pattern {
Self {
source: source.to_string(),
schemes: Schemes::All,
hosts: Hosts::All,
paths: Paths::All,
}
}
pub fn new(source: &str, relaxed: bool) -> Result<Pattern> {
if source == "<all_urls>" {
return Ok(Self::wildcard_from_source(source));
}
if source == "*" && relaxed {
return Ok(Self::wildcard_from_source(source));
}
let original_source = source;
let end_of_scheme = source.find("://");
let (source, schemes) = if let Some(end_of_scheme) = end_of_scheme {
let scheme = &source[..end_of_scheme];
if scheme == "*" {
(&source[end_of_scheme + 3..], Schemes::Wildcard)
} else {
(
&source[end_of_scheme + 3..],
Schemes::SpecificScheme(scheme.to_lowercase()),
)
}
} else {
if !relaxed {
return Err(Error::MissingScheme(original_source.to_string()));
}
(source, Schemes::Wildcard)
};
let end_of_host = source.find("/").unwrap_or(source.len());
let host = &source[..end_of_host];
let hosts = if host == "*" {
Hosts::All
} else if host.starts_with("*.") {
Hosts::SpecificHostWithSubdomains(host[2..].to_lowercase())
} else if host.len() > 0 {
Hosts::SpecificHost(Some(host.to_lowercase()))
} else {
Hosts::SpecificHost(None)
};
let path = &source[end_of_host..];
let paths = if path.is_empty() {
if relaxed {
Paths::All
} else {
return Err(Error::MissingPath(original_source.to_string()));
}
} else if relaxed && path == "/" {
Paths::All
} else {
Paths::MatchingPattern(Self::glob_to_regex(relaxed, path)?)
};
Ok(Self {
source: source.to_string(),
schemes,
hosts,
paths,
})
}
pub fn is_match(&self, url: &Url) -> bool {
self.schemes.include(url.scheme())
&& self.hosts.include(url.host_str())
&& self
.paths
.include(&url[Position::BeforePath..Position::AfterQuery])
}
fn glob_to_regex(relaxed: bool, glob: &str) -> Result<Regex> {
let mut regex_pattern = String::with_capacity(glob.len() * 2);
regex_pattern.push('^');
for c in glob.chars() {
if c == '*' {
regex_pattern.push_str(".*");
} else {
if regex_syntax::is_meta_character(c) {
regex_pattern.push('\\');
}
regex_pattern.push(c);
}
}
regex_pattern.push('$');
RegexBuilder::new(®ex_pattern)
.case_insensitive(relaxed)
.build()
.map_err(|err| Error::RegexCompile {
pattern_source: glob.to_string(),
pattern_regex: regex_pattern,
source: err,
})
}
}
impl Into<String> for Pattern {
fn into(self) -> String {
self.source
}
}
/// Parses an owned string as a pattern using relaxed parsing.
impl TryFrom<String> for Pattern {
    type Error = Error;

    /// Equivalent to `Pattern::new(&text, true)`.
    fn try_from(text: String) -> Result<Self> {
        let relaxed = true;
        Self::new(text.as_str(), relaxed)
    }
}
/// Displays the pattern as its stored source text, rendered with the string's
/// pretty `Debug` formatting (`{:#?}`), i.e. quoted and escaped.
impl fmt::Display for Pattern {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("{:#?}", self.source))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Test functions return this so that `?` can be used on parse results.
    type TestResult = std::result::Result<(), Box<dyn std::error::Error>>;

    /// Asserts that an expression matches the given pattern, printing the
    /// actual value otherwise.
    macro_rules! assert_err {
        ($expression:expr, $($pattern:tt)+) => {
            match $expression {
                $($pattern)+ => (),
                ref e => panic!("expected `{}` but got `{:?}`", stringify!($($pattern)+), e),
            }
        }
    }

    /// Asserts that every URL in the list is matched by the pattern.
    /// Must be used inside a function returning `TestResult`.
    macro_rules! assert_pattern_does_match {
        ($pattern:expr, $matching_urls:expr) => {
            for url in ($matching_urls).iter().map(|u| Url::parse(u)) {
                let url = url?;
                assert!($pattern.is_match(&url), "url = {}", url);
            }
        };
    }

    /// Asserts that no URL in the list is matched by the pattern.
    /// Must be used inside a function returning `TestResult`.
    macro_rules! assert_pattern_does_not_match {
        ($pattern:expr, $matching_urls:expr) => {
            for url in ($matching_urls).iter().map(|u| Url::parse(u)) {
                let url = url?;
                assert!(!$pattern.is_match(&url), "url = {}", url);
            }
        };
    }

    /// Strict-mode patterns and their expected matches.
    mod mozilla_patterns {
        use super::*;

        #[test]
        fn all_urls() -> TestResult {
            let p = Pattern::new("<all_urls>", false)?;
            assert_pattern_does_match!(
                p,
                [
                    "http://example.org/",
                    "https://a.org/some/path/",
                    "ws://sockets.somewhere.org/",
                    "wss://ws.example.com/stuff/",
                    "ftp://files.somewhere.org/",
                    "ftps://files.somewhere.org/",
                ]
            );
            Ok(())
        }

        #[test]
        fn all_wildcards() -> TestResult {
            let p = Pattern::new("*://*/*", false)?;
            assert_pattern_does_match!(
                p,
                [
                    "http://example.org/",
                    "https://a.org/some/path/",
                    "ws://sockets.somewhere.org/",
                    "wss://ws.example.com/stuff/",
                ]
            );
            assert_pattern_does_not_match!(
                p,
                [
                    "ftp://ftp.example.org/",
                    "ftps://ftp.example.org/",
                    "file:///a/",
                ]
            );
            Ok(())
        }

        #[test]
        fn subdomain_wildcard() -> TestResult {
            let p = Pattern::new("*://*.mozilla.org/*", false)?;
            assert_pattern_does_match!(
                p,
                [
                    "http://mozilla.org/",
                    "https://mozilla.org/",
                    "http://a.mozilla.org/",
                    "http://a.b.mozilla.org/",
                    "https://b.mozilla.org/path/",
                    "ws://ws.mozilla.org/",
                    "wss://secure.mozilla.org/something",
                ]
            );
            assert_pattern_does_not_match!(
                p,
                [
                    "ftp://mozilla.org/",
                    "http://mozilla.com/",
                    "http://firefox.org/",
                ]
            );
            Ok(())
        }

        #[test]
        fn scheme_wildcard() -> TestResult {
            let p = Pattern::new("*://mozilla.org/", false)?;
            assert_pattern_does_match!(
                p,
                [
                    "http://mozilla.org/",
                    "https://mozilla.org/",
                    "ws://mozilla.org/",
                    "wss://mozilla.org/",
                ]
            );
            assert_pattern_does_not_match!(
                p,
                [
                    "ftp://mozilla.org/",
                    "http://a.mozilla.org/",
                    "http://mozilla.org/a",
                ]
            );
            Ok(())
        }

        #[test]
        fn all_fixed() -> TestResult {
            let p = Pattern::new("ftp://mozilla.org/", false)?;
            assert_pattern_does_match!(p, ["ftp://mozilla.org"]);
            assert_pattern_does_not_match!(
                p,
                [
                    "http://mozilla.org/",
                    "ftp://sub.mozilla.org/",
                    "ftp://mozilla.org/path",
                ]
            );
            Ok(())
        }

        #[test]
        fn wildcard_host() -> TestResult {
            let p = Pattern::new("https://*/path", false)?;
            assert_pattern_does_match!(
                p,
                [
                    "https://mozilla.org/path",
                    "https://a.mozilla.org/path",
                    "https://something.com/path",
                ]
            );
            assert_pattern_does_not_match!(
                p,
                [
                    "http://mozilla.org/path",
                    "https://mozilla.org/path/",
                    "https://mozilla.org/a",
                    "https://mozilla.org/",
                    "https://mozilla.org/path?foo=1",
                ]
            );
            Ok(())
        }

        #[test]
        fn wildcard_host_trailing_slash() -> TestResult {
            let p = Pattern::new("https://*/path/", false)?;
            assert_pattern_does_match!(
                p,
                [
                    "https://mozilla.org/path/",
                    "https://a.mozilla.org/path/",
                    "https://something.com/path/",
                ]
            );
            assert_pattern_does_not_match!(
                p,
                [
                    "http://mozilla.org/path/",
                    "https://mozilla.org/path",
                    "https://mozilla.org/a",
                    "https://mozilla.org/",
                    "https://mozilla.org/path/?foo=1",
                ]
            );
            Ok(())
        }

        #[test]
        fn wildcard_path() -> TestResult {
            let p = Pattern::new("https://mozilla.org/*", false)?;
            assert_pattern_does_match!(
                p,
                [
                    "https://mozilla.org/",
                    "https://mozilla.org/path",
                    "https://mozilla.org/another",
                    "https://mozilla.org/path/to/doc",
                    "https://mozilla.org/path/to/doc?foo=1",
                ]
            );
            assert_pattern_does_not_match!(
                p,
                [
                    "http://mozilla.org/path",
                    "https://mozilla.com/path",
                ]
            );
            Ok(())
        }

        #[test]
        fn all_fixed_http() -> TestResult {
            let p = Pattern::new("https://mozilla.org/a/b/c/", false)?;
            assert_pattern_does_match!(
                p,
                [
                    "https://mozilla.org/a/b/c/",
                    "https://mozilla.org/a/b/c/#section1",
                ]
            );
            Ok(())
        }

        #[test]
        fn multiple_wildcard_path() -> TestResult {
            let p = Pattern::new("https://mozilla.org/*/b/*/", false)?;
            assert_pattern_does_match!(
                p,
                [
                    "https://mozilla.org/a/b/c/",
                    "https://mozilla.org/d/b/f/",
                    "https://mozilla.org/a/b/c/d/",
                    "https://mozilla.org/a/b/c/d/#section1",
                    "https://mozilla.org/a/b/c/d/?foo=/",
                    "https://mozilla.org/a?foo=21314&bar=/b/&extra=c/",
                ]
            );
            assert_pattern_does_not_match!(
                p,
                [
                    "https://mozilla.org/b/*/",
                    "https://mozilla.org/a/b/",
                    "https://mozilla.org/a/b/c/d/?foo=bar",
                ]
            );
            Ok(())
        }

        #[test]
        fn file_scheme_path_wildcard() -> TestResult {
            let p = Pattern::new("file:///blah/*", false)?;
            assert_pattern_does_match!(p, ["file:///blah/", "file:///blah/bleh"]);
            assert_pattern_does_not_match!(p, ["file:///bleh/"]);
            Ok(())
        }

        #[test]
        fn parse_errors() {
            // Strict mode requires a path after the host.
            assert_err!(
                Pattern::new("https://mozilla.org", false),
                Err(Error::MissingPath(_))
            );
            assert_err!(Pattern::new("*://*", false), Err(Error::MissingPath(_)));
            assert_err!(Pattern::new(" *://*", false), Err(Error::MissingPath(_)));
            // Strict mode requires a scheme separator.
            assert_err!(
                Pattern::new("mozilla.org/", false),
                Err(Error::MissingScheme(_))
            );
            // A bare `*` is only accepted in relaxed mode.
            assert_err!(Pattern::new("*", false), Err(Error::MissingScheme(_)));
        }
    }

    /// Behaviour specific to this implementation: case handling and relaxed mode.
    mod our_patterns {
        use super::*;

        #[test]
        fn host_case_insensitivity() -> TestResult {
            let p = Pattern::new("https://moZilla.org/", false)?;
            assert_pattern_does_match!(
                p,
                [
                    "https://moZilla.org/",
                    "https://mozilla.org/",
                    "https://MOZILLA.org/",
                ]
            );
            Ok(())
        }

        #[test]
        fn protocol_case_insensitivity() -> TestResult {
            let p = Pattern::new("httpS://mozilla.org/", false)?;
            assert_pattern_does_match!(
                p,
                [
                    "httpS://mozilla.org/",
                    "https://mozilla.org/",
                    "HTTPs://mozilla.org/",
                ]
            );
            Ok(())
        }

        #[test]
        fn relaxed_path_case_insensitivity() -> TestResult {
            let p = Pattern::new("https://mozilla.org/*/Test/*/", true)?;
            assert_pattern_does_match!(
                p,
                [
                    "https://mozilla.org/One/Test/Two/",
                    "https://mozilla.org/one/TEST/two/",
                    "https://mozilla.org/ONE/test/two/",
                    "https://mozilla.org/one/tESt/TWO/"
                ]
            );
            Ok(())
        }

        #[test]
        fn strict_path_case_sensitivity() -> TestResult {
            let p = Pattern::new("https://mozilla.org/*/Test/*/", false)?;
            assert_pattern_does_match!(p, ["https://mozilla.org/One/Test/Two/"]);
            assert_pattern_does_not_match!(
                p,
                [
                    "https://mozilla.org/one/TEST/two/",
                    "https://mozilla.org/ONE/test/two/",
                    "https://mozilla.org/one/tESt/TWO/"
                ]
            );
            Ok(())
        }

        #[test]
        fn relaxed_missing_scheme_and_path() -> TestResult {
            // Without a scheme the wildcard schemes apply; without a path every
            // path is accepted.
            let p = Pattern::new("mozilla.org", true)?;
            assert_pattern_does_match!(
                p,
                [
                    "http://mozilla.org/",
                    "https://mozilla.org/some/path?x=1",
                    "wss://MOZILLA.org/socket",
                ]
            );
            assert_pattern_does_not_match!(
                p,
                [
                    "ftp://mozilla.org/",
                    "https://a.mozilla.org/",
                    "https://mozilla.com/",
                ]
            );
            Ok(())
        }

        #[test]
        fn relaxed_bare_wildcard() -> TestResult {
            let p = Pattern::new("*", true)?;
            assert_pattern_does_match!(
                p,
                [
                    "http://example.org/",
                    "ftp://files.somewhere.org/",
                    "file:///a/",
                ]
            );
            Ok(())
        }
    }
}