mod excludes;
mod includes;
use lazy_static::lazy_static;
use std::collections::HashSet;
pub use excludes::Excludes;
pub use includes::Includes;
use crate::Uri;
// Regular builds only: this definition is compiled when the crate is not being
// built for tests and the `check_example_domains` feature is disabled.
#[cfg(all(not(test), not(feature = "check_example_domains")))]
lazy_static! {
/// Domain names consulted by `is_example_domain`; URIs on these domains are
/// reported as excluded by `Filter::is_excluded`.
static ref EXAMPLE_DOMAINS: HashSet<&'static str> =
HashSet::from_iter(["example.com", "example.org", "example.net", "example.edu"]);
}
// Test builds, or builds with the `check_example_domains` feature enabled, get
// an empty set instead, so `is_example_domain` never reports a match there and
// example domains are treated like any other domain.
#[cfg(any(test, feature = "check_example_domains"))]
lazy_static! {
/// Intentionally empty: example-domain detection is switched off in this
/// configuration.
static ref EXAMPLE_DOMAINS: HashSet<&'static str> = HashSet::new();
}
/// URL prefixes that `is_false_positive` reports as false positives.
///
/// Entries are compared literally against the start of the input (they are
/// not regular expressions), so the scheme (`http` vs. `https`) is part of
/// the match.
// NOTE(review): these look like XML namespace / schema identifiers rather than
// links meant to be fetched, which is presumably why they are filtered — confirm.
const FALSE_POSITIVE_PAT: &[&str] = &[
r"http://www.w3.org/1999/xhtml",
r"http://www.w3.org/1999/xlink",
r"http://www.w3.org/2000/svg",
r"https://schemas.microsoft.com",
r"http://schemas.zune.net",
r"http://schemas.openxmlformats.org",
];
/// Reports whether `input` begins with one of the known false-positive URL
/// prefixes listed in `FALSE_POSITIVE_PAT`.
///
/// The comparison is a plain, case-sensitive prefix check, so any input that
/// merely extends one of the prefixes is reported as well.
#[inline]
#[must_use]
pub fn is_false_positive(input: &str) -> bool {
    for prefix in FALSE_POSITIVE_PAT {
        if input.starts_with(prefix) {
            return true;
        }
    }
    false
}
/// Returns `true` when `host` is exactly one of the `EXAMPLE_DOMAINS` entries
/// or a subdomain of one (i.e. the entry is preceded by a `.`).
///
/// A bare suffix comparison is not enough: `notexample.com` ends with
/// `example.com` but is an unrelated domain.
fn is_example_host(host: &str) -> bool {
    EXAMPLE_DOMAINS.iter().any(|&example| {
        host.strip_suffix(example)
            .map_or(false, |rest| rest.is_empty() || rest.ends_with('.'))
    })
}

/// Checks whether `uri` points at one of the domains in `EXAMPLE_DOMAINS`.
///
/// * If the URI has a domain, that domain must equal an example domain or be
///   a subdomain of one.
/// * If it has no domain but is a mail address, the domain lives in the path
///   (e.g. `mail@example.com`), so the part after the last `@` is checked the
///   same way.
/// * Anything else is not an example domain.
///
/// `EXAMPLE_DOMAINS` is empty in test builds and when the
/// `check_example_domains` feature is enabled, in which case this always
/// returns `false`.
#[inline]
#[must_use]
pub fn is_example_domain(uri: &Uri) -> bool {
    match uri.domain() {
        Some(domain) => is_example_host(domain),
        // `rsplit` always yields at least one item, so a path without `@` is
        // checked as a whole.
        None if uri.is_mail() => uri
            .path()
            .rsplit('@')
            .next()
            .map_or(false, is_example_host),
        None => false,
    }
}
/// Settings that decide which URIs `Filter::is_excluded` rejects.
///
/// The default value has no patterns, no scheme restriction and every
/// `exclude_*` switch turned off.
// Each bool below is an independent on/off switch, hence the lint exemption.
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Debug, Default)]
pub struct Filter {
/// Include patterns. A URI matching one of them passes the pattern stage of
/// `is_excluded` even if it is a known false positive or matches an exclude
/// pattern. If include patterns are set but no exclude patterns are, URIs
/// matching no include pattern are excluded.
pub includes: Option<Includes>,
/// Exclude patterns. A URI matching one of them is excluded unless an
/// include pattern matches it.
pub excludes: Option<Excludes>,
/// Allowed URI schemes. When non-empty, URIs whose scheme is not in the set
/// are excluded; an empty set places no restriction on the scheme.
pub schemes: HashSet<String>,
/// Exclude URIs for which `Uri::is_private` returns `true`.
pub exclude_private_ips: bool,
/// Exclude URIs for which `Uri::is_link_local` returns `true`.
pub exclude_link_local_ips: bool,
/// Exclude URIs for which `Uri::is_loopback` returns `true`, as well as URIs
/// whose domain is exactly `localhost`.
pub exclude_loopback_ips: bool,
/// Exclude URIs for which `Uri::is_mail` returns `true`.
pub exclude_mail: bool,
}
impl Filter {
#[inline]
#[must_use]
pub fn is_mail_excluded(&self, uri: &Uri) -> bool {
self.exclude_mail && uri.is_mail()
}
#[must_use]
pub fn is_ip_excluded(&self, uri: &Uri) -> bool {
if (self.exclude_loopback_ips && uri.is_loopback())
|| (self.exclude_private_ips && uri.is_private())
|| (self.exclude_link_local_ips && uri.is_link_local())
{
return true;
}
false
}
#[must_use]
pub fn is_host_excluded(&self, uri: &Uri) -> bool {
self.exclude_loopback_ips && uri.domain() == Some("localhost")
}
#[inline]
#[must_use]
pub fn is_scheme_excluded(&self, uri: &Uri) -> bool {
if self.schemes.is_empty() {
return false;
}
!self.schemes.contains(uri.scheme())
}
#[inline]
fn is_includes_empty(&self) -> bool {
!matches!(self.includes, Some(ref includes) if !includes.is_empty())
}
#[inline]
fn is_excludes_empty(&self) -> bool {
!matches!(self.excludes, Some(ref excludes) if !excludes.is_empty())
}
#[inline]
fn is_includes_match(&self, input: &str) -> bool {
matches!(self.includes, Some(ref includes) if includes.is_match(input))
}
#[inline]
fn is_excludes_match(&self, input: &str) -> bool {
matches!(self.excludes, Some(ref excludes) if excludes.is_match(input))
}
#[must_use]
pub fn is_excluded(&self, uri: &Uri) -> bool {
if self.is_mail_excluded(uri)
|| self.is_ip_excluded(uri)
|| self.is_host_excluded(uri)
|| self.is_scheme_excluded(uri)
|| is_example_domain(uri)
{
return true;
}
let input = uri.as_str();
if self.is_includes_empty() {
if self.is_excludes_empty() {
return is_false_positive(input);
}
} else if self.is_includes_match(input) {
return false;
}
if is_false_positive(input)
|| self.is_excludes_empty()
|| self.is_excludes_match(input)
{
return true;
}
false
}
}
#[cfg(test)]
mod tests {
    use regex::RegexSet;
    use reqwest::Url;
    use url::Host;

    use super::{Excludes, Filter, Includes};
    use crate::{
        test_utils::{mail, website},
        Uri,
    };

    // Sample addresses, one or more per IP class the filter can exclude.
    const V4_PRIVATE_CLASS_A: &str = "http://10.0.0.1";
    const V4_PRIVATE_CLASS_B: &str = "http://172.16.0.1";
    const V4_PRIVATE_CLASS_C: &str = "http://192.168.0.1";

    const V4_LOOPBACK: &str = "http://127.0.0.1";
    const V6_LOOPBACK: &str = "http://[::1]";

    const V4_LINK_LOCAL_1: &str = "http://169.254.0.1";
    const V4_LINK_LOCAL_2: &str = "http://169.254.10.1:8080";

    // IPv4 addresses written in their IPv4-mapped IPv6 form.
    const V6_MAPPED_V4_PRIVATE_CLASS_A: &str = "http://[::ffff:10.0.0.1]";
    const V6_MAPPED_V4_LINK_LOCAL: &str = "http://[::ffff:169.254.0.1]";

    // Asserts that the host of URL `$ip` is an address of the requested family
    // (`v4` / `v6`) for which the std predicate `$predicate` holds.
    //
    // The expansion uses `?` for URL parsing and host extraction, so it can
    // only be used inside a function returning `Result<_, ()>`.
    macro_rules! assert_ip_address {
        (v4: $ip:expr, $predicate:tt) => {
            let parsed = Url::parse($ip).map_err(|_| ())?;
            let host = parsed.host().ok_or(())?;
            std::assert!(matches!(host, Host::Ipv4(addr) if addr.$predicate()));
        };
        (v6: $ip:expr, $predicate:tt) => {
            let parsed = Url::parse($ip).map_err(|_| ())?;
            let host = parsed.host().ok_or(())?;
            std::assert!(matches!(host, Host::Ipv6(addr) if addr.$predicate()));
        };
    }

    /// Builds an `Includes` from raw regex patterns; panics on an invalid one.
    fn include_set(patterns: &[&str]) -> Includes {
        Includes {
            regex: RegexSet::new(patterns).unwrap(),
        }
    }

    /// Builds an `Excludes` from raw regex patterns; panics on an invalid one.
    fn exclude_set(patterns: &[&str]) -> Excludes {
        Excludes {
            regex: RegexSet::new(patterns).unwrap(),
        }
    }

    /// Guards the constants above: each one must really belong to the IP
    /// class its name claims.
    #[allow(clippy::shadow_unrelated)]
    #[test]
    fn test_const_sanity() -> Result<(), ()> {
        assert_ip_address!(v4: V4_PRIVATE_CLASS_A, is_private);
        assert_ip_address!(v4: V4_PRIVATE_CLASS_B, is_private);
        assert_ip_address!(v4: V4_PRIVATE_CLASS_C, is_private);

        assert_ip_address!(v4: V4_LOOPBACK, is_loopback);
        assert_ip_address!(v6: V6_LOOPBACK, is_loopback);

        assert_ip_address!(v4: V4_LINK_LOCAL_1, is_link_local);
        assert_ip_address!(v4: V4_LINK_LOCAL_2, is_link_local);

        Ok(())
    }

    #[test]
    fn test_exclude_loopback_ips() {
        let filter = Filter {
            exclude_loopback_ips: true,
            ..Filter::default()
        };

        for raw in ["https://[::1]", "https://127.0.0.1/8"] {
            let uri = Uri::try_from(raw).unwrap();
            assert!(filter.is_excluded(&uri), "{} should be excluded", raw);
        }
    }

    #[test]
    fn test_includes_and_excludes_empty() {
        // Example domains are not filtered in test builds.
        let uri = website("https://example.com");
        assert!(!Filter::default().is_excluded(&uri));
    }

    #[test]
    fn test_false_positives() {
        let filter = Filter::default();

        for url in [
            "http://www.w3.org/1999/xhtml",
            "http://schemas.openxmlformats.org/markup-compatibility/2006",
        ] {
            assert!(
                filter.is_excluded(&website(url)),
                "{} should be excluded",
                url
            );
        }
        assert!(!filter.is_excluded(&website("https://example.com")));
    }

    #[test]
    fn test_overwrite_false_positives() {
        let filter = Filter {
            includes: Some(include_set(&[r"http://www.w3.org/1999/xhtml"])),
            ..Filter::default()
        };

        let uri = website("http://www.w3.org/1999/xhtml");
        assert!(!filter.is_excluded(&uri));
    }

    #[test]
    fn test_include_regex() {
        let filter = Filter {
            includes: Some(include_set(&[r"foo.example.com"])),
            ..Filter::default()
        };

        // Only the URL matching the include pattern is kept.
        assert!(!filter.is_excluded(&website("https://foo.example.com")));
        for url in ["https://bar.example.com", "https://example.com"] {
            assert!(
                filter.is_excluded(&website(url)),
                "{} should be excluded",
                url
            );
        }
    }

    #[test]
    fn test_exclude_mail() {
        let filter = Filter {
            exclude_mail: true,
            ..Filter::default()
        };

        for address in ["mail@example.com", "foo@bar.dev"] {
            assert!(
                filter.is_excluded(&mail(address)),
                "{} should be excluded",
                address
            );
        }
        assert!(!filter.is_excluded(&website("http://bar.dev")));
    }

    #[test]
    fn test_exclude_regex() {
        let filter = Filter {
            excludes: Some(exclude_set(&[
                r"github.com",
                r"[a-z]+\.(org|net)",
                r"@example.com",
            ])),
            ..Filter::default()
        };

        let rejected = [
            website("https://github.com"),
            website("http://exclude.org"),
            mail("mail@example.com"),
        ];
        for uri in &rejected {
            assert!(
                filter.is_excluded(uri),
                "{} should be excluded",
                uri.as_str()
            );
        }

        let accepted = [website("http://bar.dev"), mail("foo@bar.dev")];
        for uri in &accepted {
            assert!(
                !filter.is_excluded(uri),
                "{} should not be excluded",
                uri.as_str()
            );
        }
    }

    #[test]
    fn test_exclude_include_regex() {
        let filter = Filter {
            includes: Some(include_set(&[r"foo.example.com"])),
            excludes: Some(exclude_set(&[r"example.com"])),
            ..Filter::default()
        };

        // The include pattern takes precedence over the exclude pattern.
        assert!(!filter.is_excluded(&website("https://foo.example.com")));
        for url in ["https://example.com", "https://bar.example.com"] {
            assert!(
                filter.is_excluded(&website(url)),
                "{} should be excluded",
                url
            );
        }
    }

    #[test]
    fn test_excludes_no_private_ips_by_default() {
        let filter = Filter::default();

        for url in [
            V4_PRIVATE_CLASS_A,
            V4_PRIVATE_CLASS_B,
            V4_PRIVATE_CLASS_C,
            V4_LINK_LOCAL_1,
            V4_LINK_LOCAL_2,
            V4_LOOPBACK,
            V6_LOOPBACK,
            "http://localhost",
        ] {
            assert!(
                !filter.is_excluded(&website(url)),
                "{} should not be excluded",
                url
            );
        }
    }

    #[test]
    fn test_exclude_private_ips() {
        let filter = Filter {
            exclude_private_ips: true,
            ..Filter::default()
        };

        for url in [V4_PRIVATE_CLASS_A, V4_PRIVATE_CLASS_B, V4_PRIVATE_CLASS_C] {
            assert!(
                filter.is_excluded(&website(url)),
                "{} should be excluded",
                url
            );
        }
    }

    #[test]
    fn test_exclude_link_local() {
        let filter = Filter {
            exclude_link_local_ips: true,
            ..Filter::default()
        };

        for url in [V4_LINK_LOCAL_1, V4_LINK_LOCAL_2] {
            assert!(
                filter.is_excluded(&website(url)),
                "{} should be excluded",
                url
            );
        }
    }

    #[test]
    fn test_exclude_loopback() {
        let filter = Filter {
            exclude_loopback_ips: true,
            ..Filter::default()
        };

        for url in [V4_LOOPBACK, V6_LOOPBACK, "http://localhost"] {
            assert!(
                filter.is_excluded(&website(url)),
                "{} should be excluded",
                url
            );
        }
    }

    #[test]
    fn test_exclude_ip_v4_mapped_ip_v6_not_supported() {
        let filter = Filter {
            exclude_private_ips: true,
            exclude_link_local_ips: true,
            ..Filter::default()
        };

        // Pins the current behavior: IPv4-mapped IPv6 addresses are not
        // recognized as private or link-local.
        for url in [V6_MAPPED_V4_PRIVATE_CLASS_A, V6_MAPPED_V4_LINK_LOCAL] {
            assert!(
                !filter.is_excluded(&website(url)),
                "{} should not be excluded",
                url
            );
        }
    }
}