mod matcher;
#[cfg(feature = "remote_list")]
#[cfg(test)]
mod tests;
use std::{collections::HashMap, fmt, fs::File, io::Read, net::IpAddr, path::Path, str::FromStr};
#[cfg(feature = "remote_list")]
use std::{io::Write, net::TcpStream, time::Duration};
pub mod errors;
pub use crate::errors::{Error, ErrorKind, Result};
use idna::domain_to_unicode;
#[cfg(feature = "remote_list")]
use native_tls::TlsConnector;
use url::Url;
/// URL of the list that `List::fetch` tries first.
pub const LIST_URL: &str = "https://publicsuffix.org/list/public_suffix_list.dat";
// Catch-all `*` rule that `List::build` appends after parsing a list; it is hidden
// from the rule listings by `List::all_internal`.
const PREVAILING_STAR_RULE: &str = "*";
/// One rule of the list, kept in insertion order in `List::all`.
#[derive(Debug, PartialEq, Eq, Hash)]
struct Suffix {
/// Rule text as stored by `List::append` (a leading `!` has already been removed).
rule: String,
/// Section type the rule was appended with.
typ: Type,
}
/// Marks a trie node at which a rule ends.
#[derive(Debug)]
struct ListLeaf {
/// Section type of the rule that ends here.
typ: Type,
/// `true` when the rule was written with a leading `!`.
is_exception_rule: bool,
}
impl ListLeaf {
fn new(typ: Type, is_exception_rule: bool) -> Self {
Self {
typ,
is_exception_rule,
}
}
}
/// A node of the rule trie; rules are inserted label by label, right-most label first.
#[derive(Debug)]
struct ListNode {
/// Child nodes keyed by label text (the literal `*` is stored like any other label).
children: HashMap<String, ListNode>,
/// Present when some rule ends exactly at this node.
leaf: Option<ListLeaf>,
}
impl ListNode {
fn new() -> Self {
Self {
children: HashMap::new(),
leaf: None,
}
}
}
/// A parsed suffix list: a trie for lookups plus the flat rule list for enumeration.
#[derive(Debug)]
pub struct List {
/// Root of the label trie filled by `append`.
root: ListNode,
/// Every appended rule, in insertion order.
all: Vec<Suffix>,
}
/// Section of the list a rule belongs to, as selected by the
/// `BEGIN ICANN DOMAINS` / `BEGIN PRIVATE DOMAINS` marker lines in `List::build`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum Type {
Icann,
Private,
}
/// Result of matching a domain name against a `List`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Domain {
/// The name this value was built from (trailing dots are trimmed in `Domain::parse`).
full: String,
/// Section of the matched rule; `None` when no rule matched or a `*` label was involved.
typ: Option<Type>,
/// Matched suffix, if any.
suffix: Option<String>,
/// Suffix plus one more label, when the name has more labels than the suffix.
registrable: Option<String>,
}
/// A parsed host: either an IP address or a domain name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Host {
/// The host parsed as an IP address.
Ip(IpAddr),
/// The host parsed as a domain name.
Domain(Domain),
}
/// A DNS name together with the domain information that could be derived from it.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DnsName {
/// ASCII form of the name as produced by `Domain::try_to_ascii`.
name: String,
/// Set by `List::parse_dns_name` when the name has a syntactically valid root domain.
domain: Option<Domain>,
}
/// Conversion into a `Url`, implemented below for `Url` itself and for string types.
pub trait IntoUrl {
/// Consumes `self` and returns the resulting `Url`, or an error if conversion fails.
fn into_url(self) -> Result<Url>;
}
/// Identity conversion: a `Url` is returned unchanged.
impl IntoUrl for Url {
fn into_url(self) -> Result<Url> {
Ok(self)
}
}
impl<'a> IntoUrl for &'a str {
fn into_url(self) -> Result<Url> {
Ok(Url::parse(self)?)
}
}
impl<'a> IntoUrl for &'a String {
fn into_url(self) -> Result<Url> {
Ok(Url::parse(self)?)
}
}
/// Parses the owned `String` with `Url::parse`.
impl IntoUrl for String {
    fn into_url(self) -> Result<Url> {
        let parsed = Url::parse(self.as_str())?;
        Ok(parsed)
    }
}
/// Fetches `u` with a hand-written HTTP/1.0 `GET` and returns everything read
/// from the connection.
///
/// The returned string is the raw response: nothing in this function separates
/// the status line and headers from the body.
///
/// # Errors
///
/// Fails when the URL cannot be parsed, has no host or no known port, uses a
/// scheme other than `http`/`https`, or when connecting, the TLS handshake,
/// writing or reading fails.
#[cfg(feature = "remote_list")]
fn request<U: IntoUrl>(u: U) -> Result<String> {
    let url = u.into_url()?;
    let host = match url.host_str() {
        Some(host) => host,
        None => return Err(ErrorKind::NoHost.into()),
    };
    let port = match url.port_or_known_default() {
        Some(port) => port,
        None => return Err(ErrorKind::NoPort.into()),
    };
    // The request target must carry the query string as well as the path;
    // sending the path alone would ask the server for a different resource.
    let target = match url.query() {
        Some(query) => format!("{}?{}", url.path(), query),
        None => url.path().to_owned(),
    };
    let data = format!("GET {} HTTP/1.0\r\nHost: {}\r\n\r\n", target, host);
    let addr = format!("{}:{}", host, port);
    let stream = TcpStream::connect(addr)?;
    // Bound each read and write so a stalled peer cannot block forever.
    let timeout = Duration::from_secs(2);
    stream.set_read_timeout(Some(timeout))?;
    stream.set_write_timeout(Some(timeout))?;
    let mut res = String::new();
    match url.scheme() {
        "https" => {
            let connector = TlsConnector::builder().build()?;
            let mut stream = connector.connect(host, stream)?;
            stream.write_all(data.as_bytes())?;
            stream.read_to_string(&mut res)?;
        }
        "http" => {
            let mut stream = stream;
            stream.write_all(data.as_bytes())?;
            stream.read_to_string(&mut res)?;
        }
        _ => {
            return Err(ErrorKind::UnsupportedScheme.into());
        }
    }
    Ok(res)
}
impl List {
/// Inserts one rule into the trie and records it in the flat rule list.
///
/// A leading `!` marks an exception rule and is removed before the rule is
/// stored. Labels are inserted right-most first. A rule with an empty label
/// yields `ErrorKind::InvalidRule`.
fn append(&mut self, mut rule: &str, typ: Type) -> Result<()> {
    let is_exception_rule = rule.starts_with('!');
    if is_exception_rule {
        // `!` is a single byte, so slicing at 1 is on a char boundary.
        rule = &rule[1..];
    }
    let mut node = &mut self.root;
    for label in rule.rsplit('.') {
        if label.is_empty() {
            return Err(ErrorKind::InvalidRule(rule.into()).into());
        }
        node = node
            .children
            .entry(label.to_owned())
            .or_insert_with(ListNode::new);
    }
    node.leaf = Some(ListLeaf::new(typ, is_exception_rule));
    let stored = Suffix {
        rule: rule.to_owned(),
        typ,
    };
    self.all.push(stored);
    Ok(())
}
fn build(res: &str) -> Result<List> {
let mut typ = None;
let mut list = List::empty();
for line in res.lines() {
match line {
line if line.contains("BEGIN ICANN DOMAINS") => {
typ = Some(Type::Icann);
}
line if line.contains("BEGIN PRIVATE DOMAINS") => {
typ = Some(Type::Private);
}
line if line.starts_with("//") => {
continue;
}
line => match typ {
Some(typ) => {
let rule = match line.split_whitespace().next() {
Some(rule) => rule,
None => continue,
};
list.append(rule, typ)?;
}
None => {
continue;
}
},
}
}
if list.root.children.is_empty() || list.all().is_empty() {
return Err(ErrorKind::InvalidList.into());
}
list.append(PREVAILING_STAR_RULE, Type::Icann)?;
Ok(list)
}
/// Builds a list from an owned string; delegates to `from_str`.
pub fn from_string(string: String) -> Result<List> {
Self::from_str(&string)
}
/// Builds a list from list text; delegates to `build`.
// This is an inherent method rather than an implementation of the `FromStr`
// trait, hence the lint exemption.
#[allow(clippy::should_implement_trait)]
pub fn from_str(string: &str) -> Result<List> {
Self::build(string)
}
/// Creates a list with no rules at all (not even the catch-all `*` rule).
pub fn empty() -> List {
    let root = ListNode::new();
    let all = Vec::new();
    List { root, all }
}
/// Downloads the list from `url` via `request` and parses the response text.
#[cfg(feature = "remote_list")]
pub fn from_url<U: IntoUrl>(url: U) -> Result<List> {
    request(url).and_then(|body| Self::from_str(&body))
}
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<List> {
File::open(path)
.map_err(|err| ErrorKind::Io(err).into())
.and_then(|mut data| {
let mut res = String::new();
data.read_to_string(&mut res)?;
Self::from_str(&res)
})
}
/// Reads `reader` to its end as UTF-8 text and parses that text as a list.
pub fn from_reader<R: Read>(mut reader: R) -> Result<List> {
    let mut contents = String::new();
    let _bytes_read = reader.read_to_string(&mut contents)?;
    Self::build(contents.as_str())
}
/// Downloads the list from `LIST_URL`; if that fails for any reason, retries
/// once against the GitHub copy of the list.
#[cfg(feature = "remote_list")]
pub fn fetch() -> Result<List> {
    const GITHUB_MIRROR: &str =
        "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat";
    match Self::from_url(LIST_URL) {
        Ok(list) => Ok(list),
        // The primary error is discarded; only the mirror's result is reported.
        Err(_) => Self::from_url(GITHUB_MIRROR),
    }
}
/// Collects the text of every listed rule whose section equals `typ`
/// (the catch-all `*` rule is excluded by `all_internal`).
fn find_type(&self, typ: Type) -> Vec<&str> {
    let mut rules = Vec::new();
    for suffix in self.all_internal() {
        if suffix.typ == typ {
            rules.push(suffix.rule.as_str());
        }
    }
    rules
}
/// Returns the rules that were appended with `Type::Icann`.
pub fn icann(&self) -> Vec<&str> {
self.find_type(Type::Icann)
}
/// Returns the rules that were appended with `Type::Private`.
pub fn private(&self) -> Vec<&str> {
self.find_type(Type::Private)
}
/// Returns the text of every listed rule, in insertion order, without the
/// catch-all `*` rule.
pub fn all(&self) -> Vec<&str> {
    self.all_internal()
        .map(|suffix| &suffix.rule[..])
        .collect::<Vec<&str>>()
}
/// Iterates over the stored rules, skipping any whose text is exactly the
/// catch-all `*` rule.
fn all_internal(&self) -> impl Iterator<Item = &Suffix> {
    let stored = self.all.iter();
    stored.filter(|suffix| suffix.rule.as_str() != PREVAILING_STAR_RULE)
}
/// Parses `domain` against this list with syntax checking enabled.
pub fn parse_domain(&self, domain: &str) -> Result<Domain> {
Domain::parse(domain, self, true)
}
/// Parses `host` as a domain name or an IP address; delegates to `Host::parse`.
pub fn parse_host(&self, host: &str) -> Result<Host> {
Host::parse(host, self)
}
/// Extracts and parses the host of a URL.
///
/// For the `mailto` scheme the URL's username and host are joined with `@`
/// and validated through `parse_email`; a missing host is reported as
/// `ErrorKind::InvalidEmail`. For every other scheme the host goes to
/// `parse_host`, and a missing host is `ErrorKind::NoHost`.
pub fn parse_url<U: IntoUrl>(&self, url: U) -> Result<Host> {
    let url = url.into_url()?;
    let is_mailto = url.scheme() == "mailto";
    match (url.host_str(), is_mailto) {
        (Some(host), true) => self.parse_email(&format!("{}@{}", url.username(), host)),
        (Some(host), false) => self.parse_host(host),
        (None, true) => Err(ErrorKind::InvalidEmail.into()),
        (None, false) => Err(ErrorKind::NoHost.into()),
    }
}
/// Validates an e-mail address and parses its host part.
///
/// The address is split at the last `@`. It is rejected with
/// `ErrorKind::InvalidEmail` when there is no `@`, when the local part is
/// longer than 64 characters, when the whole address is longer than 254
/// characters, when an unquoted local part contains `..`, or when
/// `matcher::is_email_local` rejects the local part.
pub fn parse_email(&self, address: &str) -> Result<Host> {
    let mut pieces = address.rsplitn(2, '@');
    let (host, local) = match (pieces.next(), pieces.next()) {
        (Some(host), Some(local)) => (host, local),
        _ => return Err(ErrorKind::InvalidEmail.into()),
    };
    let too_long = local.chars().count() > 64 || address.chars().count() > 254;
    let has_bare_double_dot = !local.starts_with('"') && local.contains("..");
    if too_long || has_bare_double_dot || !matcher::is_email_local(local) {
        return Err(ErrorKind::InvalidEmail.into());
    }
    self.parse_host(host)
}
/// Parses an arbitrary string by guessing its kind: anything containing `://`
/// is treated as a URL, otherwise anything containing `@` as an e-mail
/// address, otherwise as a bare host.
pub fn parse_str(&self, string: &str) -> Result<Host> {
    if string.contains("://") {
        return self.parse_url(string);
    }
    if string.contains('@') {
        return self.parse_email(string);
    }
    self.parse_host(string)
}
/// Parses a DNS name, attaching domain information when a usable root exists.
///
/// The name must convert to ASCII, otherwise `ErrorKind::InvalidDomain` is
/// returned. The name is then matched against the list without the syntax
/// check; domain information is kept only if a root was found and that root
/// has valid domain syntax, in which case the domain's `full` value is
/// replaced by the root.
pub fn parse_dns_name(&self, name: &str) -> Result<DnsName> {
    let ascii = Domain::try_to_ascii(name).map_err(|_| ErrorKind::InvalidDomain(name.into()))?;
    let domain = Domain::parse(name, self, false)
        .ok()
        .and_then(|mut domain| {
            let root = match domain.root() {
                Some(root) if Domain::has_valid_syntax(root) => root.to_string(),
                _ => return None,
            };
            domain.full = root;
            Some(domain)
        });
    Ok(DnsName {
        name: ascii,
        domain,
    })
}
}
impl Host {
/// Parses `host` as a domain first and, failing that, as an IP address.
///
/// Before the IP attempt, exactly one pair of enclosing square brackets is
/// removed if present. Anything that is neither a valid domain nor an IP
/// address yields `ErrorKind::InvalidHost`.
fn parse(mut host: &str, list: &List) -> Result<Host> {
    if let Ok(domain) = Domain::parse(host, list, true) {
        return Ok(Host::Domain(domain));
    }
    let single_open = host.starts_with('[') && !host.starts_with("[[");
    let single_close = host.ends_with(']') && !host.ends_with("]]");
    if single_open && single_close {
        host = host.trim_start_matches('[').trim_end_matches(']');
    }
    match IpAddr::from_str(host) {
        Ok(ip) => Ok(Host::Ip(ip)),
        Err(_) => Err(ErrorKind::InvalidHost.into()),
    }
}
/// Returns `true` when this host is the `Ip` variant.
pub fn is_ip(&self) -> bool {
    match self {
        Host::Ip(_) => true,
        Host::Domain(_) => false,
    }
}
/// Returns `true` when this host is the `Domain` variant.
pub fn is_domain(&self) -> bool {
    match self {
        Host::Domain(_) => true,
        Host::Ip(_) => false,
    }
}
}
/// Formats the host as the display form of whichever variant it holds.
impl fmt::Display for Host {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if let Host::Ip(ip) = self {
            return write!(f, "{}", ip);
        }
        if let Host::Domain(domain) = self {
            return write!(f, "{}", domain);
        }
        Ok(())
    }
}
impl Domain {
/// Checks whether `domain` is syntactically acceptable as a domain name.
///
/// Returns `false` when the name starts with a dot, cannot be converted to
/// ASCII by `try_to_ascii`, has more than 127 labels (one trailing dot is
/// ignored), has a top-level label made only of ASCII digits, or contains a
/// label rejected by `matcher::is_label`.
pub fn has_valid_syntax(domain: &str) -> bool {
    if domain.starts_with('.') {
        return false;
    }
    let domain = match Self::try_to_ascii(domain) {
        Ok(domain) => domain,
        Err(_) => return false,
    };
    let mut labels: Vec<&str> = domain.split('.').collect();
    if domain.ends_with('.') {
        // A trailing dot produces one empty trailing element; drop it.
        labels.pop();
    }
    if labels.len() > 127 {
        return false;
    }
    // The top-level label must not be a number. Test for digits explicitly:
    // parsing as `f64` would also reject alphabetic labels such as `inf`,
    // `infinity` and `nan`, and exponent forms such as `1e5`.
    if let Some(tld) = labels.last() {
        if !tld.is_empty() && tld.bytes().all(|b| b.is_ascii_digit()) {
            return false;
        }
    }
    labels.iter().all(|label| matcher::is_label(label))
}
/// Returns the stored `full` name of this domain.
pub fn full(&self) -> &str {
&self.full
}
/// Returns the last `s_len` dot-separated labels of `input`, lower-cased and
/// joined with `.`; trailing dots of `input` are ignored.
///
/// If `s_len` is larger than the number of labels in `input`, all labels are
/// returned instead of panicking. The caller derives `s_len` from a separately
/// normalized copy of the name, so it is not guaranteed to fit `input`.
fn assemble(input: &str, s_len: usize) -> String {
    let domain = input.to_lowercase();
    // Walk labels from the right and stop after `s_len`; `take` clamps to the
    // labels actually present.
    let mut labels: Vec<&str> = domain
        .trim_end_matches('.')
        .rsplit('.')
        .take(s_len)
        .collect();
    // Restore left-to-right order before joining.
    labels.reverse();
    labels.join(".")
}
/// Matches `domain` against the rule trie of `list` and builds the resulting
/// `Domain`, taking the text of the suffix and registrable name from `input`.
///
/// The trie is walked label by label from the right. An exact child is
/// preferred over a `*` child, and the walk stops at the first label with
/// neither. The deepest node on the path that ends a rule wins.
///
/// * An exception rule shortens the suffix by one label.
/// * `typ` is `None` when any label on the path was matched through `*`.
/// * `registrable` is set only when `domain` has more labels than the suffix.
fn find_match(input: &str, domain: &str, list: &List) -> Domain {
    let mut longest_valid = None;
    let mut current = &list.root;
    let mut s_labels_len = 0;
    // NOTE(review): this flag is never reset, so it reflects the whole walk
    // rather than only the node recorded in `longest_valid` — confirm intended.
    let mut wildcard_match = false;
    for label in domain.rsplit('.') {
        if let Some(child) = current.children.get(label) {
            current = child;
            s_labels_len += 1;
        } else if let Some(child) = current.children.get("*") {
            current = child;
            s_labels_len += 1;
            wildcard_match = true;
        } else {
            break;
        }
        // Remember the deepest node at which a rule ends.
        if let Some(list_leaf) = current.leaf.as_ref() {
            longest_valid = Some((list_leaf, s_labels_len));
        }
    }
    match longest_valid {
        Some((leaf, suffix_len)) => {
            let typ = if !wildcard_match {
                Some(leaf.typ)
            } else {
                None
            };
            // `suffix_len` is at least 1 here, so the subtraction cannot underflow.
            let suffix_len = if leaf.is_exception_rule {
                suffix_len - 1
            } else {
                suffix_len
            };
            let suffix = Some(Self::assemble(input, suffix_len));
            let d_labels_len = domain.match_indices('.').count() + 1;
            let registrable = if d_labels_len > suffix_len {
                Some(Self::assemble(input, suffix_len + 1))
            } else {
                None
            };
            Domain {
                full: input.to_owned(),
                typ,
                suffix,
                registrable,
            }
        }
        None => Domain {
            full: input.to_owned(),
            typ: None,
            suffix: None,
            registrable: None,
        },
    }
}
fn try_to_ascii(domain: &str) -> Result<String> {
let result = idna::Config::default()
.transitional_processing(true)
.verify_dns_length(true)
.to_ascii(domain);
result.map_err(|error| ErrorKind::Uts46(error).into())
}
/// Parses `domain` against `list`.
///
/// With `check_syntax` set, a name failing `has_valid_syntax` is rejected as
/// `ErrorKind::InvalidDomain`. Trailing dots are trimmed, the name is converted
/// with `domain_to_unicode` (errors become `ErrorKind::Uts46`), and the
/// converted form is matched against the list.
fn parse(domain: &str, list: &List, check_syntax: bool) -> Result<Domain> {
    if check_syntax && !Self::has_valid_syntax(domain) {
        return Err(ErrorKind::InvalidDomain(domain.into()).into());
    }
    let input = domain.trim_end_matches('.');
    match domain_to_unicode(input) {
        (unicode, Ok(_)) => Ok(Self::find_match(input, &unicode, list)),
        (_, Err(errors)) => Err(ErrorKind::Uts46(errors).into()),
    }
}
pub fn root(&self) -> Option<&str> {
self.registrable.as_ref().map(|x| &x[..])
}
pub fn suffix(&self) -> Option<&str> {
self.suffix.as_ref().map(|x| &x[..])
}
/// Returns `true` when the recorded rule type is `Type::Private`.
pub fn is_private(&self) -> bool {
    match self.typ {
        Some(Type::Private) => true,
        _ => false,
    }
}
/// Returns `true` when the recorded rule type is `Type::Icann`.
pub fn is_icann(&self) -> bool {
    match self.typ {
        Some(Type::Icann) => true,
        _ => false,
    }
}
/// Returns `true` when a rule type was recorded for this domain. `find_match`
/// leaves the type unset when no rule matched or a `*` label was matched.
pub fn has_known_suffix(&self) -> bool {
self.typ.is_some()
}
}
/// Writes the full name without trailing dots, lower-cased.
impl fmt::Display for Domain {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let trimmed = self.full.trim_end_matches('.');
        let shown = trimmed.to_lowercase();
        write!(f, "{}", shown)
    }
}
impl DnsName {
/// Returns the domain information attached to this name, if any.
pub fn domain(&self) -> Option<&Domain> {
self.domain.as_ref()
}
}
/// Formats the DNS name by delegating to the stored `name` string.
impl fmt::Display for DnsName {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// The formatter is forwarded unchanged to the `fmt` method of `name`.
// NOTE(review): which formatting trait `.fmt` resolves to depends on the
// traits in scope, which are not visible here — confirm it is `Display`.
self.name.fmt(f)
}
}