#![doc(html_root_url = "http://wooya.me/tldextract-rs/tldextract/index.html")]
#![deny(missing_docs)]
mod cache;
#[allow(missing_docs)]
pub mod errors;
pub use errors::{Result, TldExtractError};
use idna::punycode;
use std::collections::HashSet;
use url::{Host, Url};
/// Builder-style configuration consumed by [`TldExtractor::new`].
///
/// Start from `TldOption::default()` (no cache path, every flag `false`),
/// chain the setters you need and finish with [`TldOption::build`].
#[derive(Default, Debug)]
pub struct TldOption {
// Optional path forwarded to the `cache` module when the suffix set is
// loaded and, if `update_local` is set, when it is written back.
cache_path: Option<String>,
// Forwarded as-is to `cache::get_tld_cache`; presumably toggles inclusion
// of private-domain rules -- TODO confirm against the `cache` module.
private_domains: bool,
// When true, the loaded suffix set is handed to `cache::set_tld_cache`.
update_local: bool,
// Extractor-wide default for the naive fallback (last label treated as the
// suffix when no known suffix matches).
naive_mode: bool,
}
impl TldOption {
    /// Sets the cache path that [`TldExtractor::new`] forwards to the `cache`
    /// module, and returns the updated options.
    pub fn cache_path(self, path: &str) -> Self {
        Self {
            cache_path: Some(String::from(path)),
            ..self
        }
    }

    /// Sets the `private_domains` flag, which [`TldExtractor::new`] forwards
    /// to the suffix-set loader, and returns the updated options.
    pub fn private_domains(self, b: bool) -> Self {
        Self {
            private_domains: b,
            ..self
        }
    }

    /// Chooses whether [`TldExtractor::new`] hands the loaded suffix set back
    /// to the `cache` module for writing, and returns the updated options.
    pub fn update_local(self, b: bool) -> Self {
        Self {
            update_local: b,
            ..self
        }
    }

    /// Sets the extractor's default naive mode: when no known suffix matches
    /// a host, the last label is reported as the suffix.
    pub fn naive_mode(self, b: bool) -> Self {
        Self {
            naive_mode: b,
            ..self
        }
    }

    /// Consumes the options and constructs a [`TldExtractor`] from them.
    pub fn build(self) -> TldExtractor {
        TldExtractor::new(self)
    }
}
/// Splits host names (or the host part of URLs) into subdomain, domain and
/// suffix using a set of known suffix rules.
///
/// Build one with [`TldOption::build`] or [`TldExtractor::new`].
#[derive(Debug)]
pub struct TldExtractor {
// Suffix rules as returned by `cache::get_tld_cache`; looked up verbatim,
// with `!`-prefixed and `*.`-prefixed variants, during extraction.
tld_cache: HashSet<String>,
// Default for the naive fallback when a call does not override it.
naive_mode: bool,
}
impl TldExtractor {
    /// Builds an extractor from `option`.
    ///
    /// The suffix set comes from `cache::get_tld_cache`, which receives the
    /// configured cache path and the `private_domains` flag. When
    /// `update_local` is set, the loaded set is passed to
    /// `cache::set_tld_cache`; whatever that call returns is deliberately
    /// discarded (best-effort write-back).
    pub fn new(option: TldOption) -> TldExtractor {
        let cache_path = option.cache_path.as_ref().map(String::as_str);
        let tld_cache = cache::get_tld_cache(cache_path, option.private_domains);
        if option.update_local {
            let _ = cache::set_tld_cache(cache_path, &tld_cache);
        }
        TldExtractor {
            tld_cache,
            naive_mode: option.naive_mode,
        }
    }

    /// Splits `url` into subdomain, domain and suffix, using the naive-mode
    /// default this extractor was built with.
    ///
    /// Input containing a `:` is parsed as a URL and its host is used; any
    /// other input is treated as a bare host name. For an IP-address host the
    /// address is returned in `domain` and the other parts are `None`.
    ///
    /// # Errors
    ///
    /// Fails when input containing a `:` cannot be parsed as a URL, or when
    /// the parsed URL has no host (`TldExtractError::NoHostError`).
    pub fn extract(&self, url: &str) -> Result<TldResult> {
        self._extract(url, None)
    }

    /// Same as [`TldExtractor::extract`], but always uses naive mode: when no
    /// known suffix matches, the last label is reported as the suffix.
    ///
    /// # Errors
    ///
    /// Fails under the same conditions as [`TldExtractor::extract`].
    pub fn extract_naive(&self, url: &str) -> Result<TldResult> {
        self._extract(url, true)
    }

    /// Shared implementation of `extract` / `extract_naive`.
    ///
    /// `naive` overrides the extractor-wide naive-mode default when it
    /// converts to `Some(_)`; `None` keeps the default.
    fn _extract<O: Into<Option<bool>>>(&self, url: &str, naive: O) -> Result<TldResult> {
        let requested: Option<bool> = naive.into();
        let naive_mode = requested.unwrap_or(self.naive_mode);

        // Without a ':' the input is handled as a bare host name and is never
        // run through the URL parser.
        if !url.contains(':') {
            return Ok(self.extract_triple(url, naive_mode));
        }

        let parsed = Url::parse(url)?;
        let host = parsed
            .host()
            .ok_or_else(|| TldExtractError::NoHostError(url.into()))?;
        let result = match host {
            Host::Domain(name) => self.extract_triple(name, naive_mode),
            // IP addresses have no suffix structure; report them whole.
            Host::Ipv4(ip) => TldResult {
                domain: Some(ip.to_string()),
                ..Default::default()
            },
            Host::Ipv6(ip) => TldResult {
                domain: Some(ip.to_string()),
                ..Default::default()
            },
        };
        Ok(result)
    }

    /// Splits a host name into `(subdomain, domain, suffix)`.
    ///
    /// Empty labels are dropped and `xn--` labels are punycode-decoded (kept
    /// verbatim if decoding fails). Scanning left to right, the first label
    /// tail found in the suffix set, either literally or through a `*.`
    /// wildcard rule, becomes the suffix. The label before it is the domain
    /// and everything further left is the subdomain.
    ///
    /// If nothing matches, the last label becomes the suffix only when
    /// `naive_mode` is set; the next label from the right is the domain and
    /// the remaining labels, in their original order, form the subdomain.
    fn extract_triple(&self, host: &str, naive_mode: bool) -> TldResult {
        let mut segs: Vec<String> = host
            .split('.')
            .filter(|&s| !s.is_empty())
            .map(|seg| {
                if seg.starts_with("xn--") {
                    // Only build the fallback copy when decoding actually fails.
                    punycode::decode_to_string(seg.trim_start_matches("xn--"))
                        .unwrap_or_else(|| seg.into())
                } else {
                    seg.into()
                }
            })
            .collect();

        let mut suffix = None;
        let mut subdomain = None;
        let mut domain = None;

        for i in 0..segs.len() {
            let piece = segs[i..].join(".");

            // NOTE(review): a matching exception rule (`!rule`) only skips this
            // candidate. The Public Suffix List algorithm would instead settle
            // on the suffix one label shorter at this point; confirm that later
            // iterations produce the intended result for hosts such as `www.ck`.
            if self.tld_cache.contains(&format!("!{}", piece)) {
                continue;
            }

            // `i < segs.len()`, so `i + 1..` is at worst an empty slice.
            let wildcard_piece = format!("*.{}", segs[i + 1..].join("."));
            if self.tld_cache.contains(&piece) || self.tld_cache.contains(&wildcard_piece) {
                suffix = Some(piece);
                if i != 0 {
                    domain = Some(segs[i - 1].clone());
                    if i > 1 {
                        subdomain = Some(segs[..i - 1].join("."));
                    }
                }
                break;
            }
        }

        // `domain` and `subdomain` are only ever set together with `suffix`,
        // so a missing suffix means nothing matched at all.
        if suffix.is_none() {
            if naive_mode {
                suffix = segs.pop();
            }
            domain = segs.pop();
            // Whatever is left is still in left-to-right order, so the
            // subdomain reads the same way as in the input host.
            if !segs.is_empty() {
                subdomain = Some(segs.join("."));
            }
        }

        TldResult {
            suffix,
            subdomain,
            domain,
        }
    }
}
/// The parts of a host name as produced by [`TldExtractor::extract`].
///
/// Any part may be absent; the derived `Default` has all three set to `None`.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct TldResult {
/// The label directly left of the suffix. For an IP-address host this holds
/// the textual address instead.
pub domain: Option<String>,
/// All labels left of the domain, joined with `.`; `None` when there are none.
pub subdomain: Option<String>,
/// The matched suffix (possibly several labels joined with `.`), or `None`
/// when no suffix was identified.
pub suffix: Option<String>,
}
impl TldResult {
    /// Creates a result from borrowed parts, copying each present part into
    /// an owned `String`.
    ///
    /// Every argument accepts either a `&str` or an `Option<&str>`, so both
    /// `TldResult::new("www", "example", "com")` and
    /// `TldResult::new(None, "example", "com")` work. Note the argument order:
    /// subdomain, domain, suffix.
    pub fn new<'a, O, P, Q>(subdomain: O, domain: P, suffix: Q) -> TldResult
    where
        O: Into<Option<&'a str>>,
        P: Into<Option<&'a str>>,
        Q: Into<Option<&'a str>>,
    {
        let domain_part: Option<&'a str> = domain.into();
        let subdomain_part: Option<&'a str> = subdomain.into();
        let suffix_part: Option<&'a str> = suffix.into();

        TldResult {
            domain: domain_part.map(str::to_owned),
            subdomain: subdomain_part.map(str::to_owned),
            suffix: suffix_part.map(str::to_owned),
        }
    }
}